{
  "best_metric": 0.630589176753026,
  "best_model_checkpoint": "/beegfs/scratch/user/blee/project_3/models/NLU.mt5-base.task_type-1.fine_tune.gpu_a100-40g+.node-1x1.bsz-64.epochs-22.metric-ema.metric_lang-all/checkpoint-18712",
  "epoch": 13.0,
  "eval_steps": 500.0,
  "global_step": 30407,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.04, "learning_rate": 4e-05, "loss": 14.5977, "step": 100},
    {"epoch": 0.09, "learning_rate": 8e-05, "loss": 3.2078, "step": 200},
    {"epoch": 0.13, "learning_rate": 7.984392680166998e-05, "loss": 1.1863, "step": 300},
    {"epoch": 0.17, "learning_rate": 7.968785360333997e-05, "loss": 0.9566, "step": 400},
    {"epoch": 0.21, "learning_rate": 7.953178040500995e-05, "loss": 0.8517, "step": 500},
    {"epoch": 0.26, "learning_rate": 7.937570720667994e-05, "loss": 0.7691, "step": 600},
    {"epoch": 0.3, "learning_rate": 7.921963400834992e-05, "loss": 0.7125, "step": 700},
    {"epoch": 0.34, "learning_rate": 7.906356081001991e-05, "loss": 0.6588, "step": 800},
    {"epoch": 0.38, "learning_rate": 7.89074876116899e-05, "loss": 0.5986, "step": 900},
    {"epoch": 0.43, "learning_rate": 7.875141441335988e-05, "loss": 0.5448, "step": 1000},
    {"epoch": 0.47, "learning_rate": 7.859534121502985e-05, "loss": 1.0722, "step": 1100},
    {"epoch": 0.51, "learning_rate": 7.843926801669984e-05, "loss": 0.6586, "step": 1200},
    {"epoch": 0.56, "learning_rate": 7.828319481836982e-05, "loss": 0.5673, "step": 1300},
    {"epoch": 0.6, "learning_rate": 7.81271216200398e-05, "loss": 0.5194, "step": 1400},
    {"epoch": 0.64, "learning_rate": 7.797104842170979e-05, "loss": 0.4766, "step": 1500},
    {"epoch": 0.68, "learning_rate": 7.781497522337976e-05, "loss": 0.4481, "step": 1600},
    {"epoch": 0.73, "learning_rate": 7.765890202504975e-05, "loss": 0.4148, "step": 1700},
    {"epoch": 0.77, "learning_rate": 7.750282882671974e-05, "loss": 0.3991, "step": 1800},
    {"epoch": 0.81, "learning_rate": 7.734675562838972e-05, "loss": 0.3878, "step": 1900},
    {"epoch": 0.86, "learning_rate": 7.719068243005971e-05, "loss": 0.3539, "step": 2000},
    {"epoch": 0.9, "learning_rate": 7.703460923172969e-05, "loss": 0.342, "step": 2100},
    {"epoch": 0.94, "learning_rate": 7.687853603339967e-05, "loss": 0.3232, "step": 2200},
    {"epoch": 0.98, "learning_rate": 7.672246283506966e-05, "loss": 0.3185, "step": 2300},
    {"epoch": 1.0, "eval_validation_all_ex_match_acc": 0.38741622486746025, "eval_validation_all_ex_match_acc_stderr": 0.004872331191643575, "eval_validation_all_intent_acc": 0.7521256376913074, "eval_validation_all_intent_acc_stderr": 0.004318432694874057, "eval_validation_all_loss": 0.2968784272670746, "eval_validation_all_runtime": 177.009, "eval_validation_all_samples_per_second": 56.477, "eval_validation_all_slot_micro_f1": 0.47014790945299517, "eval_validation_all_slot_micro_f1_stderr": 0.0019384774099713345, "eval_validation_all_steps_per_second": 1.768, "step": 2339},
    {"epoch": 1.03, "learning_rate": 7.656638963673963e-05, "loss": 0.3149, "step": 2400},
    {"epoch": 1.07, "learning_rate": 7.641031643840962e-05, "loss": 0.2844, "step": 2500},
    {"epoch": 1.11, "learning_rate": 7.62542432400796e-05, "loss": 0.2801, "step": 2600},
    {"epoch": 1.15, "learning_rate": 7.609817004174959e-05, "loss": 0.2616, "step": 2700},
    {"epoch": 1.2, "learning_rate": 7.594209684341957e-05, "loss": 0.2654, "step": 2800},
    {"epoch": 1.24, "learning_rate": 7.578602364508956e-05, "loss": 0.2536, "step": 2900},
    {"epoch": 1.28, "learning_rate": 7.562995044675953e-05, "loss": 0.2433, "step": 3000},
    {"epoch": 1.33, "learning_rate": 7.547387724842952e-05, "loss": 0.2407, "step": 3100},
    {"epoch": 1.37, "learning_rate": 7.53178040500995e-05, "loss": 0.2346, "step": 3200},
    {"epoch": 1.41, "learning_rate": 7.516173085176949e-05, "loss": 0.2374, "step": 3300},
    {"epoch": 1.45, "learning_rate": 7.500565765343947e-05, "loss": 0.2267, "step": 3400},
    {"epoch": 1.5, "learning_rate": 7.484958445510945e-05, "loss": 0.2177, "step": 3500},
    {"epoch": 1.54, "learning_rate": 7.469351125677944e-05, "loss": 0.2198, "step": 3600},
    {"epoch": 1.58, "learning_rate": 7.453743805844941e-05, "loss": 0.2116, "step": 3700},
    {"epoch": 1.62, "learning_rate": 7.43813648601194e-05, "loss": 0.2103, "step": 3800},
    {"epoch": 1.67, "learning_rate": 7.422529166178938e-05, "loss": 0.2044, "step": 3900},
    {"epoch": 1.71, "learning_rate": 7.406921846345937e-05, "loss": 0.2018, "step": 4000},
    {"epoch": 1.75, "learning_rate": 7.391314526512936e-05, "loss": 0.2038, "step": 4100},
    {"epoch": 1.8, "learning_rate": 7.375707206679934e-05, "loss": 0.199, "step": 4200},
    {"epoch": 1.84, "learning_rate": 7.360099886846931e-05, "loss": 0.1903, "step": 4300},
    {"epoch": 1.88, "learning_rate": 7.34449256701393e-05, "loss": 0.1908, "step": 4400},
    {"epoch": 1.92, "learning_rate": 7.328885247180928e-05, "loss": 0.1877, "step": 4500},
    {"epoch": 1.97, "learning_rate": 7.313277927347926e-05, "loss": 0.1878, "step": 4600},
    {"epoch": 2.0, "eval_validation_all_ex_match_acc": 0.5762728818645594, "eval_validation_all_ex_match_acc_stderr": 0.004942223428876296, "eval_validation_all_intent_acc": 0.7468240472141643, "eval_validation_all_intent_acc_stderr": 0.004348961141535433, "eval_validation_all_loss": 0.1461012214422226, "eval_validation_all_runtime": 180.06, "eval_validation_all_samples_per_second": 55.52, "eval_validation_all_slot_micro_f1": 0.7408780686564123, "eval_validation_all_slot_micro_f1_stderr": 0.0017017337770766719, "eval_validation_all_steps_per_second": 1.738, "step": 4678},
    {"epoch": 2.01, "learning_rate": 7.297670607514925e-05, "loss": 0.1804, "step": 4700},
    {"epoch": 2.05, "learning_rate": 7.282063287681923e-05, "loss": 0.17, "step": 4800},
    {"epoch": 2.09, "learning_rate": 7.266455967848922e-05, "loss": 0.1696, "step": 4900},
    {"epoch": 2.14, "learning_rate": 7.250848648015921e-05, "loss": 0.1644, "step": 5000},
    {"epoch": 2.18, "learning_rate": 7.235241328182918e-05, "loss": 0.1651, "step": 5100},
    {"epoch": 2.22, "learning_rate": 7.219634008349917e-05, "loss": 0.1696, "step": 5200},
    {"epoch": 2.27, "learning_rate": 7.204026688516915e-05, "loss": 0.1668, "step": 5300},
    {"epoch": 2.31, "learning_rate": 7.188419368683913e-05, "loss": 0.1615, "step": 5400},
    {"epoch": 2.35, "learning_rate": 7.172812048850912e-05, "loss": 0.1577, "step": 5500},
    {"epoch": 2.39, "learning_rate": 7.15720472901791e-05, "loss": 0.1592, "step": 5600},
    {"epoch": 2.44, "learning_rate": 7.141597409184909e-05, "loss": 0.1542, "step": 5700},
    {"epoch": 2.48, "learning_rate": 7.125990089351906e-05, "loss": 0.1543, "step": 5800},
    {"epoch": 2.52, "learning_rate": 7.110382769518905e-05, "loss": 0.1532, "step": 5900},
    {"epoch": 2.57, "learning_rate": 7.094775449685903e-05, "loss": 0.1559, "step": 6000},
    {"epoch": 2.61, "learning_rate": 7.079168129852902e-05, "loss": 0.1524, "step": 6100},
    {"epoch": 2.65, "learning_rate": 7.0635608100199e-05, "loss": 0.1509, "step": 6200},
    {"epoch": 2.69, "learning_rate": 7.047953490186899e-05, "loss": 0.1453, "step": 6300},
    {"epoch": 2.74, "learning_rate": 7.032346170353896e-05, "loss": 0.1474, "step": 6400},
    {"epoch": 2.78, "learning_rate": 7.016738850520894e-05, "loss": 0.1444, "step": 6500},
    {"epoch": 2.82, "learning_rate": 7.001131530687893e-05, "loss": 0.1468, "step": 6600},
    {"epoch": 2.86, "learning_rate": 6.985524210854891e-05, "loss": 0.1421, "step": 6700},
    {"epoch": 2.91, "learning_rate": 6.96991689102189e-05, "loss": 0.1427, "step": 6800},
    {"epoch": 2.95, "learning_rate": 6.954309571188888e-05, "loss": 0.14, "step": 6900},
    {"epoch": 2.99, "learning_rate": 6.938702251355887e-05, "loss": 0.1364, "step": 7000},
    {"epoch": 3.0, "eval_validation_all_ex_match_acc": 0.5900770231069321, "eval_validation_all_ex_match_acc_stderr": 0.004918929939767899, "eval_validation_all_intent_acc": 0.7644293287986396, "eval_validation_all_intent_acc_stderr": 0.004244186252062078, "eval_validation_all_loss": 0.1383330225944519, "eval_validation_all_runtime": 180.3126, "eval_validation_all_samples_per_second": 55.443, "eval_validation_all_slot_micro_f1": 0.7396164830681355, "eval_validation_all_slot_micro_f1_stderr": 0.0017044183392999975, "eval_validation_all_steps_per_second": 1.736, "step": 7017},
    {"epoch": 3.04, "learning_rate": 6.923094931522886e-05, "loss": 0.1338, "step": 7100},
    {"epoch": 3.08, "learning_rate": 6.907487611689883e-05, "loss": 0.1305, "step": 7200},
    {"epoch": 3.12, "learning_rate": 6.891880291856881e-05, "loss": 0.1255, "step": 7300},
    {"epoch": 3.16, "learning_rate": 6.87627297202388e-05, "loss": 0.1302, "step": 7400},
    {"epoch": 3.21, "learning_rate": 6.860665652190878e-05, "loss": 0.1312, "step": 7500},
    {"epoch": 3.25, "learning_rate": 6.845058332357877e-05, "loss": 0.1309, "step": 7600},
    {"epoch": 3.29, "learning_rate": 6.829451012524874e-05, "loss": 0.1272, "step": 7700},
    {"epoch": 3.33, "learning_rate": 6.813843692691872e-05, "loss": 0.1277, "step": 7800},
    {"epoch": 3.38, "learning_rate": 6.798236372858871e-05, "loss": 0.1297, "step": 7900},
    {"epoch": 3.42, "learning_rate": 6.78262905302587e-05, "loss": 0.1239, "step": 8000},
    {"epoch": 3.46, "learning_rate": 6.767021733192868e-05, "loss": 0.1227, "step": 8100},
    {"epoch": 3.51, "learning_rate": 6.751414413359867e-05, "loss": 0.1241, "step": 8200},
    {"epoch": 3.55, "learning_rate": 6.735807093526865e-05, "loss": 0.1224, "step": 8300},
    {"epoch": 3.59, "learning_rate": 6.720199773693864e-05, "loss": 0.1234, "step": 8400},
    {"epoch": 3.63, "learning_rate": 6.704592453860861e-05, "loss": 0.1218, "step": 8500},
    {"epoch": 3.68, "learning_rate": 6.688985134027859e-05, "loss": 0.121, "step": 8600},
    {"epoch": 3.72, "learning_rate": 6.673377814194858e-05, "loss": 0.1182, "step": 8700},
    {"epoch": 3.76, "learning_rate": 6.657770494361856e-05, "loss": 0.1219, "step": 8800},
    {"epoch": 3.81, "learning_rate": 6.642163174528855e-05, "loss": 0.1198, "step": 8900},
    {"epoch": 3.85, "learning_rate": 6.626555854695853e-05, "loss": 0.1213, "step": 9000},
    {"epoch": 3.89, "learning_rate": 6.610948534862852e-05, "loss": 0.1202, "step": 9100},
    {"epoch": 3.93, "learning_rate": 6.59534121502985e-05, "loss": 0.1147, "step": 9200},
    {"epoch": 3.98, "learning_rate": 6.579733895196848e-05, "loss": 0.1191, "step": 9300},
    {"epoch": 4.0, "eval_validation_all_ex_match_acc": 0.5862758827648294, "eval_validation_all_ex_match_acc_stderr": 0.004925741170944722, "eval_validation_all_intent_acc": 0.7755326597979394, "eval_validation_all_intent_acc_stderr": 0.004172936538970462, "eval_validation_all_loss": 0.14330914616584778, "eval_validation_all_runtime": 182.6873, "eval_validation_all_samples_per_second": 54.722, "eval_validation_all_slot_micro_f1": 0.7288510293073724, "eval_validation_all_slot_micro_f1_stderr": 0.0017265912249300535, "eval_validation_all_steps_per_second": 1.713, "step": 9356},
    {"epoch": 4.02, "learning_rate": 6.564126575363846e-05, "loss": 0.1091, "step": 9400},
    {"epoch": 4.06, "learning_rate": 6.548519255530845e-05, "loss": 0.1111, "step": 9500},
    {"epoch": 4.1, "learning_rate": 6.532911935697843e-05, "loss": 0.111, "step": 9600},
    {"epoch": 4.15, "learning_rate": 6.51730461586484e-05, "loss": 0.109, "step": 9700},
    {"epoch": 4.19, "learning_rate": 6.50169729603184e-05, "loss": 0.1177, "step": 9800},
    {"epoch": 4.23, "learning_rate": 6.486089976198837e-05, "loss": 0.1069, "step": 9900},
    {"epoch": 4.28, "learning_rate": 6.470482656365836e-05, "loss": 0.1133, "step": 10000},
    {"epoch": 4.32, "learning_rate": 6.454875336532834e-05, "loss": 0.1055, "step": 10100},
    {"epoch": 4.36, "learning_rate": 6.439268016699833e-05, "loss": 0.1048, "step": 10200},
    {"epoch": 4.4, "learning_rate": 6.423660696866832e-05, "loss": 0.1081, "step": 10300},
    {"epoch": 4.45, "learning_rate": 6.40805337703383e-05, "loss": 0.1073, "step": 10400},
    {"epoch": 4.49, "learning_rate": 6.392446057200827e-05, "loss": 0.1055, "step": 10500},
    {"epoch": 4.53, "learning_rate": 6.376838737367826e-05, "loss": 0.1032, "step": 10600},
    {"epoch": 4.57, "learning_rate": 6.361231417534824e-05, "loss": 0.1045, "step": 10700},
    {"epoch": 4.62, "learning_rate": 6.345624097701823e-05, "loss": 0.1062, "step": 10800},
    {"epoch": 4.66, "learning_rate": 6.330016777868821e-05, "loss": 0.0999, "step": 10900},
    {"epoch": 4.7, "learning_rate": 6.314409458035818e-05, "loss": 0.1036, "step": 11000},
    {"epoch": 4.75, "learning_rate": 6.298802138202817e-05, "loss": 0.1063, "step": 11100},
    {"epoch": 4.79, "learning_rate": 6.283194818369816e-05, "loss": 0.0997, "step": 11200},
    {"epoch": 4.83, "learning_rate": 6.267587498536814e-05, "loss": 0.1019, "step": 11300},
    {"epoch": 4.87, "learning_rate": 6.251980178703813e-05, "loss": 0.1009, "step": 11400},
    {"epoch": 4.92, "learning_rate": 6.236372858870811e-05, "loss": 0.1016, "step": 11500},
    {"epoch": 4.96, "learning_rate": 6.22076553903781e-05, "loss": 0.0967, "step": 11600},
    {"epoch": 5.0, "eval_validation_all_ex_match_acc": 0.6202860858257477, "eval_validation_all_ex_match_acc_stderr": 0.004853884404661511, "eval_validation_all_intent_acc": 0.771631489446834, "eval_validation_all_intent_acc_stderr": 0.0041984427435098615, "eval_validation_all_loss": 0.12277400493621826, "eval_validation_all_runtime": 180.5655, "eval_validation_all_samples_per_second": 55.365, "eval_validation_all_slot_micro_f1": 0.7819573425859782, "eval_validation_all_slot_micro_f1_stderr": 0.0016037195659180058, "eval_validation_all_steps_per_second": 1.733, "step": 11695},
    {"epoch": 5.0, "learning_rate": 6.205158219204808e-05, "loss": 0.1019, "step": 11700},
    {"epoch": 5.04, "learning_rate": 6.189550899371805e-05, "loss": 0.0958, "step": 11800},
    {"epoch": 5.09, "learning_rate": 6.173943579538804e-05, "loss": 0.0913, "step": 11900},
    {"epoch": 5.13, "learning_rate": 6.158336259705802e-05, "loss": 0.0914, "step": 12000},
    {"epoch": 5.17, "learning_rate": 6.142728939872801e-05, "loss": 0.092, "step": 12100},
    {"epoch": 5.22, "learning_rate": 6.127121620039799e-05, "loss": 0.0934, "step": 12200},
    {"epoch": 5.26, "learning_rate": 6.111514300206798e-05, "loss": 0.0918, "step": 12300},
    {"epoch": 5.3, "learning_rate": 6.095906980373796e-05, "loss": 0.09, "step": 12400},
    {"epoch": 5.34, "learning_rate": 6.0802996605407945e-05, "loss": 0.0896, "step": 12500},
    {"epoch": 5.39, "learning_rate": 6.064692340707792e-05, "loss": 0.0938, "step": 12600},
    {"epoch": 5.43, "learning_rate": 6.049085020874791e-05, "loss": 0.0927, "step": 12700},
    {"epoch": 5.47, "learning_rate": 6.033477701041789e-05, "loss": 0.0935, "step": 12800},
    {"epoch": 5.52, "learning_rate": 6.017870381208787e-05, "loss": 0.0904, "step": 12900},
    {"epoch": 5.56, "learning_rate": 6.002263061375786e-05, "loss": 0.0923, "step": 13000},
    {"epoch": 5.6, "learning_rate": 5.986655741542784e-05, "loss": 0.0911, "step": 13100},
    {"epoch": 5.64, "learning_rate": 5.971048421709782e-05, "loss": 0.0894, "step": 13200},
    {"epoch": 5.69, "learning_rate": 5.955441101876781e-05, "loss": 0.0902, "step": 13300},
    {"epoch": 5.73, "learning_rate": 5.939833782043779e-05, "loss": 0.091, "step": 13400},
    {"epoch": 5.77, "learning_rate": 5.9242264622107775e-05, "loss": 0.0904, "step": 13500},
    {"epoch": 5.81, "learning_rate": 5.908619142377776e-05, "loss": 0.0912, "step": 13600},
    {"epoch": 5.86, "learning_rate": 5.8930118225447735e-05, "loss": 0.0914, "step": 13700},
    {"epoch": 5.9, "learning_rate": 5.8774045027117726e-05, "loss": 0.0884, "step": 13800},
    {"epoch": 5.94, "learning_rate": 5.86179718287877e-05, "loss": 0.0856, "step": 13900},
    {"epoch": 5.99, "learning_rate": 5.8461898630457686e-05, "loss": 0.0874, "step": 14000},
    {"epoch": 6.0, "eval_validation_all_ex_match_acc": 0.6209862958887666, "eval_validation_all_ex_match_acc_stderr": 0.004852143300552274, "eval_validation_all_intent_acc": 0.7756326898069421, "eval_validation_all_intent_acc_stderr": 0.0041722756855579295, "eval_validation_all_loss": 0.12295829504728317, "eval_validation_all_runtime": 180.0678, "eval_validation_all_samples_per_second": 55.518, "eval_validation_all_slot_micro_f1": 0.7860288720332762, "eval_validation_all_slot_micro_f1_stderr": 0.0015928064325804261, "eval_validation_all_steps_per_second": 1.738, "step": 14034},
    {"epoch": 6.03, "learning_rate": 5.830582543212768e-05, "loss": 0.0846, "step": 14100},
    {"epoch": 6.07, "learning_rate": 5.8149752233797653e-05, "loss": 0.0821, "step": 14200},
    {"epoch": 6.11, "learning_rate": 5.7993679035467644e-05, "loss": 0.0845, "step": 14300},
    {"epoch": 6.16, "learning_rate": 5.783760583713762e-05, "loss": 0.0831, "step": 14400},
    {"epoch": 6.2, "learning_rate": 5.7681532638807604e-05, "loss": 0.0805, "step": 14500},
    {"epoch": 6.24, "learning_rate": 5.752545944047759e-05, "loss": 0.0816, "step": 14600},
    {"epoch": 6.28, "learning_rate": 5.736938624214757e-05, "loss": 0.0813, "step": 14700},
    {"epoch": 6.33, "learning_rate": 5.721331304381755e-05, "loss": 0.08, "step": 14800},
    {"epoch": 6.37, "learning_rate": 5.705723984548754e-05, "loss": 0.0777, "step": 14900},
    {"epoch": 6.41, "learning_rate": 5.690116664715752e-05, "loss": 0.0803, "step": 15000},
    {"epoch": 6.46, "learning_rate": 5.6745093448827506e-05, "loss": 0.0807, "step": 15100},
    {"epoch": 6.5, "learning_rate": 5.658902025049749e-05, "loss": 0.0829, "step": 15200},
    {"epoch": 6.54, "learning_rate": 5.6432947052167467e-05, "loss": 0.08, "step": 15300},
    {"epoch": 6.58, "learning_rate": 5.627687385383746e-05, "loss": 0.0798, "step": 15400},
    {"epoch": 6.63, "learning_rate": 5.6120800655507434e-05, "loss": 0.0813, "step": 15500},
    {"epoch": 6.67, "learning_rate": 5.596472745717742e-05, "loss": 0.081, "step": 15600},
    {"epoch": 6.71, "learning_rate": 5.580865425884741e-05, "loss": 0.0796, "step": 15700},
    {"epoch": 6.76, "learning_rate": 5.5652581060517385e-05, "loss": 0.0776, "step": 15800},
    {"epoch": 6.8, "learning_rate": 5.5496507862187375e-05, "loss": 0.0797, "step": 15900},
    {"epoch": 6.84, "learning_rate": 5.534043466385735e-05, "loss": 0.0816, "step": 16000},
    {"epoch": 6.88, "learning_rate": 5.5184361465527336e-05, "loss": 0.0776, "step": 16100},
    {"epoch": 6.93, "learning_rate": 5.5028288267197326e-05, "loss": 0.0806, "step": 16200},
    {"epoch": 6.97, "learning_rate": 5.48722150688673e-05, "loss": 0.0793, "step": 16300},
    {"epoch": 7.0, "eval_validation_all_ex_match_acc": 0.6250875262578773, "eval_validation_all_ex_match_acc_stderr": 0.004841729420424345, "eval_validation_all_intent_acc": 0.77333199959988, "eval_validation_all_intent_acc_stderr": 0.004187388454167221, "eval_validation_all_loss": 0.12834610044956207, "eval_validation_all_runtime": 179.7767, "eval_validation_all_samples_per_second": 55.608, "eval_validation_all_slot_micro_f1": 0.7850994663777063, "eval_validation_all_slot_micro_f1_stderr": 0.0015953179494001197, "eval_validation_all_steps_per_second": 1.741, "step": 16373},
    {"epoch": 7.01, "learning_rate": 5.471614187053728e-05, "loss": 0.0767, "step": 16400},
    {"epoch": 7.05, "learning_rate": 5.456006867220727e-05, "loss": 0.0738, "step": 16500},
    {"epoch": 7.1, "learning_rate": 5.4403995473877254e-05, "loss": 0.0701, "step": 16600},
    {"epoch": 7.14, "learning_rate": 5.424792227554724e-05, "loss": 0.0705, "step": 16700},
    {"epoch": 7.18, "learning_rate": 5.409184907721722e-05, "loss": 0.0724, "step": 16800},
    {"epoch": 7.23, "learning_rate": 5.39357758788872e-05, "loss": 0.0711, "step": 16900},
    {"epoch": 7.27, "learning_rate": 5.377970268055719e-05, "loss": 0.0727, "step": 17000},
    {"epoch": 7.31, "learning_rate": 5.362362948222717e-05, "loss": 0.0714, "step": 17100},
    {"epoch": 7.35, "learning_rate": 5.346755628389715e-05, "loss": 0.0711, "step": 17200},
    {"epoch": 7.4, "learning_rate": 5.331148308556714e-05, "loss": 0.0699, "step": 17300},
    {"epoch": 7.44, "learning_rate": 5.3155409887237116e-05, "loss": 0.0692, "step": 17400},
    {"epoch": 7.48, "learning_rate": 5.2999336688907106e-05, "loss": 0.0711, "step": 17500},
    {"epoch": 7.52, "learning_rate": 5.284326349057708e-05, "loss": 0.073, "step": 17600},
    {"epoch": 7.57, "learning_rate": 5.268719029224707e-05, "loss": 0.0701, "step": 17700},
    {"epoch": 7.61, "learning_rate": 5.253111709391706e-05, "loss": 0.0709, "step": 17800},
    {"epoch": 7.65, "learning_rate": 5.2375043895587034e-05, "loss": 0.0748, "step": 17900},
    {"epoch": 7.7, "learning_rate": 5.221897069725701e-05, "loss": 0.0713, "step": 18000},
    {"epoch": 7.74, "learning_rate": 5.2062897498927e-05, "loss": 0.0706, "step": 18100},
    {"epoch": 7.78, "learning_rate": 5.1906824300596985e-05, "loss": 0.0748, "step": 18200},
    {"epoch": 7.82, "learning_rate": 5.175075110226697e-05, "loss": 0.0694, "step": 18300},
    {"epoch": 7.87, "learning_rate": 5.159467790393695e-05, "loss": 0.072, "step": 18400},
    {"epoch": 7.91, "learning_rate": 5.143860470560693e-05, "loss": 0.0705, "step": 18500},
    {"epoch": 7.95, "learning_rate": 5.128253150727692e-05, "loss": 0.068, "step": 18600},
    {"epoch": 7.99, "learning_rate": 5.11264583089469e-05, "loss": 0.0687, "step": 18700},
    {"epoch": 8.0, "eval_validation_all_ex_match_acc": 0.630589176753026, "eval_validation_all_ex_match_acc_stderr": 0.0048271769371602165, "eval_validation_all_intent_acc": 0.7778333500050015, "eval_validation_all_intent_acc_stderr": 0.004157649429320968, "eval_validation_all_loss": 0.1259431391954422, "eval_validation_all_runtime": 170.3009, "eval_validation_all_samples_per_second": 58.702, "eval_validation_all_slot_micro_f1": 0.7861930842802256, "eval_validation_all_slot_micro_f1_stderr": 0.001592361422278994, "eval_validation_all_steps_per_second": 1.838, "step": 18712},
    {"epoch": 8.04, "learning_rate": 5.097038511061688e-05, "loss": 0.0635, "step": 18800},
    {"epoch": 8.08, "learning_rate": 5.081431191228687e-05, "loss": 0.0628, "step": 18900},
    {"epoch": 8.12, "learning_rate": 5.065823871395685e-05, "loss": 0.0674, "step": 19000},
    {"epoch": 8.17, "learning_rate": 5.050216551562683e-05, "loss": 0.064, "step": 19100},
    {"epoch": 8.21, "learning_rate": 5.0346092317296815e-05, "loss": 0.0634, "step": 19200},
    {"epoch": 8.25, "learning_rate": 5.01900191189668e-05, "loss": 0.0665, "step": 19300},
    {"epoch": 8.29, "learning_rate": 5.003394592063679e-05, "loss": 0.064, "step": 19400},
    {"epoch": 8.34, "learning_rate": 4.9877872722306765e-05, "loss": 0.0646, "step": 19500},
    {"epoch": 8.38, "learning_rate": 4.972179952397675e-05, "loss": 0.0669, "step": 19600},
    {"epoch": 8.42, "learning_rate": 4.956572632564673e-05, "loss": 0.0629, "step": 19700},
    {"epoch": 8.47, "learning_rate": 4.9409653127316716e-05, "loss": 0.0652, "step": 19800},
    {"epoch": 8.51, "learning_rate": 4.925357992898669e-05, "loss": 0.0642, "step": 19900},
    {"epoch": 8.55, "learning_rate": 4.9097506730656684e-05, "loss": 0.0652, "step": 20000},
    {"epoch": 8.59, "learning_rate": 4.894143353232666e-05, "loss": 0.0659, "step": 20100},
    {"epoch": 8.64, "learning_rate": 4.878536033399665e-05, "loss": 0.0632, "step": 20200},
    {"epoch": 8.68, "learning_rate": 4.8629287135666634e-05, "loss": 0.0655, "step": 20300},
    {"epoch": 8.72, "learning_rate": 4.847321393733661e-05, "loss": 0.0624, "step": 20400},
    {"epoch": 8.76, "learning_rate": 4.83171407390066e-05, "loss": 0.0646, "step": 20500},
    {"epoch": 8.81, "learning_rate": 4.816106754067658e-05, "loss": 0.0659, "step": 20600},
    {"epoch": 8.85, "learning_rate": 4.800499434234656e-05, "loss": 0.064, "step": 20700},
    {"epoch": 8.89, "learning_rate": 4.7848921144016546e-05, "loss": 0.0642, "step": 20800},
    {"epoch": 8.94, "learning_rate": 4.769284794568653e-05, "loss": 0.0665, "step": 20900},
    {"epoch": 8.98, "learning_rate": 4.753677474735652e-05, "loss": 0.0626, "step": 21000},
    {"epoch": 9.0, "eval_validation_all_ex_match_acc": 0.6302890867260178, "eval_validation_all_ex_match_acc_stderr": 0.0048279880103909295, "eval_validation_all_intent_acc": 0.7782334700410123, "eval_validation_all_intent_acc_stderr": 0.004154972051978152, "eval_validation_all_loss": 0.1310112625360489, "eval_validation_all_runtime": 170.1316, "eval_validation_all_samples_per_second": 58.76, "eval_validation_all_slot_micro_f1": 0.789992449441871, "eval_validation_all_slot_micro_f1_stderr": 0.0015819585080535143, "eval_validation_all_steps_per_second": 1.84, "step": 21051},
    {"epoch": 9.02, "learning_rate": 4.73807015490265e-05, "loss": 0.0602, "step": 21100},
    {"epoch": 9.06, "learning_rate": 4.722462835069648e-05, "loss": 0.0594, "step": 21200},
    {"epoch": 9.11, "learning_rate": 4.7068555152366464e-05, "loss": 0.0583, "step": 21300},
    {"epoch": 9.15, "learning_rate": 4.691248195403645e-05, "loss": 0.0584, "step": 21400},
    {"epoch": 9.19, "learning_rate": 4.6756408755706424e-05, "loss": 0.0565, "step": 21500},
    {"epoch": 9.23, "learning_rate": 4.6600335557376415e-05, "loss": 0.0568, "step": 21600},
    {"epoch": 9.28, "learning_rate": 4.644426235904639e-05, "loss": 0.0586, "step": 21700},
    {"epoch": 9.32, "learning_rate": 4.628818916071638e-05, "loss": 0.0552, "step": 21800},
    {"epoch": 9.36, "learning_rate": 4.6132115962386366e-05, "loss": 0.0608, "step": 21900},
    {"epoch": 9.41, "learning_rate": 4.597604276405634e-05, "loss": 0.0595, "step": 22000},
    {"epoch": 9.45, "learning_rate": 4.581996956572633e-05, "loss": 0.0574, "step": 22100},
    {"epoch": 9.49, "learning_rate": 4.566389636739631e-05, "loss": 0.0572, "step": 22200},
    {"epoch": 9.53, "learning_rate": 4.5507823169066293e-05, "loss": 0.0585, "step": 22300},
    {"epoch": 9.58, "learning_rate": 4.5351749970736284e-05, "loss": 0.0584, "step": 22400},
    {"epoch": 9.62, "learning_rate": 4.519567677240626e-05, "loss": 0.0575, "step": 22500},
    {"epoch": 9.66, "learning_rate": 4.503960357407625e-05, "loss": 0.0601, "step": 22600},
    {"epoch": 9.71, "learning_rate": 4.488353037574623e-05, "loss": 0.0586, "step": 22700},
    {"epoch": 9.75, "learning_rate": 4.472745717741621e-05, "loss": 0.0576, "step": 22800},
    {"epoch": 9.79, "learning_rate": 4.4571383979086195e-05, "loss": 0.057, "step": 22900},
    {"epoch": 9.83, "learning_rate": 4.441531078075618e-05, "loss": 0.057, "step": 23000},
    {"epoch": 9.88, "learning_rate": 4.4259237582426156e-05, "loss": 0.0596, "step": 23100},
    {"epoch": 9.92, "learning_rate": 4.4103164384096146e-05, "loss": 0.0592, "step": 23200},
    {"epoch": 9.96, "learning_rate": 4.394709118576613e-05, "loss": 0.058, "step": 23300},
    {"epoch": 10.0, "eval_validation_all_ex_match_acc": 0.6262878863659098, "eval_validation_all_ex_match_acc_stderr": 0.00483861143551642, "eval_validation_all_intent_acc": 0.7731319395818745, "eval_validation_all_intent_acc_stderr": 0.00418869405726844, "eval_validation_all_loss": 0.13434068858623505, "eval_validation_all_runtime": 178.1682, "eval_validation_all_samples_per_second": 56.11, "eval_validation_all_slot_micro_f1": 0.7893436766357048, "eval_validation_all_slot_micro_f1_stderr": 0.0015837494632691346, "eval_validation_all_steps_per_second": 1.757, "step": 23390},
    {"epoch": 10.0, "learning_rate": 4.379101798743611e-05, "loss": 0.0602, "step": 23400},
    {"epoch": 10.05, "learning_rate": 4.36349447891061e-05, "loss": 0.0527, "step": 23500},
    {"epoch": 10.09, "learning_rate": 4.3478871590776074e-05, "loss": 0.0558, "step": 23600},
    {"epoch": 10.13, "learning_rate": 4.3322798392446064e-05, "loss": 0.0515, "step": 23700},
    {"epoch": 10.18, "learning_rate": 4.316672519411604e-05, "loss": 0.0521, "step": 23800},
    {"epoch": 10.22, "learning_rate": 4.3010651995786025e-05, "loss": 0.0512, "step": 23900},
    {"epoch": 10.26, "learning_rate": 4.2854578797456015e-05, "loss": 0.0525, "step": 24000},
    {"epoch": 10.3, "learning_rate": 4.269850559912599e-05, "loss": 0.053, "step": 24100},
    {"epoch": 10.35, "learning_rate": 4.254243240079598e-05, "loss": 0.0531, "step": 24200},
    {"epoch": 10.39, "learning_rate": 4.238635920246596e-05, "loss": 0.0536, "step": 24300},
    {"epoch": 10.43, "learning_rate": 4.223028600413594e-05, "loss": 0.0543, "step": 24400},
    {"epoch": 10.47, "learning_rate": 4.2074212805805926e-05, "loss": 0.0521, "step": 24500},
    {"epoch": 10.52, "learning_rate": 4.191813960747591e-05, "loss": 0.0527, "step": 24600},
    {"epoch": 10.56, "learning_rate": 4.176206640914589e-05, "loss": 0.0554, "step": 24700},
    {"epoch": 10.6, "learning_rate": 4.160599321081588e-05, "loss": 0.0529, "step": 24800},
    {"epoch": 10.65, "learning_rate": 4.144992001248586e-05, "loss": 0.0541, "step": 24900},
    {"epoch": 10.69, "learning_rate": 4.1293846814155845e-05, "loss": 0.0556, "step": 25000},
    {"epoch": 10.73, "learning_rate": 4.113777361582583e-05, "loss": 0.0512, "step": 25100},
    {"epoch": 10.77, "learning_rate": 4.0981700417495805e-05, "loss": 0.0535, "step": 25200},
    {"epoch": 10.82, "learning_rate": 4.0825627219165796e-05, "loss": 0.0523, "step": 25300},
    {"epoch": 10.86, "learning_rate": 4.066955402083577e-05, "loss": 0.0517, "step": 25400},
    {"epoch": 10.9, "learning_rate": 4.0513480822505756e-05, "loss": 0.0537, "step": 25500},
    {"epoch": 10.94, "learning_rate": 4.0357407624175746e-05, "loss": 0.054, "step": 25600},
    {"epoch": 10.99, "learning_rate": 4.020133442584572e-05, "loss": 0.0519, "step": 25700},
    {"epoch": 11.0, "eval_validation_all_ex_match_acc": 0.6264879463839151, "eval_validation_all_ex_match_acc_stderr": 0.004838088679693641, "eval_validation_all_intent_acc": 0.7737321196358907, "eval_validation_all_intent_acc_stderr": 0.004184773155844174, "eval_validation_all_loss": 0.1407793015241623, "eval_validation_all_runtime": 181.9271, "eval_validation_all_samples_per_second": 54.951, "eval_validation_all_slot_micro_f1": 0.7874137545030835, "eval_validation_all_slot_micro_f1_stderr": 0.0015890415122001155, "eval_validation_all_steps_per_second": 1.72, "step": 25729},
    {"epoch": 11.03, "learning_rate": 4.004526122751571e-05, "loss": 0.0513, "step": 25800},
    {"epoch": 11.07, "learning_rate": 3.988918802918569e-05, "loss": 0.0493, "step": 25900},
    {"epoch": 11.12, "learning_rate": 3.9733114830855674e-05, "loss": 0.0491, "step": 26000},
    {"epoch": 11.16, "learning_rate": 3.957704163252566e-05, "loss": 0.0473, "step": 26100},
    {"epoch": 11.2, "learning_rate": 3.942096843419564e-05, "loss": 0.0488, "step": 26200},
    {"epoch": 11.24, "learning_rate": 3.9264895235865625e-05, "loss": 0.0494, "step": 26300},
    {"epoch": 11.29, "learning_rate": 3.910882203753561e-05, "loss": 0.048, "step": 26400},
    {"epoch": 11.33, "learning_rate": 3.895274883920559e-05, "loss": 0.0478, "step": 26500},
    {"epoch": 11.37, "learning_rate": 3.8796675640875576e-05, "loss": 0.049, "step": 26600},
    {"epoch": 11.42, "learning_rate": 3.864060244254556e-05, "loss": 0.0505, "step": 26700},
    {"epoch": 11.46, "learning_rate": 3.8484529244215536e-05, "loss": 0.0499, "step": 26800},
    {"epoch": 11.5, "learning_rate": 3.832845604588552e-05, "loss": 0.0495, "step": 26900},
    {"epoch": 11.54, "learning_rate": 3.8172382847555504e-05, "loss": 0.0501, "step": 27000},
    {"epoch": 11.59, "learning_rate": 3.8016309649225494e-05, "loss": 0.047, "step": 27100},
    {"epoch": 11.63, "learning_rate": 3.786023645089547e-05, "loss": 0.0472, "step": 27200},
    {"epoch": 11.67, "learning_rate": 3.7704163252565455e-05, "loss": 0.0471, "step": 27300},
    {"epoch": 11.71, "learning_rate": 3.754809005423544e-05, "loss": 0.0488, "step": 27400},
    {"epoch": 11.76, "learning_rate": 3.739201685590542e-05, "loss": 0.0495, "step": 27500},
    {"epoch": 11.8, "learning_rate": 3.7235943657575405e-05, "loss": 0.0465, "step": 27600},
    {"epoch": 11.84, "learning_rate": 3.707987045924539e-05, "loss": 0.0476, "step": 27700},
    {"epoch": 11.89, "learning_rate": 3.692379726091537e-05, "loss": 0.048, "step": 27800},
    {"epoch": 11.93, "learning_rate": 3.6767724062585356e-05, "loss": 0.0487, "step": 27900},
    {"epoch": 11.97, "learning_rate": 3.661165086425534e-05, "loss": 0.0504, "step": 28000},
    {"epoch": 12.0, "eval_validation_all_ex_match_acc": 0.6299889966990097, "eval_validation_all_ex_match_acc_stderr": 0.004828797081892748, "eval_validation_all_intent_acc": 0.7760328098429529, "eval_validation_all_intent_acc_stderr": 0.004169628824053711, "eval_validation_all_loss": 0.1429910659790039, "eval_validation_all_runtime": 181.3487, "eval_validation_all_samples_per_second": 55.126, "eval_validation_all_slot_micro_f1": 0.7888446215139443, "eval_validation_all_slot_micro_f1_stderr": 0.0015851230161344064, "eval_validation_all_steps_per_second": 1.726, "step": 28068},
    {"epoch": 12.01, "learning_rate": 3.6455577665925324e-05, "loss": 0.0442, "step": 28100},
    {"epoch": 12.06, "learning_rate": 3.629950446759531e-05, "loss": 0.0423, "step": 28200},
    {"epoch": 12.1, "learning_rate": 3.614343126926529e-05, "loss": 0.0446, "step": 28300},
    {"epoch": 12.14, "learning_rate": 3.598735807093527e-05, "loss": 0.0419, "step": 28400},
    {"epoch": 12.18, "learning_rate": 3.583128487260525e-05, "loss": 0.0453, "step": 28500},
    {"epoch": 12.23, "learning_rate": 3.567521167427524e-05, "loss": 0.0434, "step": 28600},
    {"epoch": 12.27, "learning_rate": 3.5519138475945225e-05, "loss": 0.0433, "step": 28700},
    {"epoch": 12.31, "learning_rate": 3.53630652776152e-05, "loss": 0.0457, "step": 28800},
    {"epoch": 12.36, "learning_rate": 3.5206992079285186e-05, "loss": 0.0458, "step": 28900},
    {"epoch": 12.4, "learning_rate": 3.505091888095517e-05, "loss": 0.0436, "step": 29000},
    {"epoch": 12.44, "learning_rate": 3.489484568262515e-05, "loss": 0.0465, "step": 29100},
    {"epoch": 12.48, "learning_rate": 3.473877248429514e-05, "loss": 0.0454, "step": 29200},
    {"epoch": 12.53, "learning_rate": 3.458269928596512e-05, "loss": 0.0457, "step": 29300},
    {"epoch": 12.57, "learning_rate": 3.4426626087635104e-05, "loss": 0.0474, "step": 29400},
    {"epoch": 12.61, "learning_rate": 3.427055288930509e-05, "loss": 0.0458, "step": 29500},
    {"epoch": 12.65, "learning_rate": 3.411447969097507e-05, "loss": 0.0452, "step": 29600},
    {"epoch": 12.7, "learning_rate": 3.3958406492645055e-05, "loss": 0.0455, "step": 29700},
    {"epoch": 12.74, "learning_rate": 3.380233329431504e-05, "loss": 0.0436, "step": 29800},
    {"epoch": 12.78, "learning_rate": 3.364626009598502e-05, "loss": 0.0472, "step": 29900},
    {"epoch": 12.83, "learning_rate": 3.3490186897655e-05, "loss": 0.0454, "step": 30000},
    {"epoch": 12.87, "learning_rate": 3.333411369932498e-05, "loss": 0.0431, "step": 30100},
    {"epoch": 12.91, "learning_rate": 3.317804050099497e-05, "loss": 0.0455, "step": 30200},
    {"epoch": 12.95, "learning_rate": 3.3021967302664957e-05, "loss": 0.0433, "step": 30300},
    {"epoch": 13.0, "learning_rate": 3.2865894104334933e-05, "loss": 0.0438, "step": 30400},
    {"epoch": 13.0, "eval_validation_all_ex_match_acc": 0.6277883365009503, "eval_validation_all_ex_match_acc_stderr": 0.0048346692041261715, "eval_validation_all_intent_acc": 0.7767330199059718, "eval_validation_all_intent_acc_stderr": 0.004164983516855836, "eval_validation_all_loss": 0.1506633162498474, "eval_validation_all_runtime": 180.9233, "eval_validation_all_samples_per_second": 55.255, "eval_validation_all_slot_micro_f1": 0.7894693944086723, "eval_validation_all_slot_micro_f1_stderr": 0.001583402887351871, "eval_validation_all_steps_per_second": 1.73, "step": 30407}
  ],
  "logging_steps": 100,
  "max_steps": 51458,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 22,
  "save_steps": 500.0,
  "total_flos": 7.470556690869043e+16,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}