{ "best_metric": 0.630589176753026, "best_model_checkpoint": "/beegfs/scratch/user/blee/project_3/models/NLU.mt5-base.task_type-1.fine_tune.gpu_a100-40g+.node-1x1.bsz-64.epochs-22.metric-ema.metric_lang-all/checkpoint-18712", "epoch": 13.0, "eval_steps": 500.0, "global_step": 30407, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 4e-05, "loss": 14.5977, "step": 100 }, { "epoch": 0.09, "learning_rate": 8e-05, "loss": 3.2078, "step": 200 }, { "epoch": 0.13, "learning_rate": 7.984392680166998e-05, "loss": 1.1863, "step": 300 }, { "epoch": 0.17, "learning_rate": 7.968785360333997e-05, "loss": 0.9566, "step": 400 }, { "epoch": 0.21, "learning_rate": 7.953178040500995e-05, "loss": 0.8517, "step": 500 }, { "epoch": 0.26, "learning_rate": 7.937570720667994e-05, "loss": 0.7691, "step": 600 }, { "epoch": 0.3, "learning_rate": 7.921963400834992e-05, "loss": 0.7125, "step": 700 }, { "epoch": 0.34, "learning_rate": 7.906356081001991e-05, "loss": 0.6588, "step": 800 }, { "epoch": 0.38, "learning_rate": 7.89074876116899e-05, "loss": 0.5986, "step": 900 }, { "epoch": 0.43, "learning_rate": 7.875141441335988e-05, "loss": 0.5448, "step": 1000 }, { "epoch": 0.47, "learning_rate": 7.859534121502985e-05, "loss": 1.0722, "step": 1100 }, { "epoch": 0.51, "learning_rate": 7.843926801669984e-05, "loss": 0.6586, "step": 1200 }, { "epoch": 0.56, "learning_rate": 7.828319481836982e-05, "loss": 0.5673, "step": 1300 }, { "epoch": 0.6, "learning_rate": 7.81271216200398e-05, "loss": 0.5194, "step": 1400 }, { "epoch": 0.64, "learning_rate": 7.797104842170979e-05, "loss": 0.4766, "step": 1500 }, { "epoch": 0.68, "learning_rate": 7.781497522337976e-05, "loss": 0.4481, "step": 1600 }, { "epoch": 0.73, "learning_rate": 7.765890202504975e-05, "loss": 0.4148, "step": 1700 }, { "epoch": 0.77, "learning_rate": 7.750282882671974e-05, "loss": 0.3991, "step": 1800 }, { "epoch": 0.81, "learning_rate": 7.734675562838972e-05, "loss": 0.3878, "step": 1900 }, { "epoch": 0.86, "learning_rate": 7.719068243005971e-05, "loss": 0.3539, "step": 2000 }, { "epoch": 0.9, "learning_rate": 7.703460923172969e-05, "loss": 0.342, "step": 2100 }, { "epoch": 0.94, "learning_rate": 7.687853603339967e-05, "loss": 0.3232, "step": 2200 }, { "epoch": 0.98, "learning_rate": 7.672246283506966e-05, "loss": 0.3185, "step": 2300 }, { "epoch": 1.0, "eval_validation_all_ex_match_acc": 0.38741622486746025, "eval_validation_all_ex_match_acc_stderr": 0.004872331191643575, "eval_validation_all_intent_acc": 0.7521256376913074, "eval_validation_all_intent_acc_stderr": 0.004318432694874057, "eval_validation_all_loss": 0.2968784272670746, "eval_validation_all_runtime": 177.009, "eval_validation_all_samples_per_second": 56.477, "eval_validation_all_slot_micro_f1": 0.47014790945299517, "eval_validation_all_slot_micro_f1_stderr": 0.0019384774099713345, "eval_validation_all_steps_per_second": 1.768, "step": 2339 }, { "epoch": 1.03, "learning_rate": 7.656638963673963e-05, "loss": 0.3149, "step": 2400 }, { "epoch": 1.07, "learning_rate": 7.641031643840962e-05, "loss": 0.2844, "step": 2500 }, { "epoch": 1.11, "learning_rate": 7.62542432400796e-05, "loss": 0.2801, "step": 2600 }, { "epoch": 1.15, "learning_rate": 7.609817004174959e-05, "loss": 0.2616, "step": 2700 }, { "epoch": 1.2, "learning_rate": 7.594209684341957e-05, "loss": 0.2654, "step": 2800 }, { "epoch": 1.24, "learning_rate": 7.578602364508956e-05, "loss": 0.2536, "step": 2900 }, { "epoch": 1.28, "learning_rate": 7.562995044675953e-05, "loss": 0.2433, "step": 3000 }, { "epoch": 1.33, "learning_rate": 7.547387724842952e-05, "loss": 0.2407, "step": 3100 }, { "epoch": 1.37, "learning_rate": 7.53178040500995e-05, "loss": 0.2346, "step": 3200 }, { "epoch": 1.41, "learning_rate": 7.516173085176949e-05, "loss": 0.2374, "step": 3300 }, { "epoch": 1.45, "learning_rate": 7.500565765343947e-05, "loss": 0.2267, "step": 3400 }, { "epoch": 1.5, "learning_rate": 7.484958445510945e-05, "loss": 0.2177, "step": 3500 }, { "epoch": 1.54, "learning_rate": 7.469351125677944e-05, "loss": 0.2198, "step": 3600 }, { "epoch": 1.58, "learning_rate": 7.453743805844941e-05, "loss": 0.2116, "step": 3700 }, { "epoch": 1.62, "learning_rate": 7.43813648601194e-05, "loss": 0.2103, "step": 3800 }, { "epoch": 1.67, "learning_rate": 7.422529166178938e-05, "loss": 0.2044, "step": 3900 }, { "epoch": 1.71, "learning_rate": 7.406921846345937e-05, "loss": 0.2018, "step": 4000 }, { "epoch": 1.75, "learning_rate": 7.391314526512936e-05, "loss": 0.2038, "step": 4100 }, { "epoch": 1.8, "learning_rate": 7.375707206679934e-05, "loss": 0.199, "step": 4200 }, { "epoch": 1.84, "learning_rate": 7.360099886846931e-05, "loss": 0.1903, "step": 4300 }, { "epoch": 1.88, "learning_rate": 7.34449256701393e-05, "loss": 0.1908, "step": 4400 }, { "epoch": 1.92, "learning_rate": 7.328885247180928e-05, "loss": 0.1877, "step": 4500 }, { "epoch": 1.97, "learning_rate": 7.313277927347926e-05, "loss": 0.1878, "step": 4600 }, { "epoch": 2.0, "eval_validation_all_ex_match_acc": 0.5762728818645594, "eval_validation_all_ex_match_acc_stderr": 0.004942223428876296, "eval_validation_all_intent_acc": 0.7468240472141643, "eval_validation_all_intent_acc_stderr": 0.004348961141535433, "eval_validation_all_loss": 0.1461012214422226, "eval_validation_all_runtime": 180.06, "eval_validation_all_samples_per_second": 55.52, "eval_validation_all_slot_micro_f1": 0.7408780686564123, "eval_validation_all_slot_micro_f1_stderr": 0.0017017337770766719, "eval_validation_all_steps_per_second": 1.738, "step": 4678 }, { "epoch": 2.01, "learning_rate": 7.297670607514925e-05, "loss": 0.1804, "step": 4700 }, { "epoch": 2.05, "learning_rate": 7.282063287681923e-05, "loss": 0.17, "step": 4800 }, { "epoch": 2.09, "learning_rate": 7.266455967848922e-05, "loss": 0.1696, "step": 4900 }, { "epoch": 2.14, "learning_rate": 7.250848648015921e-05, "loss": 0.1644, "step": 5000 }, { "epoch": 2.18, "learning_rate": 7.235241328182918e-05, "loss": 0.1651, "step": 5100 }, { "epoch": 2.22, "learning_rate": 7.219634008349917e-05, "loss": 0.1696, "step": 5200 }, { "epoch": 2.27, "learning_rate": 7.204026688516915e-05, "loss": 0.1668, "step": 5300 }, { "epoch": 2.31, "learning_rate": 7.188419368683913e-05, "loss": 0.1615, "step": 5400 }, { "epoch": 2.35, "learning_rate": 7.172812048850912e-05, "loss": 0.1577, "step": 5500 }, { "epoch": 2.39, "learning_rate": 7.15720472901791e-05, "loss": 0.1592, "step": 5600 }, { "epoch": 2.44, "learning_rate": 7.141597409184909e-05, "loss": 0.1542, "step": 5700 }, { "epoch": 2.48, "learning_rate": 7.125990089351906e-05, "loss": 0.1543, "step": 5800 }, { "epoch": 2.52, "learning_rate": 7.110382769518905e-05, "loss": 0.1532, "step": 5900 }, { "epoch": 2.57, "learning_rate": 7.094775449685903e-05, "loss": 0.1559, "step": 6000 }, { "epoch": 2.61, "learning_rate": 7.079168129852902e-05, "loss": 0.1524, "step": 6100 }, { "epoch": 2.65, "learning_rate": 7.0635608100199e-05, "loss": 0.1509, "step": 6200 }, { "epoch": 2.69, "learning_rate": 7.047953490186899e-05, "loss": 0.1453, "step": 6300 }, { "epoch": 2.74, "learning_rate": 7.032346170353896e-05, "loss": 0.1474, "step": 6400 }, { "epoch": 2.78, "learning_rate": 7.016738850520894e-05, "loss": 0.1444, "step": 6500 }, { "epoch": 2.82, "learning_rate": 7.001131530687893e-05, "loss": 0.1468, "step": 6600 }, { "epoch": 2.86, "learning_rate": 6.985524210854891e-05, "loss": 0.1421, "step": 6700 }, { "epoch": 2.91, "learning_rate": 6.96991689102189e-05, "loss": 0.1427, "step": 6800 }, { "epoch": 2.95, "learning_rate": 6.954309571188888e-05, "loss": 0.14, "step": 6900 }, { "epoch": 2.99, "learning_rate": 6.938702251355887e-05, "loss": 0.1364, "step": 7000 }, { "epoch": 3.0, "eval_validation_all_ex_match_acc": 0.5900770231069321, "eval_validation_all_ex_match_acc_stderr": 0.004918929939767899, "eval_validation_all_intent_acc": 0.7644293287986396, "eval_validation_all_intent_acc_stderr": 0.004244186252062078, "eval_validation_all_loss": 0.1383330225944519, "eval_validation_all_runtime": 180.3126, "eval_validation_all_samples_per_second": 55.443, "eval_validation_all_slot_micro_f1": 0.7396164830681355, "eval_validation_all_slot_micro_f1_stderr": 0.0017044183392999975, "eval_validation_all_steps_per_second": 1.736, "step": 7017 }, { "epoch": 3.04, "learning_rate": 6.923094931522886e-05, "loss": 0.1338, "step": 7100 }, { "epoch": 3.08, "learning_rate": 6.907487611689883e-05, "loss": 0.1305, "step": 7200 }, { "epoch": 3.12, "learning_rate": 6.891880291856881e-05, "loss": 0.1255, "step": 7300 }, { "epoch": 3.16, "learning_rate": 6.87627297202388e-05, "loss": 0.1302, "step": 7400 }, { "epoch": 3.21, "learning_rate": 6.860665652190878e-05, "loss": 0.1312, "step": 7500 }, { "epoch": 3.25, "learning_rate": 6.845058332357877e-05, "loss": 0.1309, "step": 7600 }, { "epoch": 3.29, "learning_rate": 6.829451012524874e-05, "loss": 0.1272, "step": 7700 }, { "epoch": 3.33, "learning_rate": 6.813843692691872e-05, "loss": 0.1277, "step": 7800 }, { "epoch": 3.38, "learning_rate": 6.798236372858871e-05, "loss": 0.1297, "step": 7900 }, { "epoch": 3.42, "learning_rate": 6.78262905302587e-05, "loss": 0.1239, "step": 8000 }, { "epoch": 3.46, "learning_rate": 6.767021733192868e-05, "loss": 0.1227, "step": 8100 }, { "epoch": 3.51, "learning_rate": 6.751414413359867e-05, "loss": 0.1241, "step": 8200 }, { "epoch": 3.55, "learning_rate": 6.735807093526865e-05, "loss": 0.1224, "step": 8300 }, { "epoch": 3.59, "learning_rate": 6.720199773693864e-05, "loss": 0.1234, "step": 8400 }, { "epoch": 3.63, "learning_rate": 6.704592453860861e-05, "loss": 0.1218, "step": 8500 }, { "epoch": 3.68, "learning_rate": 6.688985134027859e-05, "loss": 0.121, "step": 8600 }, { "epoch": 3.72, "learning_rate": 6.673377814194858e-05, "loss": 0.1182, "step": 8700 }, { "epoch": 3.76, "learning_rate": 6.657770494361856e-05, "loss": 0.1219, "step": 8800 }, { "epoch": 3.81, "learning_rate": 6.642163174528855e-05, "loss": 0.1198, "step": 8900 }, { "epoch": 3.85, "learning_rate": 6.626555854695853e-05, "loss": 0.1213, "step": 9000 }, { "epoch": 3.89, "learning_rate": 6.610948534862852e-05, "loss": 0.1202, "step": 9100 }, { "epoch": 3.93, "learning_rate": 6.59534121502985e-05, "loss": 0.1147, "step": 9200 }, { "epoch": 3.98, "learning_rate": 6.579733895196848e-05, "loss": 0.1191, "step": 9300 }, { "epoch": 4.0, "eval_validation_all_ex_match_acc": 0.5862758827648294, "eval_validation_all_ex_match_acc_stderr": 0.004925741170944722, "eval_validation_all_intent_acc": 0.7755326597979394, "eval_validation_all_intent_acc_stderr": 0.004172936538970462, "eval_validation_all_loss": 0.14330914616584778, "eval_validation_all_runtime": 182.6873, "eval_validation_all_samples_per_second": 54.722, "eval_validation_all_slot_micro_f1": 0.7288510293073724, "eval_validation_all_slot_micro_f1_stderr": 0.0017265912249300535, "eval_validation_all_steps_per_second": 1.713, "step": 9356 }, { "epoch": 4.02, "learning_rate": 6.564126575363846e-05, "loss": 0.1091, "step": 9400 }, { "epoch": 4.06, "learning_rate": 6.548519255530845e-05, "loss": 0.1111, "step": 9500 }, { "epoch": 4.1, "learning_rate": 6.532911935697843e-05, "loss": 0.111, "step": 9600 }, { "epoch": 4.15, "learning_rate": 6.51730461586484e-05, "loss": 0.109, "step": 9700 }, { "epoch": 4.19, "learning_rate": 6.50169729603184e-05, "loss": 0.1177, "step": 9800 }, { "epoch": 4.23, "learning_rate": 6.486089976198837e-05, "loss": 0.1069, "step": 9900 }, { "epoch": 4.28, "learning_rate": 6.470482656365836e-05, "loss": 0.1133, "step": 10000 }, { "epoch": 4.32, "learning_rate": 6.454875336532834e-05, "loss": 0.1055, "step": 10100 }, { "epoch": 4.36, "learning_rate": 6.439268016699833e-05, "loss": 0.1048, "step": 10200 }, { "epoch": 4.4, "learning_rate": 6.423660696866832e-05, "loss": 0.1081, "step": 10300 }, { "epoch": 4.45, "learning_rate": 6.40805337703383e-05, "loss": 0.1073, "step": 10400 }, { "epoch": 4.49, "learning_rate": 6.392446057200827e-05, "loss": 0.1055, "step": 10500 }, { "epoch": 4.53, "learning_rate": 6.376838737367826e-05, "loss": 0.1032, "step": 10600 }, { "epoch": 4.57, "learning_rate": 6.361231417534824e-05, "loss": 0.1045, "step": 10700 }, { "epoch": 4.62, "learning_rate": 6.345624097701823e-05, "loss": 0.1062, "step": 10800 }, { "epoch": 4.66, "learning_rate": 6.330016777868821e-05, "loss": 0.0999, "step": 10900 }, { "epoch": 4.7, "learning_rate": 6.314409458035818e-05, "loss": 0.1036, "step": 11000 }, { "epoch": 4.75, "learning_rate": 6.298802138202817e-05, "loss": 0.1063, "step": 11100 }, { "epoch": 4.79, "learning_rate": 6.283194818369816e-05, "loss": 0.0997, "step": 11200 }, { "epoch": 4.83, "learning_rate": 6.267587498536814e-05, "loss": 0.1019, "step": 11300 }, { "epoch": 4.87, "learning_rate": 6.251980178703813e-05, "loss": 0.1009, "step": 11400 }, { "epoch": 4.92, "learning_rate": 6.236372858870811e-05, "loss": 0.1016, "step": 11500 }, { "epoch": 4.96, "learning_rate": 6.22076553903781e-05, "loss": 0.0967, "step": 11600 }, { "epoch": 5.0, "eval_validation_all_ex_match_acc": 0.6202860858257477, "eval_validation_all_ex_match_acc_stderr": 0.004853884404661511, "eval_validation_all_intent_acc": 0.771631489446834, "eval_validation_all_intent_acc_stderr": 0.0041984427435098615, "eval_validation_all_loss": 0.12277400493621826, "eval_validation_all_runtime": 180.5655, "eval_validation_all_samples_per_second": 55.365, "eval_validation_all_slot_micro_f1": 0.7819573425859782, "eval_validation_all_slot_micro_f1_stderr": 0.0016037195659180058, "eval_validation_all_steps_per_second": 1.733, "step": 11695 }, { "epoch": 5.0, "learning_rate": 6.205158219204808e-05, "loss": 0.1019, "step": 11700 }, { "epoch": 5.04, "learning_rate": 6.189550899371805e-05, "loss": 0.0958, "step": 11800 }, { "epoch": 5.09, "learning_rate": 6.173943579538804e-05, "loss": 0.0913, "step": 11900 }, { "epoch": 5.13, "learning_rate": 6.158336259705802e-05, "loss": 0.0914, "step": 12000 }, { "epoch": 5.17, "learning_rate": 6.142728939872801e-05, "loss": 0.092, "step": 12100 }, { "epoch": 5.22, "learning_rate": 6.127121620039799e-05, "loss": 0.0934, "step": 12200 }, { "epoch": 5.26, "learning_rate": 6.111514300206798e-05, "loss": 0.0918, "step": 12300 }, { "epoch": 5.3, "learning_rate": 6.095906980373796e-05, "loss": 0.09, "step": 12400 }, { "epoch": 5.34, "learning_rate": 6.0802996605407945e-05, "loss": 0.0896, "step": 12500 }, { "epoch": 5.39, "learning_rate": 6.064692340707792e-05, "loss": 0.0938, "step": 12600 }, { "epoch": 5.43, "learning_rate": 6.049085020874791e-05, "loss": 0.0927, "step": 12700 }, { "epoch": 5.47, "learning_rate": 6.033477701041789e-05, "loss": 0.0935, "step": 12800 }, { "epoch": 5.52, "learning_rate": 6.017870381208787e-05, "loss": 0.0904, "step": 12900 }, { "epoch": 5.56, "learning_rate": 6.002263061375786e-05, "loss": 0.0923, "step": 13000 }, { "epoch": 5.6, "learning_rate": 5.986655741542784e-05, "loss": 0.0911, "step": 13100 }, { "epoch": 5.64, "learning_rate": 5.971048421709782e-05, "loss": 0.0894, "step": 13200 }, { "epoch": 5.69, "learning_rate": 5.955441101876781e-05, "loss": 0.0902, "step": 13300 }, { "epoch": 5.73, "learning_rate": 5.939833782043779e-05, "loss": 0.091, "step": 13400 }, { "epoch": 5.77, "learning_rate": 5.9242264622107775e-05, "loss": 0.0904, "step": 13500 }, { "epoch": 5.81, "learning_rate": 5.908619142377776e-05, "loss": 0.0912, "step": 13600 }, { "epoch": 5.86, "learning_rate": 5.8930118225447735e-05, "loss": 0.0914, "step": 13700 }, { "epoch": 5.9, "learning_rate": 5.8774045027117726e-05, "loss": 0.0884, "step": 13800 }, { "epoch": 5.94, "learning_rate": 5.86179718287877e-05, "loss": 0.0856, "step": 13900 }, { "epoch": 5.99, "learning_rate": 5.8461898630457686e-05, "loss": 0.0874, "step": 14000 }, { "epoch": 6.0, "eval_validation_all_ex_match_acc": 0.6209862958887666, "eval_validation_all_ex_match_acc_stderr": 0.004852143300552274, "eval_validation_all_intent_acc": 0.7756326898069421, "eval_validation_all_intent_acc_stderr": 0.0041722756855579295, "eval_validation_all_loss": 0.12295829504728317, "eval_validation_all_runtime": 180.0678, "eval_validation_all_samples_per_second": 55.518, "eval_validation_all_slot_micro_f1": 0.7860288720332762, "eval_validation_all_slot_micro_f1_stderr": 0.0015928064325804261, "eval_validation_all_steps_per_second": 1.738, "step": 14034 }, { "epoch": 6.03, "learning_rate": 5.830582543212768e-05, "loss": 0.0846, "step": 14100 }, { "epoch": 6.07, "learning_rate": 5.8149752233797653e-05, "loss": 0.0821, "step": 14200 }, { "epoch": 6.11, "learning_rate": 5.7993679035467644e-05, "loss": 0.0845, "step": 14300 }, { "epoch": 6.16, "learning_rate": 5.783760583713762e-05, "loss": 0.0831, "step": 14400 }, { "epoch": 6.2, "learning_rate": 5.7681532638807604e-05, "loss": 0.0805, "step": 14500 }, { "epoch": 6.24, "learning_rate": 5.752545944047759e-05, "loss": 0.0816, "step": 14600 }, { "epoch": 6.28, "learning_rate": 5.736938624214757e-05, "loss": 0.0813, "step": 14700 }, { "epoch": 6.33, "learning_rate": 5.721331304381755e-05, "loss": 0.08, "step": 14800 }, { "epoch": 6.37, "learning_rate": 5.705723984548754e-05, "loss": 0.0777, "step": 14900 }, { "epoch": 6.41, "learning_rate": 5.690116664715752e-05, "loss": 0.0803, "step": 15000 }, { "epoch": 6.46, "learning_rate": 5.6745093448827506e-05, "loss": 0.0807, "step": 15100 }, { "epoch": 6.5, "learning_rate": 5.658902025049749e-05, "loss": 0.0829, "step": 15200 }, { "epoch": 6.54, "learning_rate": 5.6432947052167467e-05, "loss": 0.08, "step": 15300 }, { "epoch": 6.58, "learning_rate": 5.627687385383746e-05, "loss": 0.0798, "step": 15400 }, { "epoch": 6.63, "learning_rate": 5.6120800655507434e-05, "loss": 0.0813, "step": 15500 }, { "epoch": 6.67, "learning_rate": 5.596472745717742e-05, "loss": 0.081, "step": 15600 }, { "epoch": 6.71, "learning_rate": 5.580865425884741e-05, "loss": 0.0796, "step": 15700 }, { "epoch": 6.76, "learning_rate": 5.5652581060517385e-05, "loss": 0.0776, "step": 15800 }, { "epoch": 6.8, "learning_rate": 5.5496507862187375e-05, "loss": 0.0797, "step": 15900 }, { "epoch": 6.84, "learning_rate": 5.534043466385735e-05, "loss": 0.0816, "step": 16000 }, { "epoch": 6.88, "learning_rate": 5.5184361465527336e-05, "loss": 0.0776, "step": 16100 }, { "epoch": 6.93, "learning_rate": 5.5028288267197326e-05, "loss": 0.0806, "step": 16200 }, { "epoch": 6.97, "learning_rate": 5.48722150688673e-05, "loss": 0.0793, "step": 16300 }, { "epoch": 7.0, "eval_validation_all_ex_match_acc": 0.6250875262578773, "eval_validation_all_ex_match_acc_stderr": 0.004841729420424345, "eval_validation_all_intent_acc": 0.77333199959988, "eval_validation_all_intent_acc_stderr": 0.004187388454167221, "eval_validation_all_loss": 0.12834610044956207, "eval_validation_all_runtime": 179.7767, "eval_validation_all_samples_per_second": 55.608, "eval_validation_all_slot_micro_f1": 0.7850994663777063, "eval_validation_all_slot_micro_f1_stderr": 0.0015953179494001197, "eval_validation_all_steps_per_second": 1.741, "step": 16373 }, { "epoch": 7.01, "learning_rate": 5.471614187053728e-05, "loss": 0.0767, "step": 16400 }, { "epoch": 7.05, "learning_rate": 5.456006867220727e-05, "loss": 0.0738, "step": 16500 }, { "epoch": 7.1, "learning_rate": 5.4403995473877254e-05, "loss": 0.0701, "step": 16600 }, { "epoch": 7.14, "learning_rate": 5.424792227554724e-05, "loss": 0.0705, "step": 16700 }, { "epoch": 7.18, "learning_rate": 5.409184907721722e-05, "loss": 0.0724, "step": 16800 }, { "epoch": 7.23, "learning_rate": 5.39357758788872e-05, "loss": 0.0711, "step": 16900 }, { "epoch": 7.27, "learning_rate": 5.377970268055719e-05, "loss": 0.0727, "step": 17000 }, { "epoch": 7.31, "learning_rate": 5.362362948222717e-05, "loss": 0.0714, "step": 17100 }, { "epoch": 7.35, "learning_rate": 5.346755628389715e-05, "loss": 0.0711, "step": 17200 }, { "epoch": 7.4, "learning_rate": 5.331148308556714e-05, "loss": 0.0699, "step": 17300 }, { "epoch": 7.44, "learning_rate": 5.3155409887237116e-05, "loss": 0.0692, "step": 17400 }, { "epoch": 7.48, "learning_rate": 5.2999336688907106e-05, "loss": 0.0711, "step": 17500 }, { "epoch": 7.52, "learning_rate": 5.284326349057708e-05, "loss": 0.073, "step": 17600 }, { "epoch": 7.57, "learning_rate": 5.268719029224707e-05, "loss": 0.0701, "step": 17700 }, { "epoch": 7.61, "learning_rate": 5.253111709391706e-05, "loss": 0.0709, "step": 17800 }, { "epoch": 7.65, "learning_rate": 5.2375043895587034e-05, "loss": 0.0748, "step": 17900 }, { "epoch": 7.7, "learning_rate": 5.221897069725701e-05, "loss": 0.0713, "step": 18000 }, { "epoch": 7.74, "learning_rate": 5.2062897498927e-05, "loss": 0.0706, "step": 18100 }, { "epoch": 7.78, "learning_rate": 5.1906824300596985e-05, "loss": 0.0748, "step": 18200 }, { "epoch": 7.82, "learning_rate": 5.175075110226697e-05, "loss": 0.0694, "step": 18300 }, { "epoch": 7.87, "learning_rate": 5.159467790393695e-05, "loss": 0.072, "step": 18400 }, { "epoch": 7.91, "learning_rate": 5.143860470560693e-05, "loss": 0.0705, "step": 18500 }, { "epoch": 7.95, "learning_rate": 5.128253150727692e-05, "loss": 0.068, "step": 18600 }, { "epoch": 7.99, "learning_rate": 5.11264583089469e-05, "loss": 0.0687, "step": 18700 }, { "epoch": 8.0, "eval_validation_all_ex_match_acc": 0.630589176753026, "eval_validation_all_ex_match_acc_stderr": 0.0048271769371602165, "eval_validation_all_intent_acc": 0.7778333500050015, "eval_validation_all_intent_acc_stderr": 0.004157649429320968, "eval_validation_all_loss": 0.1259431391954422, "eval_validation_all_runtime": 170.3009, "eval_validation_all_samples_per_second": 58.702, "eval_validation_all_slot_micro_f1": 0.7861930842802256, "eval_validation_all_slot_micro_f1_stderr": 0.001592361422278994, "eval_validation_all_steps_per_second": 1.838, "step": 18712 }, { "epoch": 8.04, "learning_rate": 5.097038511061688e-05, "loss": 0.0635, "step": 18800 }, { "epoch": 8.08, "learning_rate": 5.081431191228687e-05, "loss": 0.0628, "step": 18900 }, { "epoch": 8.12, "learning_rate": 5.065823871395685e-05, "loss": 0.0674, "step": 19000 }, { "epoch": 8.17, "learning_rate": 5.050216551562683e-05, "loss": 0.064, "step": 19100 }, { "epoch": 8.21, "learning_rate": 5.0346092317296815e-05, "loss": 0.0634, "step": 19200 }, { "epoch": 8.25, "learning_rate": 5.01900191189668e-05, "loss": 0.0665, "step": 19300 }, { "epoch": 8.29, "learning_rate": 5.003394592063679e-05, "loss": 0.064, "step": 19400 }, { "epoch": 8.34, "learning_rate": 4.9877872722306765e-05, "loss": 0.0646, "step": 19500 }, { "epoch": 8.38, "learning_rate": 4.972179952397675e-05, "loss": 0.0669, "step": 19600 }, { "epoch": 8.42, "learning_rate": 4.956572632564673e-05, "loss": 0.0629, "step": 19700 }, { "epoch": 8.47, "learning_rate": 4.9409653127316716e-05, "loss": 0.0652, "step": 19800 }, { "epoch": 8.51, "learning_rate": 4.925357992898669e-05, "loss": 0.0642, "step": 19900 }, { "epoch": 8.55, "learning_rate": 4.9097506730656684e-05, "loss": 0.0652, "step": 20000 }, { "epoch": 8.59, "learning_rate": 4.894143353232666e-05, "loss": 0.0659, "step": 20100 }, { "epoch": 8.64, "learning_rate": 4.878536033399665e-05, "loss": 0.0632, "step": 20200 }, { "epoch": 8.68, "learning_rate": 4.8629287135666634e-05, "loss": 0.0655, "step": 20300 }, { "epoch": 8.72, "learning_rate": 4.847321393733661e-05, "loss": 0.0624, "step": 20400 }, { "epoch": 8.76, "learning_rate": 4.83171407390066e-05, "loss": 0.0646, "step": 20500 }, { "epoch": 8.81, "learning_rate": 4.816106754067658e-05, "loss": 0.0659, "step": 20600 }, { "epoch": 8.85, "learning_rate": 4.800499434234656e-05, "loss": 0.064, "step": 20700 }, { "epoch": 8.89, "learning_rate": 4.7848921144016546e-05, "loss": 0.0642, "step": 20800 }, { "epoch": 8.94, "learning_rate": 4.769284794568653e-05, "loss": 0.0665, "step": 20900 }, { "epoch": 8.98, "learning_rate": 4.753677474735652e-05, "loss": 0.0626, "step": 21000 }, { "epoch": 9.0, "eval_validation_all_ex_match_acc": 0.6302890867260178, "eval_validation_all_ex_match_acc_stderr": 0.0048279880103909295, "eval_validation_all_intent_acc": 0.7782334700410123, "eval_validation_all_intent_acc_stderr": 0.004154972051978152, "eval_validation_all_loss": 0.1310112625360489, "eval_validation_all_runtime": 170.1316, "eval_validation_all_samples_per_second": 58.76, "eval_validation_all_slot_micro_f1": 0.789992449441871, "eval_validation_all_slot_micro_f1_stderr": 0.0015819585080535143, "eval_validation_all_steps_per_second": 1.84, "step": 21051 }, { "epoch": 9.02, "learning_rate": 4.73807015490265e-05, "loss": 0.0602, "step": 21100 }, { "epoch": 9.06, "learning_rate": 4.722462835069648e-05, "loss": 0.0594, "step": 21200 }, { "epoch": 9.11, "learning_rate": 4.7068555152366464e-05, "loss": 0.0583, "step": 21300 }, { "epoch": 9.15, "learning_rate": 4.691248195403645e-05, "loss": 0.0584, "step": 21400 }, { "epoch": 9.19, "learning_rate": 4.6756408755706424e-05, "loss": 0.0565, "step": 21500 }, { "epoch": 9.23, "learning_rate": 4.6600335557376415e-05, "loss": 0.0568, "step": 21600 }, { "epoch": 9.28, "learning_rate": 4.644426235904639e-05, "loss": 0.0586, "step": 21700 }, { "epoch": 9.32, "learning_rate": 4.628818916071638e-05, "loss": 0.0552, "step": 21800 }, { "epoch": 9.36, "learning_rate": 4.6132115962386366e-05, "loss": 0.0608, "step": 21900 }, { "epoch": 9.41, "learning_rate": 4.597604276405634e-05, "loss": 0.0595, "step": 22000 }, { "epoch": 9.45, "learning_rate": 4.581996956572633e-05, "loss": 0.0574, "step": 22100 }, { "epoch": 9.49, "learning_rate": 4.566389636739631e-05, "loss": 0.0572, "step": 22200 }, { "epoch": 9.53, "learning_rate": 4.5507823169066293e-05, "loss": 0.0585, "step": 22300 }, { "epoch": 9.58, "learning_rate": 4.5351749970736284e-05, "loss": 0.0584, "step": 22400 }, { "epoch": 9.62, "learning_rate": 4.519567677240626e-05, "loss": 0.0575, "step": 22500 }, { "epoch": 9.66, "learning_rate": 4.503960357407625e-05, "loss": 0.0601, "step": 22600 }, { "epoch": 9.71, "learning_rate": 4.488353037574623e-05, "loss": 0.0586, "step": 22700 }, { "epoch": 9.75, "learning_rate": 4.472745717741621e-05, "loss": 0.0576, "step": 22800 }, { "epoch": 9.79, "learning_rate": 4.4571383979086195e-05, "loss": 0.057, "step": 22900 }, { "epoch": 9.83, "learning_rate": 4.441531078075618e-05, "loss": 0.057, "step": 23000 }, { "epoch": 9.88, "learning_rate": 4.4259237582426156e-05, "loss": 0.0596, "step": 23100 }, { "epoch": 9.92, "learning_rate": 4.4103164384096146e-05, "loss": 0.0592, "step": 23200 }, { "epoch": 9.96, "learning_rate": 4.394709118576613e-05, "loss": 0.058, "step": 23300 }, { "epoch": 10.0, "eval_validation_all_ex_match_acc": 0.6262878863659098, "eval_validation_all_ex_match_acc_stderr": 0.00483861143551642, "eval_validation_all_intent_acc": 0.7731319395818745, "eval_validation_all_intent_acc_stderr": 0.00418869405726844, "eval_validation_all_loss": 0.13434068858623505, "eval_validation_all_runtime": 178.1682, "eval_validation_all_samples_per_second": 56.11, "eval_validation_all_slot_micro_f1": 0.7893436766357048, "eval_validation_all_slot_micro_f1_stderr": 0.0015837494632691346, "eval_validation_all_steps_per_second": 1.757, "step": 23390 }, { "epoch": 10.0, "learning_rate": 4.379101798743611e-05, "loss": 0.0602, "step": 23400 }, { "epoch": 10.05, "learning_rate": 4.36349447891061e-05, "loss": 0.0527, "step": 23500 }, { "epoch": 10.09, "learning_rate": 4.3478871590776074e-05, "loss": 0.0558, "step": 23600 }, { "epoch": 10.13, "learning_rate": 4.3322798392446064e-05, "loss": 0.0515, "step": 23700 }, { "epoch": 10.18, "learning_rate": 4.316672519411604e-05, "loss": 0.0521, "step": 23800 }, { "epoch": 10.22, "learning_rate": 4.3010651995786025e-05, "loss": 0.0512, "step": 23900 }, { "epoch": 10.26, "learning_rate": 4.2854578797456015e-05, "loss": 0.0525, "step": 24000 }, { "epoch": 10.3, "learning_rate": 4.269850559912599e-05, "loss": 0.053, "step": 24100 }, { "epoch": 10.35, "learning_rate": 4.254243240079598e-05, "loss": 0.0531, "step": 24200 }, { "epoch": 10.39, "learning_rate": 4.238635920246596e-05, "loss": 0.0536, "step": 24300 }, { "epoch": 10.43, "learning_rate": 4.223028600413594e-05, "loss": 0.0543, "step": 24400 }, { "epoch": 10.47, "learning_rate": 4.2074212805805926e-05, "loss": 0.0521, "step": 24500 }, { "epoch": 10.52, "learning_rate": 4.191813960747591e-05, "loss": 0.0527, "step": 24600 }, { "epoch": 10.56, "learning_rate": 4.176206640914589e-05, "loss": 0.0554, "step": 24700 }, { "epoch": 10.6, "learning_rate": 4.160599321081588e-05, "loss": 0.0529, "step": 24800 }, { "epoch": 10.65, "learning_rate": 4.144992001248586e-05, "loss": 0.0541, "step": 24900 }, { "epoch": 10.69, "learning_rate": 4.1293846814155845e-05, "loss": 0.0556, "step": 25000 }, { "epoch": 10.73, "learning_rate": 4.113777361582583e-05, "loss": 0.0512, "step": 25100 }, { "epoch": 10.77, "learning_rate": 4.0981700417495805e-05, "loss": 0.0535, "step": 25200 }, { "epoch": 10.82, "learning_rate": 4.0825627219165796e-05, "loss": 0.0523, "step": 25300 }, { "epoch": 10.86, "learning_rate": 4.066955402083577e-05, "loss": 0.0517, "step": 25400 }, { "epoch": 10.9, "learning_rate": 4.0513480822505756e-05, "loss": 0.0537, "step": 25500 }, { "epoch": 10.94, "learning_rate": 4.0357407624175746e-05, "loss": 0.054, "step": 25600 }, { "epoch": 10.99, "learning_rate": 4.020133442584572e-05, "loss": 0.0519, "step": 25700 }, { "epoch": 11.0, "eval_validation_all_ex_match_acc": 0.6264879463839151, "eval_validation_all_ex_match_acc_stderr": 0.004838088679693641, "eval_validation_all_intent_acc": 0.7737321196358907, "eval_validation_all_intent_acc_stderr": 0.004184773155844174, "eval_validation_all_loss": 0.1407793015241623, "eval_validation_all_runtime": 181.9271, "eval_validation_all_samples_per_second": 54.951, "eval_validation_all_slot_micro_f1": 0.7874137545030835, "eval_validation_all_slot_micro_f1_stderr": 0.0015890415122001155, "eval_validation_all_steps_per_second": 1.72, "step": 25729 }, { "epoch": 11.03, "learning_rate": 4.004526122751571e-05, "loss": 0.0513, "step": 25800 }, { "epoch": 11.07, "learning_rate": 3.988918802918569e-05, "loss": 0.0493, "step": 25900 }, { "epoch": 11.12, "learning_rate": 3.9733114830855674e-05, "loss": 0.0491, "step": 26000 }, { "epoch": 11.16, "learning_rate": 3.957704163252566e-05, "loss": 0.0473, "step": 26100 }, { "epoch": 11.2, "learning_rate": 3.942096843419564e-05, "loss": 0.0488, "step": 26200 }, { "epoch": 11.24, "learning_rate": 3.9264895235865625e-05, "loss": 0.0494, "step": 26300 }, { "epoch": 11.29, "learning_rate": 3.910882203753561e-05, "loss": 0.048, "step": 26400 }, { "epoch": 11.33, "learning_rate": 3.895274883920559e-05, "loss": 0.0478, "step": 26500 }, { "epoch": 11.37, "learning_rate": 3.8796675640875576e-05, "loss": 0.049, "step": 26600 }, { "epoch": 11.42, "learning_rate": 3.864060244254556e-05, "loss": 0.0505, "step": 26700 }, { "epoch": 11.46, "learning_rate": 3.8484529244215536e-05, "loss": 0.0499, "step": 26800 }, { "epoch": 11.5, "learning_rate": 3.832845604588552e-05, "loss": 0.0495, "step": 26900 }, { "epoch": 11.54, "learning_rate": 3.8172382847555504e-05, "loss": 0.0501, "step": 27000 }, { "epoch": 11.59, "learning_rate": 3.8016309649225494e-05, "loss": 0.047, "step": 27100 }, { "epoch": 11.63, "learning_rate": 3.786023645089547e-05, "loss": 0.0472, "step": 27200 }, { "epoch": 11.67, "learning_rate": 3.7704163252565455e-05, "loss": 0.0471, "step": 27300 }, { "epoch": 11.71, "learning_rate": 3.754809005423544e-05, "loss": 0.0488, "step": 27400 }, { "epoch": 11.76, "learning_rate": 3.739201685590542e-05, "loss": 0.0495, "step": 27500 }, { "epoch": 11.8, "learning_rate": 3.7235943657575405e-05, "loss": 0.0465, "step": 27600 }, { "epoch": 11.84, "learning_rate": 3.707987045924539e-05, "loss": 0.0476, "step": 27700 }, { "epoch": 11.89, "learning_rate": 3.692379726091537e-05, "loss": 0.048, "step": 27800 }, { "epoch": 11.93, "learning_rate": 3.6767724062585356e-05, "loss": 0.0487, "step": 27900 }, { "epoch": 11.97, "learning_rate": 3.661165086425534e-05, "loss": 0.0504, "step": 28000 }, { "epoch": 12.0, "eval_validation_all_ex_match_acc": 0.6299889966990097, "eval_validation_all_ex_match_acc_stderr": 0.004828797081892748, "eval_validation_all_intent_acc": 0.7760328098429529, "eval_validation_all_intent_acc_stderr": 0.004169628824053711, "eval_validation_all_loss": 0.1429910659790039, "eval_validation_all_runtime": 181.3487, "eval_validation_all_samples_per_second": 55.126, "eval_validation_all_slot_micro_f1": 0.7888446215139443, "eval_validation_all_slot_micro_f1_stderr": 0.0015851230161344064, "eval_validation_all_steps_per_second": 1.726, "step": 28068 }, { "epoch": 12.01, "learning_rate": 3.6455577665925324e-05, "loss": 0.0442, "step": 28100 }, { "epoch": 12.06, "learning_rate": 3.629950446759531e-05, "loss": 0.0423, "step": 28200 }, { "epoch": 12.1, "learning_rate": 3.614343126926529e-05, "loss": 0.0446, "step": 28300 }, { "epoch": 12.14, "learning_rate": 3.598735807093527e-05, "loss": 0.0419, "step": 28400 }, { "epoch": 12.18, "learning_rate": 3.583128487260525e-05, "loss": 0.0453, "step": 28500 }, { "epoch": 12.23, "learning_rate": 3.567521167427524e-05, "loss": 0.0434, "step": 28600 }, { "epoch": 12.27, "learning_rate": 3.5519138475945225e-05, "loss": 0.0433, "step": 28700 }, { "epoch": 12.31, "learning_rate": 3.53630652776152e-05, "loss": 0.0457, "step": 28800 }, { "epoch": 12.36, "learning_rate": 3.5206992079285186e-05, "loss": 0.0458, "step": 28900 }, { "epoch": 12.4, "learning_rate": 3.505091888095517e-05, "loss": 0.0436, "step": 29000 }, { "epoch": 12.44, "learning_rate": 3.489484568262515e-05, "loss": 0.0465, "step": 29100 }, { "epoch": 12.48, "learning_rate": 3.473877248429514e-05, "loss": 0.0454, "step": 29200 }, { "epoch": 12.53, "learning_rate": 3.458269928596512e-05, "loss": 0.0457, "step": 29300 }, { "epoch": 12.57, "learning_rate": 3.4426626087635104e-05, "loss": 0.0474, "step": 29400 }, { "epoch": 12.61, "learning_rate": 3.427055288930509e-05, "loss": 0.0458, "step": 29500 }, { "epoch": 12.65, "learning_rate": 3.411447969097507e-05, "loss": 0.0452, "step": 29600 }, { "epoch": 12.7, "learning_rate": 3.3958406492645055e-05, "loss": 0.0455, "step": 29700 }, { "epoch": 12.74, "learning_rate": 3.380233329431504e-05, "loss": 0.0436, "step": 29800 }, { "epoch": 12.78, "learning_rate": 3.364626009598502e-05, "loss": 0.0472, "step": 29900 }, { "epoch": 12.83, "learning_rate": 3.3490186897655e-05, "loss": 0.0454, "step": 30000 }, { "epoch": 12.87, "learning_rate": 3.333411369932498e-05, "loss": 0.0431, "step": 30100 }, { "epoch": 12.91, "learning_rate": 3.317804050099497e-05, "loss": 0.0455, "step": 30200 }, { "epoch": 12.95, "learning_rate": 3.3021967302664957e-05, "loss": 0.0433, "step": 30300 }, { "epoch": 13.0, "learning_rate": 3.2865894104334933e-05, "loss": 0.0438, "step": 30400 }, { "epoch": 13.0, "eval_validation_all_ex_match_acc": 0.6277883365009503, "eval_validation_all_ex_match_acc_stderr": 0.0048346692041261715, "eval_validation_all_intent_acc": 0.7767330199059718, "eval_validation_all_intent_acc_stderr": 0.004164983516855836, "eval_validation_all_loss": 0.1506633162498474, "eval_validation_all_runtime": 180.9233, "eval_validation_all_samples_per_second": 55.255, "eval_validation_all_slot_micro_f1": 0.7894693944086723, "eval_validation_all_slot_micro_f1_stderr": 0.001583402887351871, "eval_validation_all_steps_per_second": 1.73, "step": 30407 } ], "logging_steps": 100, "max_steps": 51458, "num_input_tokens_seen": 0, "num_train_epochs": 22, "save_steps": 500.0, "total_flos": 7.470556690869043e+16, "train_batch_size": 64, "trial_name": null, "trial_params": null }