diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.015, - "global_step": 15000, + "epoch": 0.055, + "global_step": 55000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -90246,11 +90246,240651 @@ "eval_samples_per_second": 16.463, "eval_steps_per_second": 4.125, "step": 15000 + }, + { + "epoch": 0.02, + "learning_rate": 4.997260646226938e-05, + "loss": 0.9748, + "step": 15001 + }, + { + "epoch": 0.02, + "learning_rate": 4.997260278607956e-05, + "loss": 1.0493, + "step": 15002 + }, + { + "epoch": 0.02, + "learning_rate": 4.997259910964321e-05, + "loss": 1.1049, + "step": 15003 + }, + { + "epoch": 0.02, + "learning_rate": 4.997259543296036e-05, + "loss": 1.2809, + "step": 15004 + }, + { + "epoch": 0.02, + "learning_rate": 4.997259175603098e-05, + "loss": 1.0699, + "step": 15005 + }, + { + "epoch": 0.02, + "learning_rate": 4.997258807885508e-05, + "loss": 1.1148, + "step": 15006 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972584401432664e-05, + "loss": 1.0759, + "step": 15007 + }, + { + "epoch": 0.02, + "learning_rate": 4.997258072376373e-05, + "loss": 1.3592, + "step": 15008 + }, + { + "epoch": 0.02, + "learning_rate": 4.997257704584827e-05, + "loss": 1.1556, + "step": 15009 + }, + { + "epoch": 0.02, + "learning_rate": 4.99725733676863e-05, + "loss": 1.2197, + "step": 15010 + }, + { + "epoch": 0.02, + "learning_rate": 4.99725696892778e-05, + "loss": 1.3116, + "step": 15011 + }, + { + "epoch": 0.02, + "learning_rate": 4.997256601062279e-05, + "loss": 1.1053, + "step": 15012 + }, + { + "epoch": 0.02, + "learning_rate": 4.997256233172126e-05, + "loss": 1.2717, + "step": 15013 + }, + { + "epoch": 0.02, + "learning_rate": 4.997255865257322e-05, + "loss": 1.3174, + "step": 15014 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972554973178654e-05, + "loss": 1.0946, + "step": 15015 + }, + { + "epoch": 0.02, + "learning_rate": 4.997255129353756e-05, + "loss": 0.9896, + "step": 15016 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972547613649964e-05, + "loss": 0.9097, + "step": 15017 + }, + { + "epoch": 0.02, + "learning_rate": 4.997254393351584e-05, + "loss": 0.9884, + "step": 15018 + }, + { + "epoch": 0.02, + "learning_rate": 4.99725402531352e-05, + "loss": 1.2711, + "step": 15019 + }, + { + "epoch": 0.02, + "learning_rate": 4.997253657250804e-05, + "loss": 0.9559, + "step": 15020 + }, + { + "epoch": 0.02, + "learning_rate": 4.997253289163436e-05, + "loss": 1.2035, + "step": 15021 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972529210514165e-05, + "loss": 1.775, + "step": 15022 + }, + { + "epoch": 0.02, + "learning_rate": 4.997252552914745e-05, + "loss": 1.278, + "step": 15023 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972521847534215e-05, + "loss": 1.323, + "step": 15024 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972518165674466e-05, + "loss": 1.3745, + "step": 15025 + }, + { + "epoch": 0.02, + "learning_rate": 4.99725144835682e-05, + "loss": 0.4495, + "step": 15026 + }, + { + "epoch": 0.02, + "learning_rate": 4.997251080121541e-05, + "loss": 1.0237, + "step": 15027 + }, + { + "epoch": 0.02, + "learning_rate": 4.997250711861611e-05, + "loss": 1.0747, + "step": 15028 + }, + { + "epoch": 0.02, + "learning_rate": 4.997250343577029e-05, + "loss": 1.0376, + "step": 15029 + }, + { + "epoch": 0.02, + "learning_rate": 4.997249975267794e-05, + "loss": 1.0421, + "step": 15030 + }, + { + "epoch": 0.02, + "learning_rate": 4.997249606933908e-05, + "loss": 0.9438, + "step": 15031 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972492385753706e-05, + "loss": 1.1047, + "step": 15032 + }, + { + "epoch": 0.02, + "learning_rate": 4.997248870192181e-05, + "loss": 1.4917, + "step": 15033 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972485017843395e-05, + "loss": 0.9421, + "step": 15034 + }, + { + "epoch": 0.02, + "learning_rate": 4.997248133351847e-05, + "loss": 1.058, + "step": 15035 + }, + { + "epoch": 0.02, + "learning_rate": 4.997247764894702e-05, + "loss": 1.0983, + "step": 15036 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972473964129054e-05, + "loss": 1.3499, + "step": 15037 + }, + { + "epoch": 0.02, + "learning_rate": 4.997247027906457e-05, + "loss": 0.8397, + "step": 15038 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972466593753565e-05, + "loss": 1.0555, + "step": 15039 + }, + { + "epoch": 0.02, + "learning_rate": 4.997246290819605e-05, + "loss": 1.1275, + "step": 15040 + }, + { + "epoch": 0.02, + "learning_rate": 4.997245922239201e-05, + "loss": 0.7054, + "step": 15041 + }, + { + "epoch": 0.02, + "learning_rate": 4.997245553634146e-05, + "loss": 0.7308, + "step": 15042 + }, + { + "epoch": 0.02, + "learning_rate": 4.997245185004438e-05, + "loss": 0.7697, + "step": 15043 + }, + { + "epoch": 0.02, + "learning_rate": 4.99724481635008e-05, + "loss": 0.7623, + "step": 15044 + }, + { + "epoch": 0.02, + "learning_rate": 4.997244447671069e-05, + "loss": 0.8263, + "step": 15045 + }, + { + "epoch": 0.02, + "learning_rate": 4.997244078967406e-05, + "loss": 1.3559, + "step": 15046 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972437102390926e-05, + "loss": 1.4373, + "step": 15047 + }, + { + "epoch": 0.02, + "learning_rate": 4.997243341486127e-05, + "loss": 1.274, + "step": 15048 + }, + { + "epoch": 0.02, + "learning_rate": 4.997242972708508e-05, + "loss": 1.3229, + "step": 15049 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972426039062395e-05, + "loss": 1.2814, + "step": 15050 + }, + { + "epoch": 0.02, + "learning_rate": 4.997242235079318e-05, + "loss": 1.1339, + "step": 15051 + }, + { + "epoch": 0.02, + "learning_rate": 4.997241866227745e-05, + "loss": 0.8458, + "step": 15052 + }, + { + "epoch": 0.02, + "learning_rate": 4.99724149735152e-05, + "loss": 1.1498, + "step": 15053 + }, + { + "epoch": 0.02, + "learning_rate": 4.997241128450644e-05, + "loss": 1.2158, + "step": 15054 + }, + { + "epoch": 0.02, + "learning_rate": 4.997240759525116e-05, + "loss": 0.8556, + "step": 15055 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972403905749364e-05, + "loss": 0.9101, + "step": 15056 + }, + { + "epoch": 0.02, + "learning_rate": 4.997240021600105e-05, + "loss": 1.0137, + "step": 15057 + }, + { + "epoch": 0.02, + "learning_rate": 4.997239652600622e-05, + "loss": 1.0061, + "step": 15058 + }, + { + "epoch": 0.02, + "learning_rate": 4.997239283576487e-05, + "loss": 1.145, + "step": 15059 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972389145277005e-05, + "loss": 1.3146, + "step": 15060 + }, + { + "epoch": 0.02, + "learning_rate": 4.997238545454263e-05, + "loss": 1.2995, + "step": 15061 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972381763561724e-05, + "loss": 1.0795, + "step": 15062 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972378072334306e-05, + "loss": 1.1407, + "step": 15063 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972374380860374e-05, + "loss": 1.2046, + "step": 15064 + }, + { + "epoch": 0.02, + "learning_rate": 4.997237068913992e-05, + "loss": 0.9195, + "step": 15065 + }, + { + "epoch": 0.02, + "learning_rate": 4.997236699717296e-05, + "loss": 0.7231, + "step": 15066 + }, + { + "epoch": 0.02, + "learning_rate": 4.997236330495948e-05, + "loss": 0.8774, + "step": 15067 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972359612499476e-05, + "loss": 1.0792, + "step": 15068 + }, + { + "epoch": 0.02, + "learning_rate": 4.997235591979296e-05, + "loss": 0.9843, + "step": 15069 + }, + { + "epoch": 0.02, + "learning_rate": 4.997235222683993e-05, + "loss": 1.1931, + "step": 15070 + }, + { + "epoch": 0.02, + "learning_rate": 4.997234853364038e-05, + "loss": 1.1407, + "step": 15071 + }, + { + "epoch": 0.02, + "learning_rate": 4.997234484019431e-05, + "loss": 1.1714, + "step": 15072 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972341146501736e-05, + "loss": 1.1567, + "step": 15073 + }, + { + "epoch": 0.02, + "learning_rate": 4.997233745256263e-05, + "loss": 1.1103, + "step": 15074 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972333758377024e-05, + "loss": 1.4842, + "step": 15075 + }, + { + "epoch": 0.02, + "learning_rate": 4.997233006394488e-05, + "loss": 0.9396, + "step": 15076 + }, + { + "epoch": 0.02, + "learning_rate": 4.997232636926624e-05, + "loss": 1.2873, + "step": 15077 + }, + { + "epoch": 0.02, + "learning_rate": 4.997232267434108e-05, + "loss": 1.1418, + "step": 15078 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972318979169394e-05, + "loss": 0.9179, + "step": 15079 + }, + { + "epoch": 0.02, + "learning_rate": 4.99723152837512e-05, + "loss": 1.2488, + "step": 15080 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972311588086484e-05, + "loss": 1.002, + "step": 15081 + }, + { + "epoch": 0.02, + "learning_rate": 4.997230789217526e-05, + "loss": 1.2295, + "step": 15082 + }, + { + "epoch": 0.02, + "learning_rate": 4.997230419601752e-05, + "loss": 0.7993, + "step": 15083 + }, + { + "epoch": 0.02, + "learning_rate": 4.997230049961326e-05, + "loss": 0.6249, + "step": 15084 + }, + { + "epoch": 0.02, + "learning_rate": 4.997229680296248e-05, + "loss": 0.5616, + "step": 15085 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972293106065185e-05, + "loss": 1.0836, + "step": 15086 + }, + { + "epoch": 0.02, + "learning_rate": 4.997228940892138e-05, + "loss": 1.234, + "step": 15087 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972285711531055e-05, + "loss": 1.1247, + "step": 15088 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972282013894214e-05, + "loss": 1.014, + "step": 15089 + }, + { + "epoch": 0.02, + "learning_rate": 4.997227831601086e-05, + "loss": 1.0118, + "step": 15090 + }, + { + "epoch": 0.02, + "learning_rate": 4.997227461788098e-05, + "loss": 1.0921, + "step": 15091 + }, + { + "epoch": 0.02, + "learning_rate": 4.997227091950459e-05, + "loss": 1.1307, + "step": 15092 + }, + { + "epoch": 0.02, + "learning_rate": 4.997226722088169e-05, + "loss": 1.0173, + "step": 15093 + }, + { + "epoch": 0.02, + "learning_rate": 4.997226352201227e-05, + "loss": 0.8703, + "step": 15094 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972259822896334e-05, + "loss": 1.2485, + "step": 15095 + }, + { + "epoch": 0.02, + "learning_rate": 4.997225612353388e-05, + "loss": 0.9719, + "step": 15096 + }, + { + "epoch": 0.02, + "learning_rate": 4.997225242392492e-05, + "loss": 1.0862, + "step": 15097 + }, + { + "epoch": 0.02, + "learning_rate": 4.997224872406944e-05, + "loss": 1.1109, + "step": 15098 + }, + { + "epoch": 0.02, + "learning_rate": 4.997224502396743e-05, + "loss": 1.2727, + "step": 15099 + }, + { + "epoch": 0.02, + "learning_rate": 4.997224132361893e-05, + "loss": 0.9653, + "step": 15100 + }, + { + "epoch": 0.02, + "learning_rate": 4.99722376230239e-05, + "loss": 0.8132, + "step": 15101 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972233922182353e-05, + "loss": 1.1867, + "step": 15102 + }, + { + "epoch": 0.02, + "learning_rate": 4.99722302210943e-05, + "loss": 0.5581, + "step": 15103 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972226519759725e-05, + "loss": 1.5255, + "step": 15104 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972222818178623e-05, + "loss": 1.0357, + "step": 15105 + }, + { + "epoch": 0.02, + "learning_rate": 4.997221911635103e-05, + "loss": 1.3428, + "step": 15106 + }, + { + "epoch": 0.02, + "learning_rate": 4.99722154142769e-05, + "loss": 1.1329, + "step": 15107 + }, + { + "epoch": 0.02, + "learning_rate": 4.997221171195628e-05, + "loss": 0.9716, + "step": 15108 + }, + { + "epoch": 0.02, + "learning_rate": 4.997220800938912e-05, + "loss": 0.9562, + "step": 15109 + }, + { + "epoch": 0.02, + "learning_rate": 4.997220430657546e-05, + "loss": 1.0278, + "step": 15110 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972200603515273e-05, + "loss": 1.1521, + "step": 15111 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972196900208576e-05, + "loss": 0.7229, + "step": 15112 + }, + { + "epoch": 0.02, + "learning_rate": 4.997219319665537e-05, + "loss": 1.2424, + "step": 15113 + }, + { + "epoch": 0.02, + "learning_rate": 4.997218949285564e-05, + "loss": 0.7695, + "step": 15114 + }, + { + "epoch": 0.02, + "learning_rate": 4.99721857888094e-05, + "loss": 0.7015, + "step": 15115 + }, + { + "epoch": 0.02, + "learning_rate": 4.997218208451664e-05, + "loss": 0.7679, + "step": 15116 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972178379977376e-05, + "loss": 1.2424, + "step": 15117 + }, + { + "epoch": 0.02, + "learning_rate": 4.997217467519158e-05, + "loss": 1.1658, + "step": 15118 + }, + { + "epoch": 0.02, + "learning_rate": 4.99721709701593e-05, + "loss": 1.114, + "step": 15119 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972167264880476e-05, + "loss": 1.1159, + "step": 15120 + }, + { + "epoch": 0.02, + "learning_rate": 4.997216355935514e-05, + "loss": 1.2718, + "step": 15121 + }, + { + "epoch": 0.02, + "learning_rate": 4.99721598535833e-05, + "loss": 1.0424, + "step": 15122 + }, + { + "epoch": 0.02, + "learning_rate": 4.997215614756494e-05, + "loss": 1.3091, + "step": 15123 + }, + { + "epoch": 0.02, + "learning_rate": 4.997215244130007e-05, + "loss": 0.922, + "step": 15124 + }, + { + "epoch": 0.02, + "learning_rate": 4.997214873478868e-05, + "loss": 1.0213, + "step": 15125 + }, + { + "epoch": 0.02, + "learning_rate": 4.997214502803078e-05, + "loss": 1.1075, + "step": 15126 + }, + { + "epoch": 0.02, + "learning_rate": 4.997214132102636e-05, + "loss": 0.9798, + "step": 15127 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972137613775425e-05, + "loss": 1.6559, + "step": 15128 + }, + { + "epoch": 0.02, + "learning_rate": 4.997213390627798e-05, + "loss": 1.3745, + "step": 15129 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972130198534024e-05, + "loss": 1.0863, + "step": 15130 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972126490543546e-05, + "loss": 1.1254, + "step": 15131 + }, + { + "epoch": 0.02, + "learning_rate": 4.997212278230656e-05, + "loss": 1.329, + "step": 15132 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972119073823056e-05, + "loss": 1.3043, + "step": 15133 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972115365093043e-05, + "loss": 1.277, + "step": 15134 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972111656116504e-05, + "loss": 1.3675, + "step": 15135 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972107946893464e-05, + "loss": 1.0155, + "step": 15136 + }, + { + "epoch": 0.02, + "learning_rate": 4.99721042374239e-05, + "loss": 1.3606, + "step": 15137 + }, + { + "epoch": 0.02, + "learning_rate": 4.997210052770783e-05, + "loss": 1.0277, + "step": 15138 + }, + { + "epoch": 0.02, + "learning_rate": 4.997209681774524e-05, + "loss": 1.312, + "step": 15139 + }, + { + "epoch": 0.02, + "learning_rate": 4.997209310753614e-05, + "loss": 0.7359, + "step": 15140 + }, + { + "epoch": 0.02, + "learning_rate": 4.997208939708053e-05, + "loss": 0.6109, + "step": 15141 + }, + { + "epoch": 0.02, + "learning_rate": 4.997208568637839e-05, + "loss": 1.6273, + "step": 15142 + }, + { + "epoch": 0.02, + "learning_rate": 4.997208197542975e-05, + "loss": 1.1852, + "step": 15143 + }, + { + "epoch": 0.02, + "learning_rate": 4.997207826423459e-05, + "loss": 1.0924, + "step": 15144 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972074552792917e-05, + "loss": 1.1806, + "step": 15145 + }, + { + "epoch": 0.02, + "learning_rate": 4.997207084110473e-05, + "loss": 1.0955, + "step": 15146 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972067129170036e-05, + "loss": 0.8981, + "step": 15147 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972063416988825e-05, + "loss": 0.8413, + "step": 15148 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972059704561094e-05, + "loss": 0.9495, + "step": 15149 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972055991886856e-05, + "loss": 1.217, + "step": 15150 + }, + { + "epoch": 0.02, + "learning_rate": 4.99720522789661e-05, + "loss": 1.4445, + "step": 15151 + }, + { + "epoch": 0.02, + "learning_rate": 4.997204856579883e-05, + "loss": 1.2377, + "step": 15152 + }, + { + "epoch": 0.02, + "learning_rate": 4.997204485238505e-05, + "loss": 1.1629, + "step": 15153 + }, + { + "epoch": 0.02, + "learning_rate": 4.997204113872476e-05, + "loss": 1.1063, + "step": 15154 + }, + { + "epoch": 0.02, + "learning_rate": 4.997203742481795e-05, + "loss": 1.2614, + "step": 15155 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972033710664626e-05, + "loss": 1.0013, + "step": 15156 + }, + { + "epoch": 0.02, + "learning_rate": 4.997202999626479e-05, + "loss": 1.3374, + "step": 15157 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972026281618444e-05, + "loss": 1.1069, + "step": 15158 + }, + { + "epoch": 0.02, + "learning_rate": 4.997202256672558e-05, + "loss": 1.1944, + "step": 15159 + }, + { + "epoch": 0.02, + "learning_rate": 4.997201885158621e-05, + "loss": 0.973, + "step": 15160 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972015136200326e-05, + "loss": 0.4015, + "step": 15161 + }, + { + "epoch": 0.02, + "learning_rate": 4.9972011420567924e-05, + "loss": 1.2352, + "step": 15162 + }, + { + "epoch": 0.02, + "learning_rate": 4.997200770468901e-05, + "loss": 1.2335, + "step": 15163 + }, + { + "epoch": 0.02, + "learning_rate": 4.997200398856358e-05, + "loss": 1.2301, + "step": 15164 + }, + { + "epoch": 0.02, + "learning_rate": 4.997200027219164e-05, + "loss": 1.1323, + "step": 15165 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971996555573184e-05, + "loss": 0.8692, + "step": 15166 + }, + { + "epoch": 0.02, + "learning_rate": 4.997199283870821e-05, + "loss": 1.0918, + "step": 15167 + }, + { + "epoch": 0.02, + "learning_rate": 4.997198912159674e-05, + "loss": 1.2836, + "step": 15168 + }, + { + "epoch": 0.02, + "learning_rate": 4.997198540423875e-05, + "loss": 1.1655, + "step": 15169 + }, + { + "epoch": 0.02, + "learning_rate": 4.997198168663424e-05, + "loss": 1.237, + "step": 15170 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971977968783226e-05, + "loss": 1.0943, + "step": 15171 + }, + { + "epoch": 0.02, + "learning_rate": 4.997197425068569e-05, + "loss": 1.1444, + "step": 15172 + }, + { + "epoch": 0.02, + "learning_rate": 4.997197053234165e-05, + "loss": 1.0765, + "step": 15173 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971966813751094e-05, + "loss": 0.8227, + "step": 15174 + }, + { + "epoch": 0.02, + "learning_rate": 4.997196309491402e-05, + "loss": 1.2364, + "step": 15175 + }, + { + "epoch": 0.02, + "learning_rate": 4.997195937583043e-05, + "loss": 1.2285, + "step": 15176 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971955656500345e-05, + "loss": 1.0951, + "step": 15177 + }, + { + "epoch": 0.02, + "learning_rate": 4.997195193692373e-05, + "loss": 1.1524, + "step": 15178 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971948217100616e-05, + "loss": 1.1948, + "step": 15179 + }, + { + "epoch": 0.02, + "learning_rate": 4.997194449703099e-05, + "loss": 1.1285, + "step": 15180 + }, + { + "epoch": 0.02, + "learning_rate": 4.997194077671484e-05, + "loss": 1.0622, + "step": 15181 + }, + { + "epoch": 0.02, + "learning_rate": 4.997193705615218e-05, + "loss": 1.409, + "step": 15182 + }, + { + "epoch": 0.02, + "learning_rate": 4.997193333534301e-05, + "loss": 1.5899, + "step": 15183 + }, + { + "epoch": 0.02, + "learning_rate": 4.997192961428733e-05, + "loss": 1.1493, + "step": 15184 + }, + { + "epoch": 0.02, + "learning_rate": 4.997192589298513e-05, + "loss": 1.1914, + "step": 15185 + }, + { + "epoch": 0.02, + "learning_rate": 4.997192217143643e-05, + "loss": 1.2167, + "step": 15186 + }, + { + "epoch": 0.02, + "learning_rate": 4.997191844964121e-05, + "loss": 1.1121, + "step": 15187 + }, + { + "epoch": 0.02, + "learning_rate": 4.997191472759948e-05, + "loss": 1.2398, + "step": 15188 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971911005311234e-05, + "loss": 1.0817, + "step": 15189 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971907282776475e-05, + "loss": 1.3304, + "step": 15190 + }, + { + "epoch": 0.02, + "learning_rate": 4.99719035599952e-05, + "loss": 0.9831, + "step": 15191 + }, + { + "epoch": 0.02, + "learning_rate": 4.997189983696743e-05, + "loss": 0.8255, + "step": 15192 + }, + { + "epoch": 0.02, + "learning_rate": 4.997189611369314e-05, + "loss": 1.1217, + "step": 15193 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971892390172324e-05, + "loss": 1.2713, + "step": 15194 + }, + { + "epoch": 0.02, + "learning_rate": 4.997188866640501e-05, + "loss": 1.2356, + "step": 15195 + }, + { + "epoch": 0.02, + "learning_rate": 4.997188494239118e-05, + "loss": 1.2145, + "step": 15196 + }, + { + "epoch": 0.02, + "learning_rate": 4.997188121813084e-05, + "loss": 1.2601, + "step": 15197 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971877493623994e-05, + "loss": 0.9486, + "step": 15198 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971873768870625e-05, + "loss": 1.0815, + "step": 15199 + }, + { + "epoch": 0.02, + "learning_rate": 4.997187004387075e-05, + "loss": 1.1651, + "step": 15200 + }, + { + "epoch": 0.02, + "learning_rate": 4.997186631862436e-05, + "loss": 0.9465, + "step": 15201 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971862593131465e-05, + "loss": 0.9616, + "step": 15202 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971858867392055e-05, + "loss": 0.9271, + "step": 15203 + }, + { + "epoch": 0.02, + "learning_rate": 4.997185514140613e-05, + "loss": 1.215, + "step": 15204 + }, + { + "epoch": 0.02, + "learning_rate": 4.99718514151737e-05, + "loss": 1.2199, + "step": 15205 + }, + { + "epoch": 0.02, + "learning_rate": 4.997184768869475e-05, + "loss": 1.0676, + "step": 15206 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971843961969286e-05, + "loss": 0.9031, + "step": 15207 + }, + { + "epoch": 0.02, + "learning_rate": 4.997184023499732e-05, + "loss": 1.0616, + "step": 15208 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971836507778844e-05, + "loss": 1.3447, + "step": 15209 + }, + { + "epoch": 0.02, + "learning_rate": 4.997183278031385e-05, + "loss": 1.0256, + "step": 15210 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971829052602346e-05, + "loss": 0.9974, + "step": 15211 + }, + { + "epoch": 0.02, + "learning_rate": 4.997182532464433e-05, + "loss": 1.3337, + "step": 15212 + }, + { + "epoch": 0.02, + "learning_rate": 4.997182159643981e-05, + "loss": 1.1154, + "step": 15213 + }, + { + "epoch": 0.02, + "learning_rate": 4.997181786798877e-05, + "loss": 1.0998, + "step": 15214 + }, + { + "epoch": 0.02, + "learning_rate": 4.997181413929122e-05, + "loss": 1.0698, + "step": 15215 + }, + { + "epoch": 0.02, + "learning_rate": 4.997181041034716e-05, + "loss": 1.1894, + "step": 15216 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971806681156586e-05, + "loss": 1.0463, + "step": 15217 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971802951719505e-05, + "loss": 0.9366, + "step": 15218 + }, + { + "epoch": 0.02, + "learning_rate": 4.997179922203591e-05, + "loss": 1.1174, + "step": 15219 + }, + { + "epoch": 0.02, + "learning_rate": 4.997179549210581e-05, + "loss": 1.0023, + "step": 15220 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971791761929186e-05, + "loss": 0.7451, + "step": 15221 + }, + { + "epoch": 0.02, + "learning_rate": 4.997178803150606e-05, + "loss": 0.7231, + "step": 15222 + }, + { + "epoch": 0.02, + "learning_rate": 4.997178430083643e-05, + "loss": 1.0292, + "step": 15223 + }, + { + "epoch": 0.02, + "learning_rate": 4.997178056992028e-05, + "loss": 1.0638, + "step": 15224 + }, + { + "epoch": 0.02, + "learning_rate": 4.997177683875762e-05, + "loss": 0.8664, + "step": 15225 + }, + { + "epoch": 0.02, + "learning_rate": 4.997177310734845e-05, + "loss": 0.93, + "step": 15226 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971769375692766e-05, + "loss": 0.9748, + "step": 15227 + }, + { + "epoch": 0.02, + "learning_rate": 4.997176564379058e-05, + "loss": 1.1762, + "step": 15228 + }, + { + "epoch": 0.02, + "learning_rate": 4.997176191164187e-05, + "loss": 1.0592, + "step": 15229 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971758179246666e-05, + "loss": 1.0584, + "step": 15230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971754446604934e-05, + "loss": 1.2004, + "step": 15231 + }, + { + "epoch": 0.02, + "learning_rate": 4.99717507137167e-05, + "loss": 0.9591, + "step": 15232 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971746980581956e-05, + "loss": 1.1207, + "step": 15233 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971743247200696e-05, + "loss": 1.0051, + "step": 15234 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971739513572937e-05, + "loss": 1.245, + "step": 15235 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971735779698656e-05, + "loss": 1.0049, + "step": 15236 + }, + { + "epoch": 0.02, + "learning_rate": 4.997173204557787e-05, + "loss": 1.1428, + "step": 15237 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971728311210575e-05, + "loss": 0.9968, + "step": 15238 + }, + { + "epoch": 0.02, + "learning_rate": 4.997172457659677e-05, + "loss": 0.438, + "step": 15239 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971720841736446e-05, + "loss": 1.0456, + "step": 15240 + }, + { + "epoch": 0.02, + "learning_rate": 4.997171710662962e-05, + "loss": 1.1608, + "step": 15241 + }, + { + "epoch": 0.02, + "learning_rate": 4.997171337127628e-05, + "loss": 1.0993, + "step": 15242 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971709635676434e-05, + "loss": 1.2221, + "step": 15243 + }, + { + "epoch": 0.02, + "learning_rate": 4.997170589983007e-05, + "loss": 1.1112, + "step": 15244 + }, + { + "epoch": 0.02, + "learning_rate": 4.99717021637372e-05, + "loss": 1.2677, + "step": 15245 + }, + { + "epoch": 0.02, + "learning_rate": 4.997169842739782e-05, + "loss": 0.9942, + "step": 15246 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971694690811935e-05, + "loss": 1.1507, + "step": 15247 + }, + { + "epoch": 0.02, + "learning_rate": 4.997169095397953e-05, + "loss": 1.0, + "step": 15248 + }, + { + "epoch": 0.02, + "learning_rate": 4.997168721690062e-05, + "loss": 1.147, + "step": 15249 + }, + { + "epoch": 0.02, + "learning_rate": 4.99716834795752e-05, + "loss": 1.1028, + "step": 15250 + }, + { + "epoch": 0.02, + "learning_rate": 4.997167974200327e-05, + "loss": 0.8925, + "step": 15251 + }, + { + "epoch": 0.02, + "learning_rate": 4.997167600418483e-05, + "loss": 1.176, + "step": 15252 + }, + { + "epoch": 0.02, + "learning_rate": 4.997167226611988e-05, + "loss": 0.7811, + "step": 15253 + }, + { + "epoch": 0.02, + "learning_rate": 4.997166852780842e-05, + "loss": 1.1627, + "step": 15254 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971664789250456e-05, + "loss": 1.0467, + "step": 15255 + }, + { + "epoch": 0.02, + "learning_rate": 4.997166105044597e-05, + "loss": 0.9798, + "step": 15256 + }, + { + "epoch": 0.02, + "learning_rate": 4.997165731139499e-05, + "loss": 1.1626, + "step": 15257 + }, + { + "epoch": 0.02, + "learning_rate": 4.997165357209749e-05, + "loss": 0.8546, + "step": 15258 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971649832553477e-05, + "loss": 1.1482, + "step": 15259 + }, + { + "epoch": 0.02, + "learning_rate": 4.997164609276296e-05, + "loss": 1.0805, + "step": 15260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971642352725935e-05, + "loss": 1.2735, + "step": 15261 + }, + { + "epoch": 0.02, + "learning_rate": 4.99716386124424e-05, + "loss": 1.3956, + "step": 15262 + }, + { + "epoch": 0.02, + "learning_rate": 4.997163487191235e-05, + "loss": 1.3231, + "step": 15263 + }, + { + "epoch": 0.02, + "learning_rate": 4.99716311311358e-05, + "loss": 1.3394, + "step": 15264 + }, + { + "epoch": 0.02, + "learning_rate": 4.997162739011273e-05, + "loss": 1.1015, + "step": 15265 + }, + { + "epoch": 0.02, + "learning_rate": 4.997162364884316e-05, + "loss": 0.8929, + "step": 15266 + }, + { + "epoch": 0.02, + "learning_rate": 4.997161990732707e-05, + "loss": 1.2235, + "step": 15267 + }, + { + "epoch": 0.02, + "learning_rate": 4.997161616556448e-05, + "loss": 1.1731, + "step": 15268 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971612423555384e-05, + "loss": 1.2492, + "step": 15269 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971608681299767e-05, + "loss": 1.1721, + "step": 15270 + }, + { + "epoch": 0.02, + "learning_rate": 4.997160493879764e-05, + "loss": 0.9599, + "step": 15271 + }, + { + "epoch": 0.02, + "learning_rate": 4.997160119604902e-05, + "loss": 1.2362, + "step": 15272 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971597453053874e-05, + "loss": 1.1751, + "step": 15273 + }, + { + "epoch": 0.02, + "learning_rate": 4.997159370981223e-05, + "loss": 1.034, + "step": 15274 + }, + { + "epoch": 0.02, + "learning_rate": 4.997158996632407e-05, + "loss": 1.1111, + "step": 15275 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971586222589406e-05, + "loss": 1.1358, + "step": 15276 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971582478608234e-05, + "loss": 1.0874, + "step": 15277 + }, + { + "epoch": 0.02, + "learning_rate": 4.997157873438055e-05, + "loss": 0.946, + "step": 15278 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971574989906355e-05, + "loss": 1.2169, + "step": 15279 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971571245185655e-05, + "loss": 1.0533, + "step": 15280 + }, + { + "epoch": 0.02, + "learning_rate": 4.997156750021844e-05, + "loss": 1.1441, + "step": 15281 + }, + { + "epoch": 0.02, + "learning_rate": 4.997156375500472e-05, + "loss": 1.3319, + "step": 15282 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971560009544494e-05, + "loss": 1.1559, + "step": 15283 + }, + { + "epoch": 0.02, + "learning_rate": 4.997155626383776e-05, + "loss": 1.1068, + "step": 15284 + }, + { + "epoch": 0.02, + "learning_rate": 4.997155251788451e-05, + "loss": 0.8974, + "step": 15285 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971548771684764e-05, + "loss": 0.7726, + "step": 15286 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971545025238496e-05, + "loss": 0.862, + "step": 15287 + }, + { + "epoch": 0.02, + "learning_rate": 4.997154127854573e-05, + "loss": 0.8907, + "step": 15288 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971537531606445e-05, + "loss": 1.1277, + "step": 15289 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971533784420656e-05, + "loss": 1.3205, + "step": 15290 + }, + { + "epoch": 0.02, + "learning_rate": 4.997153003698836e-05, + "loss": 0.9791, + "step": 15291 + }, + { + "epoch": 0.02, + "learning_rate": 4.997152628930956e-05, + "loss": 1.2524, + "step": 15292 + }, + { + "epoch": 0.02, + "learning_rate": 4.997152254138424e-05, + "loss": 1.2695, + "step": 15293 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971518793212416e-05, + "loss": 1.2791, + "step": 15294 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971515044794086e-05, + "loss": 1.0037, + "step": 15295 + }, + { + "epoch": 0.02, + "learning_rate": 4.997151129612925e-05, + "loss": 0.9392, + "step": 15296 + }, + { + "epoch": 0.02, + "learning_rate": 4.99715075472179e-05, + "loss": 1.1555, + "step": 15297 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971503798060046e-05, + "loss": 1.1924, + "step": 15298 + }, + { + "epoch": 0.02, + "learning_rate": 4.997150004865568e-05, + "loss": 1.4881, + "step": 15299 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971496299004816e-05, + "loss": 0.9699, + "step": 15300 + }, + { + "epoch": 0.02, + "learning_rate": 4.997149254910744e-05, + "loss": 0.8355, + "step": 15301 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971488798963544e-05, + "loss": 1.1416, + "step": 15302 + }, + { + "epoch": 0.02, + "learning_rate": 4.997148504857315e-05, + "loss": 1.1125, + "step": 15303 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971481297936245e-05, + "loss": 1.1312, + "step": 15304 + }, + { + "epoch": 0.02, + "learning_rate": 4.997147754705284e-05, + "loss": 1.1235, + "step": 15305 + }, + { + "epoch": 0.02, + "learning_rate": 4.997147379592292e-05, + "loss": 1.1419, + "step": 15306 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971470044546485e-05, + "loss": 1.0428, + "step": 15307 + }, + { + "epoch": 0.02, + "learning_rate": 4.997146629292355e-05, + "loss": 1.3987, + "step": 15308 + }, + { + "epoch": 0.02, + "learning_rate": 4.997146254105411e-05, + "loss": 1.5465, + "step": 15309 + }, + { + "epoch": 0.02, + "learning_rate": 4.997145878893816e-05, + "loss": 1.1879, + "step": 15310 + }, + { + "epoch": 0.02, + "learning_rate": 4.99714550365757e-05, + "loss": 1.0822, + "step": 15311 + }, + { + "epoch": 0.02, + "learning_rate": 4.997145128396674e-05, + "loss": 1.0474, + "step": 15312 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971447531111256e-05, + "loss": 1.0235, + "step": 15313 + }, + { + "epoch": 0.02, + "learning_rate": 4.997144377800928e-05, + "loss": 1.0694, + "step": 15314 + }, + { + "epoch": 0.02, + "learning_rate": 4.997144002466079e-05, + "loss": 0.8507, + "step": 15315 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971436271065796e-05, + "loss": 1.1211, + "step": 15316 + }, + { + "epoch": 0.02, + "learning_rate": 4.997143251722429e-05, + "loss": 1.103, + "step": 15317 + }, + { + "epoch": 0.02, + "learning_rate": 4.997142876313628e-05, + "loss": 0.9408, + "step": 15318 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971425008801766e-05, + "loss": 1.4217, + "step": 15319 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971421254220735e-05, + "loss": 1.0344, + "step": 15320 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971417499393204e-05, + "loss": 1.1387, + "step": 15321 + }, + { + "epoch": 0.02, + "learning_rate": 4.997141374431916e-05, + "loss": 0.8562, + "step": 15322 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971409988998615e-05, + "loss": 1.0609, + "step": 15323 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971406233431564e-05, + "loss": 0.9183, + "step": 15324 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971402477618e-05, + "loss": 1.1931, + "step": 15325 + }, + { + "epoch": 0.02, + "learning_rate": 4.997139872155793e-05, + "loss": 1.5752, + "step": 15326 + }, + { + "epoch": 0.02, + "learning_rate": 4.997139496525135e-05, + "loss": 1.0926, + "step": 15327 + }, + { + "epoch": 0.02, + "learning_rate": 4.997139120869826e-05, + "loss": 1.2595, + "step": 15328 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971387451898676e-05, + "loss": 1.2706, + "step": 15329 + }, + { + "epoch": 0.02, + "learning_rate": 4.997138369485258e-05, + "loss": 1.8415, + "step": 15330 + }, + { + "epoch": 0.02, + "learning_rate": 4.997137993755997e-05, + "loss": 1.6819, + "step": 15331 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971376180020856e-05, + "loss": 1.5807, + "step": 15332 + }, + { + "epoch": 0.02, + "learning_rate": 4.997137242223524e-05, + "loss": 1.2045, + "step": 15333 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971368664203115e-05, + "loss": 1.2469, + "step": 15334 + }, + { + "epoch": 0.02, + "learning_rate": 4.997136490592448e-05, + "loss": 1.1578, + "step": 15335 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971361147399346e-05, + "loss": 1.1761, + "step": 15336 + }, + { + "epoch": 0.02, + "learning_rate": 4.99713573886277e-05, + "loss": 1.3091, + "step": 15337 + }, + { + "epoch": 0.02, + "learning_rate": 4.997135362960954e-05, + "loss": 1.1823, + "step": 15338 + }, + { + "epoch": 0.02, + "learning_rate": 4.997134987034489e-05, + "loss": 1.0622, + "step": 15339 + }, + { + "epoch": 0.02, + "learning_rate": 4.997134611083372e-05, + "loss": 0.9757, + "step": 15340 + }, + { + "epoch": 0.02, + "learning_rate": 4.997134235107604e-05, + "loss": 1.1082, + "step": 15341 + }, + { + "epoch": 0.02, + "learning_rate": 4.997133859107187e-05, + "loss": 1.1068, + "step": 15342 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971334830821184e-05, + "loss": 0.771, + "step": 15343 + }, + { + "epoch": 0.02, + "learning_rate": 4.997133107032399e-05, + "loss": 1.1108, + "step": 15344 + }, + { + "epoch": 0.02, + "learning_rate": 4.997132730958029e-05, + "loss": 0.6597, + "step": 15345 + }, + { + "epoch": 0.02, + "learning_rate": 4.997132354859009e-05, + "loss": 0.8194, + "step": 15346 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971319787353377e-05, + "loss": 0.6937, + "step": 15347 + }, + { + "epoch": 0.02, + "learning_rate": 4.997131602587016e-05, + "loss": 1.0032, + "step": 15348 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971312264140435e-05, + "loss": 1.3638, + "step": 15349 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971308502164203e-05, + "loss": 1.2591, + "step": 15350 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971304739941465e-05, + "loss": 1.4679, + "step": 15351 + }, + { + "epoch": 0.02, + "learning_rate": 4.997130097747223e-05, + "loss": 1.1335, + "step": 15352 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971297214756475e-05, + "loss": 1.2889, + "step": 15353 + }, + { + "epoch": 0.02, + "learning_rate": 4.997129345179422e-05, + "loss": 1.1222, + "step": 15354 + }, + { + "epoch": 0.02, + "learning_rate": 4.997128968858546e-05, + "loss": 1.2838, + "step": 15355 + }, + { + "epoch": 0.02, + "learning_rate": 4.997128592513019e-05, + "loss": 0.9344, + "step": 15356 + }, + { + "epoch": 0.02, + "learning_rate": 4.997128216142842e-05, + "loss": 0.9273, + "step": 15357 + }, + { + "epoch": 0.02, + "learning_rate": 4.997127839748014e-05, + "loss": 1.144, + "step": 15358 + }, + { + "epoch": 0.02, + "learning_rate": 4.997127463328535e-05, + "loss": 1.1295, + "step": 15359 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971270868844064e-05, + "loss": 1.0321, + "step": 15360 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971267104156264e-05, + "loss": 1.071, + "step": 15361 + }, + { + "epoch": 0.02, + "learning_rate": 4.997126333922196e-05, + "loss": 1.0994, + "step": 15362 + }, + { + "epoch": 0.02, + "learning_rate": 4.997125957404115e-05, + "loss": 0.6077, + "step": 15363 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971255808613834e-05, + "loss": 0.5418, + "step": 15364 + }, + { + "epoch": 0.02, + "learning_rate": 4.997125204294002e-05, + "loss": 1.4358, + "step": 15365 + }, + { + "epoch": 0.02, + "learning_rate": 4.997124827701969e-05, + "loss": 1.2474, + "step": 15366 + }, + { + "epoch": 0.02, + "learning_rate": 4.997124451085286e-05, + "loss": 0.8211, + "step": 15367 + }, + { + "epoch": 0.02, + "learning_rate": 4.997124074443953e-05, + "loss": 0.9611, + "step": 15368 + }, + { + "epoch": 0.02, + "learning_rate": 4.997123697777968e-05, + "loss": 1.064, + "step": 15369 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971233210873335e-05, + "loss": 1.1149, + "step": 15370 + }, + { + "epoch": 0.02, + "learning_rate": 4.997122944372048e-05, + "loss": 1.1558, + "step": 15371 + }, + { + "epoch": 0.02, + "learning_rate": 4.997122567632112e-05, + "loss": 1.1323, + "step": 15372 + }, + { + "epoch": 0.02, + "learning_rate": 4.997122190867526e-05, + "loss": 1.1184, + "step": 15373 + }, + { + "epoch": 0.02, + "learning_rate": 4.997121814078288e-05, + "loss": 1.1691, + "step": 15374 + }, + { + "epoch": 0.02, + "learning_rate": 4.997121437264401e-05, + "loss": 1.3078, + "step": 15375 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971210604258635e-05, + "loss": 1.2348, + "step": 15376 + }, + { + "epoch": 0.02, + "learning_rate": 4.997120683562675e-05, + "loss": 1.1815, + "step": 15377 + }, + { + "epoch": 0.02, + "learning_rate": 4.997120306674835e-05, + "loss": 1.2258, + "step": 15378 + }, + { + "epoch": 0.02, + "learning_rate": 4.997119929762346e-05, + "loss": 0.9017, + "step": 15379 + }, + { + "epoch": 0.02, + "learning_rate": 4.997119552825206e-05, + "loss": 1.1378, + "step": 15380 + }, + { + "epoch": 0.02, + "learning_rate": 4.997119175863415e-05, + "loss": 1.1385, + "step": 15381 + }, + { + "epoch": 0.02, + "learning_rate": 4.997118798876974e-05, + "loss": 1.0615, + "step": 15382 + }, + { + "epoch": 0.02, + "learning_rate": 4.997118421865882e-05, + "loss": 0.6674, + "step": 15383 + }, + { + "epoch": 0.02, + "learning_rate": 4.99711804483014e-05, + "loss": 0.4038, + "step": 15384 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971176677697476e-05, + "loss": 0.4091, + "step": 15385 + }, + { + "epoch": 0.02, + "learning_rate": 4.997117290684704e-05, + "loss": 0.3502, + "step": 15386 + }, + { + "epoch": 0.02, + "learning_rate": 4.99711691357501e-05, + "loss": 0.317, + "step": 15387 + }, + { + "epoch": 0.02, + "learning_rate": 4.997116536440667e-05, + "loss": 0.3268, + "step": 15388 + }, + { + "epoch": 0.02, + "learning_rate": 4.997116159281672e-05, + "loss": 0.7288, + "step": 15389 + }, + { + "epoch": 0.02, + "learning_rate": 4.997115782098027e-05, + "loss": 0.5239, + "step": 15390 + }, + { + "epoch": 0.02, + "learning_rate": 4.997115404889731e-05, + "loss": 0.4891, + "step": 15391 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971150276567855e-05, + "loss": 0.3504, + "step": 15392 + }, + { + "epoch": 0.02, + "learning_rate": 4.997114650399188e-05, + "loss": 0.2326, + "step": 15393 + }, + { + "epoch": 0.02, + "learning_rate": 4.997114273116942e-05, + "loss": 0.2361, + "step": 15394 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971138958100445e-05, + "loss": 1.0379, + "step": 15395 + }, + { + "epoch": 0.02, + "learning_rate": 4.997113518478497e-05, + "loss": 1.0147, + "step": 15396 + }, + { + "epoch": 0.02, + "learning_rate": 4.997113141122298e-05, + "loss": 1.1079, + "step": 15397 + }, + { + "epoch": 0.02, + "learning_rate": 4.997112763741449e-05, + "loss": 1.1869, + "step": 15398 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971123863359504e-05, + "loss": 1.1236, + "step": 15399 + }, + { + "epoch": 0.02, + "learning_rate": 4.997112008905801e-05, + "loss": 1.1269, + "step": 15400 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971116314510005e-05, + "loss": 1.165, + "step": 15401 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971112539715495e-05, + "loss": 0.8438, + "step": 15402 + }, + { + "epoch": 0.02, + "learning_rate": 4.997110876467449e-05, + "loss": 1.2577, + "step": 15403 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971104989386976e-05, + "loss": 1.2677, + "step": 15404 + }, + { + "epoch": 0.02, + "learning_rate": 4.997110121385296e-05, + "loss": 1.0797, + "step": 15405 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971097438072435e-05, + "loss": 0.9922, + "step": 15406 + }, + { + "epoch": 0.02, + "learning_rate": 4.997109366204541e-05, + "loss": 0.9806, + "step": 15407 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971089885771874e-05, + "loss": 0.6245, + "step": 15408 + }, + { + "epoch": 0.02, + "learning_rate": 4.997108610925184e-05, + "loss": 1.2668, + "step": 15409 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971082332485305e-05, + "loss": 1.1568, + "step": 15410 + }, + { + "epoch": 0.02, + "learning_rate": 4.997107855547226e-05, + "loss": 1.2187, + "step": 15411 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971074778212715e-05, + "loss": 0.9575, + "step": 15412 + }, + { + "epoch": 0.02, + "learning_rate": 4.997107100070666e-05, + "loss": 1.3958, + "step": 15413 + }, + { + "epoch": 0.02, + "learning_rate": 4.997106722295411e-05, + "loss": 1.2579, + "step": 15414 + }, + { + "epoch": 0.02, + "learning_rate": 4.997106344495505e-05, + "loss": 1.6806, + "step": 15415 + }, + { + "epoch": 0.02, + "learning_rate": 4.997105966670949e-05, + "loss": 1.4584, + "step": 15416 + }, + { + "epoch": 0.02, + "learning_rate": 4.997105588821742e-05, + "loss": 1.3861, + "step": 15417 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971052109478855e-05, + "loss": 1.1164, + "step": 15418 + }, + { + "epoch": 0.02, + "learning_rate": 4.997104833049378e-05, + "loss": 1.0986, + "step": 15419 + }, + { + "epoch": 0.02, + "learning_rate": 4.99710445512622e-05, + "loss": 1.0733, + "step": 15420 + }, + { + "epoch": 0.02, + "learning_rate": 4.997104077178412e-05, + "loss": 1.006, + "step": 15421 + }, + { + "epoch": 0.02, + "learning_rate": 4.997103699205954e-05, + "loss": 1.0617, + "step": 15422 + }, + { + "epoch": 0.02, + "learning_rate": 4.997103321208845e-05, + "loss": 1.1979, + "step": 15423 + }, + { + "epoch": 0.02, + "learning_rate": 4.997102943187086e-05, + "loss": 1.1973, + "step": 15424 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971025651406766e-05, + "loss": 1.3848, + "step": 15425 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971021870696165e-05, + "loss": 1.2419, + "step": 15426 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971018089739064e-05, + "loss": 0.8851, + "step": 15427 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971014308535456e-05, + "loss": 1.1536, + "step": 15428 + }, + { + "epoch": 0.02, + "learning_rate": 4.997101052708535e-05, + "loss": 1.0217, + "step": 15429 + }, + { + "epoch": 0.02, + "learning_rate": 4.997100674538874e-05, + "loss": 1.0199, + "step": 15430 + }, + { + "epoch": 0.02, + "learning_rate": 4.9971002963445624e-05, + "loss": 1.0635, + "step": 15431 + }, + { + "epoch": 0.02, + "learning_rate": 4.997099918125601e-05, + "loss": 1.0826, + "step": 15432 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970995398819886e-05, + "loss": 0.6998, + "step": 15433 + }, + { + "epoch": 0.02, + "learning_rate": 4.997099161613726e-05, + "loss": 1.1935, + "step": 15434 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970987833208134e-05, + "loss": 0.9907, + "step": 15435 + }, + { + "epoch": 0.02, + "learning_rate": 4.99709840500325e-05, + "loss": 1.1779, + "step": 15436 + }, + { + "epoch": 0.02, + "learning_rate": 4.997098026661037e-05, + "loss": 1.2193, + "step": 15437 + }, + { + "epoch": 0.02, + "learning_rate": 4.997097648294173e-05, + "loss": 0.9302, + "step": 15438 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970972699026594e-05, + "loss": 1.2012, + "step": 15439 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970968914864944e-05, + "loss": 1.0083, + "step": 15440 + }, + { + "epoch": 0.02, + "learning_rate": 4.997096513045681e-05, + "loss": 1.1163, + "step": 15441 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970961345802156e-05, + "loss": 1.3618, + "step": 15442 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970957560901005e-05, + "loss": 1.189, + "step": 15443 + }, + { + "epoch": 0.02, + "learning_rate": 4.997095377575335e-05, + "loss": 1.1081, + "step": 15444 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970949990359196e-05, + "loss": 1.2028, + "step": 15445 + }, + { + "epoch": 0.02, + "learning_rate": 4.997094620471854e-05, + "loss": 1.2225, + "step": 15446 + }, + { + "epoch": 0.02, + "learning_rate": 4.997094241883137e-05, + "loss": 1.2214, + "step": 15447 + }, + { + "epoch": 0.02, + "learning_rate": 4.997093863269771e-05, + "loss": 1.1734, + "step": 15448 + }, + { + "epoch": 0.02, + "learning_rate": 4.997093484631754e-05, + "loss": 1.0583, + "step": 15449 + }, + { + "epoch": 0.02, + "learning_rate": 4.997093105969087e-05, + "loss": 1.1264, + "step": 15450 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970927272817705e-05, + "loss": 1.2098, + "step": 15451 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970923485698025e-05, + "loss": 0.7442, + "step": 15452 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970919698331846e-05, + "loss": 0.9979, + "step": 15453 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970915910719166e-05, + "loss": 1.0807, + "step": 15454 + }, + { + "epoch": 0.02, + "learning_rate": 4.997091212285999e-05, + "loss": 1.0449, + "step": 15455 + }, + { + "epoch": 0.02, + "learning_rate": 4.99709083347543e-05, + "loss": 0.8006, + "step": 15456 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970904546402113e-05, + "loss": 2.4288, + "step": 15457 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970900757803427e-05, + "loss": 1.2356, + "step": 15458 + }, + { + "epoch": 0.02, + "learning_rate": 4.997089696895824e-05, + "loss": 1.3419, + "step": 15459 + }, + { + "epoch": 0.02, + "learning_rate": 4.997089317986654e-05, + "loss": 1.4171, + "step": 15460 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970889390528345e-05, + "loss": 1.4521, + "step": 15461 + }, + { + "epoch": 0.02, + "learning_rate": 4.997088560094365e-05, + "loss": 1.4672, + "step": 15462 + }, + { + "epoch": 0.02, + "learning_rate": 4.997088181111245e-05, + "loss": 1.4953, + "step": 15463 + }, + { + "epoch": 0.02, + "learning_rate": 4.997087802103475e-05, + "loss": 1.1776, + "step": 15464 + }, + { + "epoch": 0.02, + "learning_rate": 4.997087423071055e-05, + "loss": 1.0757, + "step": 15465 + }, + { + "epoch": 0.02, + "learning_rate": 4.997087044013984e-05, + "loss": 0.5758, + "step": 15466 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970866649322637e-05, + "loss": 1.2338, + "step": 15467 + }, + { + "epoch": 0.02, + "learning_rate": 4.997086285825893e-05, + "loss": 1.3703, + "step": 15468 + }, + { + "epoch": 0.02, + "learning_rate": 4.997085906694872e-05, + "loss": 0.9041, + "step": 15469 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970855275392004e-05, + "loss": 1.1656, + "step": 15470 + }, + { + "epoch": 0.02, + "learning_rate": 4.997085148358879e-05, + "loss": 1.1609, + "step": 15471 + }, + { + "epoch": 0.02, + "learning_rate": 4.997084769153907e-05, + "loss": 0.9311, + "step": 15472 + }, + { + "epoch": 0.02, + "learning_rate": 4.997084389924285e-05, + "loss": 1.4685, + "step": 15473 + }, + { + "epoch": 0.02, + "learning_rate": 4.997084010670013e-05, + "loss": 1.0369, + "step": 15474 + }, + { + "epoch": 0.02, + "learning_rate": 4.997083631391091e-05, + "loss": 0.9598, + "step": 15475 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970832520875186e-05, + "loss": 1.0708, + "step": 15476 + }, + { + "epoch": 0.02, + "learning_rate": 4.997082872759297e-05, + "loss": 1.5911, + "step": 15477 + }, + { + "epoch": 0.02, + "learning_rate": 4.997082493406424e-05, + "loss": 1.3801, + "step": 15478 + }, + { + "epoch": 0.02, + "learning_rate": 4.997082114028901e-05, + "loss": 1.0498, + "step": 15479 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970817346267285e-05, + "loss": 1.2106, + "step": 15480 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970813551999054e-05, + "loss": 0.9464, + "step": 15481 + }, + { + "epoch": 0.02, + "learning_rate": 4.997080975748432e-05, + "loss": 0.984, + "step": 15482 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970805962723085e-05, + "loss": 1.0681, + "step": 15483 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970802167715354e-05, + "loss": 1.3086, + "step": 15484 + }, + { + "epoch": 0.02, + "learning_rate": 4.997079837246112e-05, + "loss": 1.1362, + "step": 15485 + }, + { + "epoch": 0.02, + "learning_rate": 4.997079457696038e-05, + "loss": 1.2258, + "step": 15486 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970790781213146e-05, + "loss": 1.4545, + "step": 15487 + }, + { + "epoch": 0.02, + "learning_rate": 4.997078698521941e-05, + "loss": 0.9743, + "step": 15488 + }, + { + "epoch": 0.02, + "learning_rate": 4.997078318897916e-05, + "loss": 1.0882, + "step": 15489 + }, + { + "epoch": 0.02, + "learning_rate": 4.997077939249243e-05, + "loss": 1.4589, + "step": 15490 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970775595759184e-05, + "loss": 1.0599, + "step": 15491 + }, + { + "epoch": 0.02, + "learning_rate": 4.997077179877944e-05, + "loss": 0.3353, + "step": 15492 + }, + { + "epoch": 0.02, + "learning_rate": 4.997076800155319e-05, + "loss": 0.1309, + "step": 15493 + }, + { + "epoch": 0.02, + "learning_rate": 4.997076420408045e-05, + "loss": 0.2341, + "step": 15494 + }, + { + "epoch": 0.02, + "learning_rate": 4.997076040636121e-05, + "loss": 0.9858, + "step": 15495 + }, + { + "epoch": 0.02, + "learning_rate": 4.997075660839546e-05, + "loss": 0.9304, + "step": 15496 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970752810183214e-05, + "loss": 1.1588, + "step": 15497 + }, + { + "epoch": 0.02, + "learning_rate": 4.997074901172446e-05, + "loss": 1.0732, + "step": 15498 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970745213019214e-05, + "loss": 1.1569, + "step": 15499 + }, + { + "epoch": 0.02, + "learning_rate": 4.997074141406747e-05, + "loss": 1.1404, + "step": 15500 + }, + { + "epoch": 0.02, + "eval_loss": 1.0326253175735474, + "eval_runtime": 110.0077, + "eval_samples_per_second": 12.59, + "eval_steps_per_second": 3.154, + "step": 15500 + }, + { + "epoch": 0.02, + "learning_rate": 4.997073761486922e-05, + "loss": 1.084, + "step": 15501 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970733815424466e-05, + "loss": 1.0516, + "step": 15502 + }, + { + "epoch": 0.02, + "learning_rate": 4.997073001573321e-05, + "loss": 1.1499, + "step": 15503 + }, + { + "epoch": 0.02, + "learning_rate": 4.997072621579546e-05, + "loss": 1.196, + "step": 15504 + }, + { + "epoch": 0.02, + "learning_rate": 4.997072241561121e-05, + "loss": 1.0631, + "step": 15505 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970718615180455e-05, + "loss": 0.8387, + "step": 15506 + }, + { + "epoch": 0.02, + "learning_rate": 4.997071481450321e-05, + "loss": 1.1692, + "step": 15507 + }, + { + "epoch": 0.02, + "learning_rate": 4.997071101357945e-05, + "loss": 1.1049, + "step": 15508 + }, + { + "epoch": 0.02, + "learning_rate": 4.99707072124092e-05, + "loss": 1.1272, + "step": 15509 + }, + { + "epoch": 0.02, + "learning_rate": 4.997070341099245e-05, + "loss": 0.95, + "step": 15510 + }, + { + "epoch": 0.02, + "learning_rate": 4.997069960932919e-05, + "loss": 1.1122, + "step": 15511 + }, + { + "epoch": 0.02, + "learning_rate": 4.997069580741943e-05, + "loss": 1.0699, + "step": 15512 + }, + { + "epoch": 0.02, + "learning_rate": 4.997069200526318e-05, + "loss": 1.2448, + "step": 15513 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970688202860426e-05, + "loss": 1.1439, + "step": 15514 + }, + { + "epoch": 0.02, + "learning_rate": 4.997068440021117e-05, + "loss": 1.1283, + "step": 15515 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970680597315415e-05, + "loss": 0.8256, + "step": 15516 + }, + { + "epoch": 0.02, + "learning_rate": 4.997067679417316e-05, + "loss": 0.7458, + "step": 15517 + }, + { + "epoch": 0.02, + "learning_rate": 4.997067299078441e-05, + "loss": 1.4405, + "step": 15518 + }, + { + "epoch": 0.02, + "learning_rate": 4.997066918714915e-05, + "loss": 1.3147, + "step": 15519 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970665383267396e-05, + "loss": 1.3012, + "step": 15520 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970661579139146e-05, + "loss": 1.0775, + "step": 15521 + }, + { + "epoch": 0.02, + "learning_rate": 4.997065777476438e-05, + "loss": 1.2531, + "step": 15522 + }, + { + "epoch": 0.02, + "learning_rate": 4.997065397014313e-05, + "loss": 1.2214, + "step": 15523 + }, + { + "epoch": 0.02, + "learning_rate": 4.997065016527538e-05, + "loss": 1.1619, + "step": 15524 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970646360161125e-05, + "loss": 1.0365, + "step": 15525 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970642554800374e-05, + "loss": 1.0415, + "step": 15526 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970638749193116e-05, + "loss": 1.0644, + "step": 15527 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970634943339365e-05, + "loss": 1.1692, + "step": 15528 + }, + { + "epoch": 0.02, + "learning_rate": 4.997063113723911e-05, + "loss": 1.0218, + "step": 15529 + }, + { + "epoch": 0.02, + "learning_rate": 4.997062733089236e-05, + "loss": 1.1165, + "step": 15530 + }, + { + "epoch": 0.02, + "learning_rate": 4.997062352429911e-05, + "loss": 1.3149, + "step": 15531 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970619717459354e-05, + "loss": 1.0921, + "step": 15532 + }, + { + "epoch": 0.02, + "learning_rate": 4.997061591037311e-05, + "loss": 1.1878, + "step": 15533 + }, + { + "epoch": 0.02, + "learning_rate": 4.997061210304036e-05, + "loss": 1.0947, + "step": 15534 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970608295461105e-05, + "loss": 1.0979, + "step": 15535 + }, + { + "epoch": 0.02, + "learning_rate": 4.997060448763536e-05, + "loss": 1.2264, + "step": 15536 + }, + { + "epoch": 0.02, + "learning_rate": 4.997060067956311e-05, + "loss": 0.8933, + "step": 15537 + }, + { + "epoch": 0.02, + "learning_rate": 4.997059687124436e-05, + "loss": 1.6329, + "step": 15538 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970593062679117e-05, + "loss": 1.0252, + "step": 15539 + }, + { + "epoch": 0.02, + "learning_rate": 4.997058925386737e-05, + "loss": 1.0822, + "step": 15540 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970585444809125e-05, + "loss": 1.362, + "step": 15541 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970581635504385e-05, + "loss": 1.6719, + "step": 15542 + }, + { + "epoch": 0.02, + "learning_rate": 4.997057782595314e-05, + "loss": 2.0673, + "step": 15543 + }, + { + "epoch": 0.02, + "learning_rate": 4.99705740161554e-05, + "loss": 1.0982, + "step": 15544 + }, + { + "epoch": 0.02, + "learning_rate": 4.997057020611115e-05, + "loss": 1.1138, + "step": 15545 + }, + { + "epoch": 0.02, + "learning_rate": 4.997056639582042e-05, + "loss": 1.0255, + "step": 15546 + }, + { + "epoch": 0.02, + "learning_rate": 4.997056258528317e-05, + "loss": 0.8348, + "step": 15547 + }, + { + "epoch": 0.02, + "learning_rate": 4.997055877449944e-05, + "loss": 1.1316, + "step": 15548 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970554963469206e-05, + "loss": 1.0755, + "step": 15549 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970551152192466e-05, + "loss": 0.81, + "step": 15550 + }, + { + "epoch": 0.02, + "learning_rate": 4.997054734066924e-05, + "loss": 0.8974, + "step": 15551 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970543528899505e-05, + "loss": 0.92, + "step": 15552 + }, + { + "epoch": 0.02, + "learning_rate": 4.997053971688327e-05, + "loss": 0.8723, + "step": 15553 + }, + { + "epoch": 0.02, + "learning_rate": 4.997053590462054e-05, + "loss": 1.2132, + "step": 15554 + }, + { + "epoch": 0.02, + "learning_rate": 4.997053209211131e-05, + "loss": 1.1012, + "step": 15555 + }, + { + "epoch": 0.02, + "learning_rate": 4.997052827935559e-05, + "loss": 1.1338, + "step": 15556 + }, + { + "epoch": 0.02, + "learning_rate": 4.997052446635336e-05, + "loss": 1.1448, + "step": 15557 + }, + { + "epoch": 0.02, + "learning_rate": 4.997052065310463e-05, + "loss": 1.1036, + "step": 15558 + }, + { + "epoch": 0.02, + "learning_rate": 4.997051683960942e-05, + "loss": 0.7476, + "step": 15559 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970513025867696e-05, + "loss": 1.2147, + "step": 15560 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970509211879474e-05, + "loss": 1.139, + "step": 15561 + }, + { + "epoch": 0.02, + "learning_rate": 4.997050539764475e-05, + "loss": 0.7803, + "step": 15562 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970501583163544e-05, + "loss": 0.4161, + "step": 15563 + }, + { + "epoch": 0.02, + "learning_rate": 4.997049776843583e-05, + "loss": 0.5792, + "step": 15564 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970493953461613e-05, + "loss": 0.9664, + "step": 15565 + }, + { + "epoch": 0.02, + "learning_rate": 4.99704901382409e-05, + "loss": 1.0948, + "step": 15566 + }, + { + "epoch": 0.02, + "learning_rate": 4.997048632277369e-05, + "loss": 1.0837, + "step": 15567 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970482507059987e-05, + "loss": 1.0737, + "step": 15568 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970478691099784e-05, + "loss": 1.1318, + "step": 15569 + }, + { + "epoch": 0.02, + "learning_rate": 4.997047487489308e-05, + "loss": 1.0954, + "step": 15570 + }, + { + "epoch": 0.02, + "learning_rate": 4.997047105843988e-05, + "loss": 1.222, + "step": 15571 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970467241740176e-05, + "loss": 1.1029, + "step": 15572 + }, + { + "epoch": 0.02, + "learning_rate": 4.997046342479398e-05, + "loss": 1.2778, + "step": 15573 + }, + { + "epoch": 0.02, + "learning_rate": 4.997045960760128e-05, + "loss": 1.0396, + "step": 15574 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970455790162094e-05, + "loss": 1.2474, + "step": 15575 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970451972476404e-05, + "loss": 0.8859, + "step": 15576 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970448154544214e-05, + "loss": 1.1095, + "step": 15577 + }, + { + "epoch": 0.02, + "learning_rate": 4.997044433636553e-05, + "loss": 1.0876, + "step": 15578 + }, + { + "epoch": 0.02, + "learning_rate": 4.997044051794034e-05, + "loss": 1.0309, + "step": 15579 + }, + { + "epoch": 0.02, + "learning_rate": 4.997043669926866e-05, + "loss": 1.2329, + "step": 15580 + }, + { + "epoch": 0.02, + "learning_rate": 4.997043288035048e-05, + "loss": 0.9731, + "step": 15581 + }, + { + "epoch": 0.02, + "learning_rate": 4.997042906118581e-05, + "loss": 0.5186, + "step": 15582 + }, + { + "epoch": 0.02, + "learning_rate": 4.997042524177463e-05, + "loss": 0.9851, + "step": 15583 + }, + { + "epoch": 0.02, + "learning_rate": 4.997042142211696e-05, + "loss": 1.1017, + "step": 15584 + }, + { + "epoch": 0.02, + "learning_rate": 4.997041760221279e-05, + "loss": 1.1057, + "step": 15585 + }, + { + "epoch": 0.02, + "learning_rate": 4.997041378206212e-05, + "loss": 1.1474, + "step": 15586 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970409961664955e-05, + "loss": 0.9451, + "step": 15587 + }, + { + "epoch": 0.02, + "learning_rate": 4.99704061410213e-05, + "loss": 0.9753, + "step": 15588 + }, + { + "epoch": 0.02, + "learning_rate": 4.997040232013114e-05, + "loss": 1.1145, + "step": 15589 + }, + { + "epoch": 0.02, + "learning_rate": 4.997039849899448e-05, + "loss": 1.0578, + "step": 15590 + }, + { + "epoch": 0.02, + "learning_rate": 4.997039467761133e-05, + "loss": 1.2155, + "step": 15591 + }, + { + "epoch": 0.02, + "learning_rate": 4.997039085598167e-05, + "loss": 1.0917, + "step": 15592 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970387034105534e-05, + "loss": 1.1275, + "step": 15593 + }, + { + "epoch": 0.02, + "learning_rate": 4.997038321198288e-05, + "loss": 1.0806, + "step": 15594 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970379389613744e-05, + "loss": 1.0596, + "step": 15595 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970375566998105e-05, + "loss": 0.3714, + "step": 15596 + }, + { + "epoch": 0.02, + "learning_rate": 4.997037174413597e-05, + "loss": 0.366, + "step": 15597 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970367921027335e-05, + "loss": 0.3177, + "step": 15598 + }, + { + "epoch": 0.02, + "learning_rate": 4.997036409767221e-05, + "loss": 0.3467, + "step": 15599 + }, + { + "epoch": 0.02, + "learning_rate": 4.997036027407058e-05, + "loss": 0.8203, + "step": 15600 + }, + { + "epoch": 0.02, + "learning_rate": 4.997035645022246e-05, + "loss": 1.2714, + "step": 15601 + }, + { + "epoch": 0.02, + "learning_rate": 4.997035262612784e-05, + "loss": 0.5748, + "step": 15602 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970348801786713e-05, + "loss": 0.9596, + "step": 15603 + }, + { + "epoch": 0.02, + "learning_rate": 4.99703449771991e-05, + "loss": 1.3077, + "step": 15604 + }, + { + "epoch": 0.02, + "learning_rate": 4.997034115236499e-05, + "loss": 1.227, + "step": 15605 + }, + { + "epoch": 0.02, + "learning_rate": 4.997033732728438e-05, + "loss": 1.1659, + "step": 15606 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970333501957276e-05, + "loss": 1.1771, + "step": 15607 + }, + { + "epoch": 0.02, + "learning_rate": 4.997032967638368e-05, + "loss": 1.172, + "step": 15608 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970325850563584e-05, + "loss": 0.7679, + "step": 15609 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970322024496984e-05, + "loss": 1.0431, + "step": 15610 + }, + { + "epoch": 0.02, + "learning_rate": 4.99703181981839e-05, + "loss": 1.2105, + "step": 15611 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970314371624304e-05, + "loss": 0.9255, + "step": 15612 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970310544818225e-05, + "loss": 1.1481, + "step": 15613 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970306717765645e-05, + "loss": 0.9807, + "step": 15614 + }, + { + "epoch": 0.02, + "learning_rate": 4.997030289046657e-05, + "loss": 1.1817, + "step": 15615 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970299062921e-05, + "loss": 1.0103, + "step": 15616 + }, + { + "epoch": 0.02, + "learning_rate": 4.997029523512893e-05, + "loss": 0.9556, + "step": 15617 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970291407090364e-05, + "loss": 1.0283, + "step": 15618 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970287578805303e-05, + "loss": 1.0917, + "step": 15619 + }, + { + "epoch": 0.02, + "learning_rate": 4.997028375027374e-05, + "loss": 0.9854, + "step": 15620 + }, + { + "epoch": 0.02, + "learning_rate": 4.997027992149569e-05, + "loss": 0.6732, + "step": 15621 + }, + { + "epoch": 0.02, + "learning_rate": 4.997027609247114e-05, + "loss": 1.2088, + "step": 15622 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970272263200094e-05, + "loss": 0.7174, + "step": 15623 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970268433682554e-05, + "loss": 0.8792, + "step": 15624 + }, + { + "epoch": 0.02, + "learning_rate": 4.997026460391851e-05, + "loss": 1.2889, + "step": 15625 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970260773907985e-05, + "loss": 1.0461, + "step": 15626 + }, + { + "epoch": 0.02, + "learning_rate": 4.997025694365095e-05, + "loss": 1.0169, + "step": 15627 + }, + { + "epoch": 0.02, + "learning_rate": 4.997025311314743e-05, + "loss": 0.9686, + "step": 15628 + }, + { + "epoch": 0.02, + "learning_rate": 4.997024928239741e-05, + "loss": 0.9619, + "step": 15629 + }, + { + "epoch": 0.02, + "learning_rate": 4.997024545140089e-05, + "loss": 0.8603, + "step": 15630 + }, + { + "epoch": 0.02, + "learning_rate": 4.997024162015787e-05, + "loss": 0.802, + "step": 15631 + }, + { + "epoch": 0.02, + "learning_rate": 4.997023778866837e-05, + "loss": 0.7393, + "step": 15632 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970233956932364e-05, + "loss": 0.7595, + "step": 15633 + }, + { + "epoch": 0.02, + "learning_rate": 4.997023012494987e-05, + "loss": 0.7294, + "step": 15634 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970226292720874e-05, + "loss": 0.7022, + "step": 15635 + }, + { + "epoch": 0.02, + "learning_rate": 4.997022246024538e-05, + "loss": 0.6951, + "step": 15636 + }, + { + "epoch": 0.02, + "learning_rate": 4.99702186275234e-05, + "loss": 0.6884, + "step": 15637 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970214794554916e-05, + "loss": 0.6694, + "step": 15638 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970210961339934e-05, + "loss": 0.6479, + "step": 15639 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970207127878465e-05, + "loss": 0.6595, + "step": 15640 + }, + { + "epoch": 0.02, + "learning_rate": 4.997020329417049e-05, + "loss": 0.6397, + "step": 15641 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970199460216035e-05, + "loss": 0.6237, + "step": 15642 + }, + { + "epoch": 0.02, + "learning_rate": 4.997019562601507e-05, + "loss": 0.619, + "step": 15643 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970191791567624e-05, + "loss": 0.6189, + "step": 15644 + }, + { + "epoch": 0.02, + "learning_rate": 4.997018795687367e-05, + "loss": 0.5966, + "step": 15645 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970184121933226e-05, + "loss": 0.6017, + "step": 15646 + }, + { + "epoch": 0.02, + "learning_rate": 4.997018028674629e-05, + "loss": 0.599, + "step": 15647 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970176451312855e-05, + "loss": 0.5918, + "step": 15648 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970172615632926e-05, + "loss": 0.5899, + "step": 15649 + }, + { + "epoch": 0.02, + "learning_rate": 4.99701687797065e-05, + "loss": 0.5678, + "step": 15650 + }, + { + "epoch": 0.02, + "learning_rate": 4.997016494353358e-05, + "loss": 0.5699, + "step": 15651 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970161107114165e-05, + "loss": 0.5593, + "step": 15652 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970157270448256e-05, + "loss": 0.5443, + "step": 15653 + }, + { + "epoch": 0.02, + "learning_rate": 4.997015343353585e-05, + "loss": 0.5405, + "step": 15654 + }, + { + "epoch": 0.02, + "learning_rate": 4.997014959637696e-05, + "loss": 0.5375, + "step": 15655 + }, + { + "epoch": 0.02, + "learning_rate": 4.997014575897156e-05, + "loss": 0.5517, + "step": 15656 + }, + { + "epoch": 0.02, + "learning_rate": 4.997014192131967e-05, + "loss": 0.5406, + "step": 15657 + }, + { + "epoch": 0.02, + "learning_rate": 4.997013808342129e-05, + "loss": 0.5306, + "step": 15658 + }, + { + "epoch": 0.02, + "learning_rate": 4.997013424527641e-05, + "loss": 0.5325, + "step": 15659 + }, + { + "epoch": 0.02, + "learning_rate": 4.997013040688504e-05, + "loss": 0.5294, + "step": 15660 + }, + { + "epoch": 0.02, + "learning_rate": 4.997012656824717e-05, + "loss": 0.5251, + "step": 15661 + }, + { + "epoch": 0.02, + "learning_rate": 4.997012272936281e-05, + "loss": 0.5146, + "step": 15662 + }, + { + "epoch": 0.02, + "learning_rate": 4.997011889023195e-05, + "loss": 0.5038, + "step": 15663 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970115050854603e-05, + "loss": 0.5171, + "step": 15664 + }, + { + "epoch": 0.02, + "learning_rate": 4.997011121123076e-05, + "loss": 0.5031, + "step": 15665 + }, + { + "epoch": 0.02, + "learning_rate": 4.997010737136042e-05, + "loss": 0.4977, + "step": 15666 + }, + { + "epoch": 0.02, + "learning_rate": 4.997010353124359e-05, + "loss": 0.5015, + "step": 15667 + }, + { + "epoch": 0.02, + "learning_rate": 4.997009969088026e-05, + "loss": 0.4928, + "step": 15668 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970095850270433e-05, + "loss": 0.4876, + "step": 15669 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970092009414117e-05, + "loss": 0.4895, + "step": 15670 + }, + { + "epoch": 0.02, + "learning_rate": 4.99700881683113e-05, + "loss": 0.4892, + "step": 15671 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970084326961996e-05, + "loss": 0.483, + "step": 15672 + }, + { + "epoch": 0.02, + "learning_rate": 4.99700804853662e-05, + "loss": 0.4925, + "step": 15673 + }, + { + "epoch": 0.02, + "learning_rate": 4.99700766435239e-05, + "loss": 0.4721, + "step": 15674 + }, + { + "epoch": 0.02, + "learning_rate": 4.997007280143512e-05, + "loss": 0.4789, + "step": 15675 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970068959099835e-05, + "loss": 0.4667, + "step": 15676 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970065116518064e-05, + "loss": 0.4765, + "step": 15677 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970061273689794e-05, + "loss": 0.4686, + "step": 15678 + }, + { + "epoch": 0.02, + "learning_rate": 4.997005743061502e-05, + "loss": 0.4647, + "step": 15679 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970053587293766e-05, + "loss": 0.4689, + "step": 15680 + }, + { + "epoch": 0.02, + "learning_rate": 4.997004974372602e-05, + "loss": 0.473, + "step": 15681 + }, + { + "epoch": 0.02, + "learning_rate": 4.997004589991178e-05, + "loss": 0.4641, + "step": 15682 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970042055851034e-05, + "loss": 1.1291, + "step": 15683 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970038211543804e-05, + "loss": 1.5296, + "step": 15684 + }, + { + "epoch": 0.02, + "learning_rate": 4.997003436699007e-05, + "loss": 1.2582, + "step": 15685 + }, + { + "epoch": 0.02, + "learning_rate": 4.997003052218986e-05, + "loss": 0.8071, + "step": 15686 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970026677143145e-05, + "loss": 1.3081, + "step": 15687 + }, + { + "epoch": 0.02, + "learning_rate": 4.997002283184994e-05, + "loss": 1.1521, + "step": 15688 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970018986310236e-05, + "loss": 1.0356, + "step": 15689 + }, + { + "epoch": 0.02, + "learning_rate": 4.997001514052404e-05, + "loss": 0.9397, + "step": 15690 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970011294491354e-05, + "loss": 1.3001, + "step": 15691 + }, + { + "epoch": 0.02, + "learning_rate": 4.997000744821218e-05, + "loss": 1.0951, + "step": 15692 + }, + { + "epoch": 0.02, + "learning_rate": 4.99700036016865e-05, + "loss": 1.0805, + "step": 15693 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969999754914335e-05, + "loss": 0.788, + "step": 15694 + }, + { + "epoch": 0.02, + "learning_rate": 4.996999590789567e-05, + "loss": 1.1341, + "step": 15695 + }, + { + "epoch": 0.02, + "learning_rate": 4.996999206063051e-05, + "loss": 1.2373, + "step": 15696 + }, + { + "epoch": 0.02, + "learning_rate": 4.996998821311887e-05, + "loss": 0.9787, + "step": 15697 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969984365360723e-05, + "loss": 1.1946, + "step": 15698 + }, + { + "epoch": 0.02, + "learning_rate": 4.996998051735609e-05, + "loss": 0.9751, + "step": 15699 + }, + { + "epoch": 0.02, + "learning_rate": 4.996997666910497e-05, + "loss": 1.1606, + "step": 15700 + }, + { + "epoch": 0.02, + "learning_rate": 4.996997282060735e-05, + "loss": 0.8047, + "step": 15701 + }, + { + "epoch": 0.02, + "learning_rate": 4.996996897186323e-05, + "loss": 1.475, + "step": 15702 + }, + { + "epoch": 0.02, + "learning_rate": 4.996996512287263e-05, + "loss": 1.091, + "step": 15703 + }, + { + "epoch": 0.02, + "learning_rate": 4.996996127363552e-05, + "loss": 1.1386, + "step": 15704 + }, + { + "epoch": 0.02, + "learning_rate": 4.996995742415194e-05, + "loss": 1.4912, + "step": 15705 + }, + { + "epoch": 0.02, + "learning_rate": 4.996995357442185e-05, + "loss": 1.189, + "step": 15706 + }, + { + "epoch": 0.02, + "learning_rate": 4.996994972444527e-05, + "loss": 1.1181, + "step": 15707 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969945874222204e-05, + "loss": 1.01, + "step": 15708 + }, + { + "epoch": 0.02, + "learning_rate": 4.996994202375264e-05, + "loss": 1.1171, + "step": 15709 + }, + { + "epoch": 0.02, + "learning_rate": 4.996993817303658e-05, + "loss": 1.1735, + "step": 15710 + }, + { + "epoch": 0.02, + "learning_rate": 4.996993432207403e-05, + "loss": 1.1781, + "step": 15711 + }, + { + "epoch": 0.02, + "learning_rate": 4.996993047086499e-05, + "loss": 1.0949, + "step": 15712 + }, + { + "epoch": 0.02, + "learning_rate": 4.996992661940946e-05, + "loss": 1.2326, + "step": 15713 + }, + { + "epoch": 0.02, + "learning_rate": 4.996992276770742e-05, + "loss": 1.1842, + "step": 15714 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969918915758903e-05, + "loss": 1.1811, + "step": 15715 + }, + { + "epoch": 0.02, + "learning_rate": 4.996991506356389e-05, + "loss": 0.9913, + "step": 15716 + }, + { + "epoch": 0.02, + "learning_rate": 4.996991121112239e-05, + "loss": 1.2811, + "step": 15717 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969907358434395e-05, + "loss": 1.0541, + "step": 15718 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969903505499904e-05, + "loss": 1.0126, + "step": 15719 + }, + { + "epoch": 0.02, + "learning_rate": 4.996989965231892e-05, + "loss": 1.0806, + "step": 15720 + }, + { + "epoch": 0.02, + "learning_rate": 4.996989579889145e-05, + "loss": 1.4714, + "step": 15721 + }, + { + "epoch": 0.02, + "learning_rate": 4.996989194521748e-05, + "loss": 1.0939, + "step": 15722 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969888091297024e-05, + "loss": 1.6136, + "step": 15723 + }, + { + "epoch": 0.02, + "learning_rate": 4.996988423713007e-05, + "loss": 1.173, + "step": 15724 + }, + { + "epoch": 0.02, + "learning_rate": 4.996988038271663e-05, + "loss": 1.1176, + "step": 15725 + }, + { + "epoch": 0.02, + "learning_rate": 4.996987652805669e-05, + "loss": 1.0242, + "step": 15726 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969872673150264e-05, + "loss": 0.894, + "step": 15727 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969868817997345e-05, + "loss": 0.8595, + "step": 15728 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969864962597934e-05, + "loss": 1.0393, + "step": 15729 + }, + { + "epoch": 0.02, + "learning_rate": 4.996986110695203e-05, + "loss": 1.8482, + "step": 15730 + }, + { + "epoch": 0.02, + "learning_rate": 4.996985725105964e-05, + "loss": 1.6636, + "step": 15731 + }, + { + "epoch": 0.02, + "learning_rate": 4.996985339492075e-05, + "loss": 1.0546, + "step": 15732 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969849538535374e-05, + "loss": 0.9092, + "step": 15733 + }, + { + "epoch": 0.02, + "learning_rate": 4.99698456819035e-05, + "loss": 1.3023, + "step": 15734 + }, + { + "epoch": 0.02, + "learning_rate": 4.996984182502514e-05, + "loss": 1.2776, + "step": 15735 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969837967900285e-05, + "loss": 1.2188, + "step": 15736 + }, + { + "epoch": 0.02, + "learning_rate": 4.996983411052893e-05, + "loss": 1.1586, + "step": 15737 + }, + { + "epoch": 0.02, + "learning_rate": 4.99698302529111e-05, + "loss": 1.073, + "step": 15738 + }, + { + "epoch": 0.02, + "learning_rate": 4.996982639504677e-05, + "loss": 1.191, + "step": 15739 + }, + { + "epoch": 0.02, + "learning_rate": 4.996982253693595e-05, + "loss": 1.5325, + "step": 15740 + }, + { + "epoch": 0.02, + "learning_rate": 4.996981867857864e-05, + "loss": 1.4183, + "step": 15741 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969814819974834e-05, + "loss": 1.0973, + "step": 15742 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969810961124536e-05, + "loss": 1.0266, + "step": 15743 + }, + { + "epoch": 0.02, + "learning_rate": 4.996980710202775e-05, + "loss": 1.138, + "step": 15744 + }, + { + "epoch": 0.02, + "learning_rate": 4.996980324268447e-05, + "loss": 1.094, + "step": 15745 + }, + { + "epoch": 0.02, + "learning_rate": 4.99697993830947e-05, + "loss": 0.8729, + "step": 15746 + }, + { + "epoch": 0.02, + "learning_rate": 4.996979552325844e-05, + "loss": 0.9571, + "step": 15747 + }, + { + "epoch": 0.02, + "learning_rate": 4.996979166317569e-05, + "loss": 1.3084, + "step": 15748 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969787802846445e-05, + "loss": 1.196, + "step": 15749 + }, + { + "epoch": 0.02, + "learning_rate": 4.996978394227071e-05, + "loss": 0.8647, + "step": 15750 + }, + { + "epoch": 0.02, + "learning_rate": 4.996978008144848e-05, + "loss": 1.1199, + "step": 15751 + }, + { + "epoch": 0.02, + "learning_rate": 4.996977622037976e-05, + "loss": 0.9679, + "step": 15752 + }, + { + "epoch": 0.02, + "learning_rate": 4.996977235906456e-05, + "loss": 1.1227, + "step": 15753 + }, + { + "epoch": 0.02, + "learning_rate": 4.996976849750285e-05, + "loss": 1.3058, + "step": 15754 + }, + { + "epoch": 0.02, + "learning_rate": 4.996976463569467e-05, + "loss": 0.9908, + "step": 15755 + }, + { + "epoch": 0.02, + "learning_rate": 4.996976077363998e-05, + "loss": 1.087, + "step": 15756 + }, + { + "epoch": 0.02, + "learning_rate": 4.996975691133882e-05, + "loss": 1.2508, + "step": 15757 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969753048791146e-05, + "loss": 1.0065, + "step": 15758 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969749185996995e-05, + "loss": 0.9778, + "step": 15759 + }, + { + "epoch": 0.02, + "learning_rate": 4.996974532295635e-05, + "loss": 1.1656, + "step": 15760 + }, + { + "epoch": 0.02, + "learning_rate": 4.996974145966922e-05, + "loss": 0.9799, + "step": 15761 + }, + { + "epoch": 0.02, + "learning_rate": 4.996973759613559e-05, + "loss": 1.1699, + "step": 15762 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969733732355475e-05, + "loss": 1.233, + "step": 15763 + }, + { + "epoch": 0.02, + "learning_rate": 4.996972986832886e-05, + "loss": 1.0715, + "step": 15764 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969726004055765e-05, + "loss": 1.1573, + "step": 15765 + }, + { + "epoch": 0.02, + "learning_rate": 4.996972213953617e-05, + "loss": 1.1353, + "step": 15766 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969718274770095e-05, + "loss": 1.2174, + "step": 15767 + }, + { + "epoch": 0.02, + "learning_rate": 4.996971440975752e-05, + "loss": 1.0838, + "step": 15768 + }, + { + "epoch": 0.02, + "learning_rate": 4.996971054449846e-05, + "loss": 0.8773, + "step": 15769 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969706678992913e-05, + "loss": 1.0404, + "step": 15770 + }, + { + "epoch": 0.02, + "learning_rate": 4.996970281324087e-05, + "loss": 1.2448, + "step": 15771 + }, + { + "epoch": 0.02, + "learning_rate": 4.996969894724234e-05, + "loss": 0.8504, + "step": 15772 + }, + { + "epoch": 0.02, + "learning_rate": 4.996969508099731e-05, + "loss": 1.1887, + "step": 15773 + }, + { + "epoch": 0.02, + "learning_rate": 4.99696912145058e-05, + "loss": 1.093, + "step": 15774 + }, + { + "epoch": 0.02, + "learning_rate": 4.99696873477678e-05, + "loss": 1.0875, + "step": 15775 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969683480783304e-05, + "loss": 1.0078, + "step": 15776 + }, + { + "epoch": 0.02, + "learning_rate": 4.996967961355232e-05, + "loss": 1.1218, + "step": 15777 + }, + { + "epoch": 0.02, + "learning_rate": 4.996967574607485e-05, + "loss": 1.436, + "step": 15778 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969671878350884e-05, + "loss": 1.0902, + "step": 15779 + }, + { + "epoch": 0.02, + "learning_rate": 4.996966801038043e-05, + "loss": 0.6765, + "step": 15780 + }, + { + "epoch": 0.02, + "learning_rate": 4.996966414216349e-05, + "loss": 1.1032, + "step": 15781 + }, + { + "epoch": 0.02, + "learning_rate": 4.996966027370005e-05, + "loss": 1.1222, + "step": 15782 + }, + { + "epoch": 0.02, + "learning_rate": 4.996965640499013e-05, + "loss": 0.9782, + "step": 15783 + }, + { + "epoch": 0.02, + "learning_rate": 4.996965253603372e-05, + "loss": 0.9998, + "step": 15784 + }, + { + "epoch": 0.02, + "learning_rate": 4.996964866683081e-05, + "loss": 1.0014, + "step": 15785 + }, + { + "epoch": 0.02, + "learning_rate": 4.996964479738142e-05, + "loss": 1.6226, + "step": 15786 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969640927685533e-05, + "loss": 1.6385, + "step": 15787 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969637057743156e-05, + "loss": 1.5961, + "step": 15788 + }, + { + "epoch": 0.02, + "learning_rate": 4.99696331875543e-05, + "loss": 1.5374, + "step": 15789 + }, + { + "epoch": 0.02, + "learning_rate": 4.996962931711894e-05, + "loss": 1.485, + "step": 15790 + }, + { + "epoch": 0.02, + "learning_rate": 4.99696254464371e-05, + "loss": 1.4111, + "step": 15791 + }, + { + "epoch": 0.02, + "learning_rate": 4.996962157550877e-05, + "loss": 1.4236, + "step": 15792 + }, + { + "epoch": 0.02, + "learning_rate": 4.996961770433395e-05, + "loss": 1.3338, + "step": 15793 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969613832912636e-05, + "loss": 1.2793, + "step": 15794 + }, + { + "epoch": 0.02, + "learning_rate": 4.996960996124483e-05, + "loss": 1.234, + "step": 15795 + }, + { + "epoch": 0.02, + "learning_rate": 4.996960608933055e-05, + "loss": 1.1902, + "step": 15796 + }, + { + "epoch": 0.02, + "learning_rate": 4.996960221716976e-05, + "loss": 1.1726, + "step": 15797 + }, + { + "epoch": 0.02, + "learning_rate": 4.99695983447625e-05, + "loss": 1.1311, + "step": 15798 + }, + { + "epoch": 0.02, + "learning_rate": 4.996959447210874e-05, + "loss": 1.0969, + "step": 15799 + }, + { + "epoch": 0.02, + "learning_rate": 4.996959059920849e-05, + "loss": 1.1718, + "step": 15800 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969586726061753e-05, + "loss": 1.2182, + "step": 15801 + }, + { + "epoch": 0.02, + "learning_rate": 4.996958285266853e-05, + "loss": 1.1148, + "step": 15802 + }, + { + "epoch": 0.02, + "learning_rate": 4.996957897902881e-05, + "loss": 1.2094, + "step": 15803 + }, + { + "epoch": 0.02, + "learning_rate": 4.996957510514261e-05, + "loss": 1.1015, + "step": 15804 + }, + { + "epoch": 0.02, + "learning_rate": 4.996957123100992e-05, + "loss": 0.9884, + "step": 15805 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969567356630734e-05, + "loss": 1.1299, + "step": 15806 + }, + { + "epoch": 0.02, + "learning_rate": 4.996956348200506e-05, + "loss": 1.0892, + "step": 15807 + }, + { + "epoch": 0.02, + "learning_rate": 4.99695596071329e-05, + "loss": 0.977, + "step": 15808 + }, + { + "epoch": 0.02, + "learning_rate": 4.996955573201425e-05, + "loss": 1.168, + "step": 15809 + }, + { + "epoch": 0.02, + "learning_rate": 4.996955185664911e-05, + "loss": 1.2611, + "step": 15810 + }, + { + "epoch": 0.02, + "learning_rate": 4.996954798103748e-05, + "loss": 1.1094, + "step": 15811 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969544105179364e-05, + "loss": 1.2689, + "step": 15812 + }, + { + "epoch": 0.02, + "learning_rate": 4.996954022907476e-05, + "loss": 1.1934, + "step": 15813 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969536352723664e-05, + "loss": 0.558, + "step": 15814 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969532476126094e-05, + "loss": 0.8069, + "step": 15815 + }, + { + "epoch": 0.02, + "learning_rate": 4.996952859928202e-05, + "loss": 1.4292, + "step": 15816 + }, + { + "epoch": 0.02, + "learning_rate": 4.996952472219146e-05, + "loss": 1.057, + "step": 15817 + }, + { + "epoch": 0.02, + "learning_rate": 4.996952084485441e-05, + "loss": 1.5976, + "step": 15818 + }, + { + "epoch": 0.02, + "learning_rate": 4.996951696727087e-05, + "loss": 1.5469, + "step": 15819 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969513089440844e-05, + "loss": 1.0112, + "step": 15820 + }, + { + "epoch": 0.02, + "learning_rate": 4.996950921136433e-05, + "loss": 1.5691, + "step": 15821 + }, + { + "epoch": 0.02, + "learning_rate": 4.996950533304133e-05, + "loss": 1.4388, + "step": 15822 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969501454471834e-05, + "loss": 0.7895, + "step": 15823 + }, + { + "epoch": 0.02, + "learning_rate": 4.996949757565586e-05, + "loss": 0.6242, + "step": 15824 + }, + { + "epoch": 0.02, + "learning_rate": 4.996949369659339e-05, + "loss": 1.5227, + "step": 15825 + }, + { + "epoch": 0.02, + "learning_rate": 4.996948981728443e-05, + "loss": 1.4323, + "step": 15826 + }, + { + "epoch": 0.02, + "learning_rate": 4.996948593772899e-05, + "loss": 1.4847, + "step": 15827 + }, + { + "epoch": 0.02, + "learning_rate": 4.996948205792705e-05, + "loss": 1.0376, + "step": 15828 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969478177878636e-05, + "loss": 1.1002, + "step": 15829 + }, + { + "epoch": 0.02, + "learning_rate": 4.996947429758373e-05, + "loss": 0.8962, + "step": 15830 + }, + { + "epoch": 0.02, + "learning_rate": 4.996947041704233e-05, + "loss": 0.9425, + "step": 15831 + }, + { + "epoch": 0.02, + "learning_rate": 4.996946653625444e-05, + "loss": 1.2343, + "step": 15832 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969462655220066e-05, + "loss": 1.0559, + "step": 15833 + }, + { + "epoch": 0.02, + "learning_rate": 4.99694587739392e-05, + "loss": 1.1196, + "step": 15834 + }, + { + "epoch": 0.02, + "learning_rate": 4.996945489241186e-05, + "loss": 1.2245, + "step": 15835 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969451010638025e-05, + "loss": 1.2001, + "step": 15836 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969447128617696e-05, + "loss": 1.1571, + "step": 15837 + }, + { + "epoch": 0.02, + "learning_rate": 4.996944324635088e-05, + "loss": 1.1728, + "step": 15838 + }, + { + "epoch": 0.02, + "learning_rate": 4.996943936383758e-05, + "loss": 0.9492, + "step": 15839 + }, + { + "epoch": 0.02, + "learning_rate": 4.996943548107779e-05, + "loss": 0.8979, + "step": 15840 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969431598071513e-05, + "loss": 1.3406, + "step": 15841 + }, + { + "epoch": 0.02, + "learning_rate": 4.996942771481875e-05, + "loss": 1.2007, + "step": 15842 + }, + { + "epoch": 0.02, + "learning_rate": 4.99694238313195e-05, + "loss": 1.0421, + "step": 15843 + }, + { + "epoch": 0.02, + "learning_rate": 4.996941994757376e-05, + "loss": 0.7652, + "step": 15844 + }, + { + "epoch": 0.02, + "learning_rate": 4.996941606358153e-05, + "loss": 0.6384, + "step": 15845 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969412179342824e-05, + "loss": 0.7307, + "step": 15846 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969408294857615e-05, + "loss": 1.1159, + "step": 15847 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969404410125934e-05, + "loss": 1.1519, + "step": 15848 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969400525147745e-05, + "loss": 1.179, + "step": 15849 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969396639923084e-05, + "loss": 0.9514, + "step": 15850 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969392754451936e-05, + "loss": 1.0071, + "step": 15851 + }, + { + "epoch": 0.02, + "learning_rate": 4.996938886873429e-05, + "loss": 0.697, + "step": 15852 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969384982770166e-05, + "loss": 0.7484, + "step": 15853 + }, + { + "epoch": 0.02, + "learning_rate": 4.996938109655955e-05, + "loss": 1.0321, + "step": 15854 + }, + { + "epoch": 0.02, + "learning_rate": 4.996937721010245e-05, + "loss": 0.9037, + "step": 15855 + }, + { + "epoch": 0.02, + "learning_rate": 4.996937332339887e-05, + "loss": 0.9142, + "step": 15856 + }, + { + "epoch": 0.02, + "learning_rate": 4.996936943644879e-05, + "loss": 1.0078, + "step": 15857 + }, + { + "epoch": 0.02, + "learning_rate": 4.996936554925223e-05, + "loss": 1.4005, + "step": 15858 + }, + { + "epoch": 0.02, + "learning_rate": 4.996936166180918e-05, + "loss": 1.2878, + "step": 15859 + }, + { + "epoch": 0.02, + "learning_rate": 4.996935777411964e-05, + "loss": 1.5016, + "step": 15860 + }, + { + "epoch": 0.02, + "learning_rate": 4.996935388618362e-05, + "loss": 0.8036, + "step": 15861 + }, + { + "epoch": 0.02, + "learning_rate": 4.996934999800111e-05, + "loss": 1.081, + "step": 15862 + }, + { + "epoch": 0.02, + "learning_rate": 4.996934610957211e-05, + "loss": 0.9565, + "step": 15863 + }, + { + "epoch": 0.02, + "learning_rate": 4.996934222089663e-05, + "loss": 1.0705, + "step": 15864 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969338331974655e-05, + "loss": 0.9327, + "step": 15865 + }, + { + "epoch": 0.02, + "learning_rate": 4.99693344428062e-05, + "loss": 1.0016, + "step": 15866 + }, + { + "epoch": 0.02, + "learning_rate": 4.996933055339126e-05, + "loss": 1.0836, + "step": 15867 + }, + { + "epoch": 0.02, + "learning_rate": 4.996932666372982e-05, + "loss": 0.4842, + "step": 15868 + }, + { + "epoch": 0.02, + "learning_rate": 4.99693227738219e-05, + "loss": 0.452, + "step": 15869 + }, + { + "epoch": 0.02, + "learning_rate": 4.996931888366751e-05, + "loss": 0.3079, + "step": 15870 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969314993266614e-05, + "loss": 0.4565, + "step": 15871 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969311102619234e-05, + "loss": 1.4961, + "step": 15872 + }, + { + "epoch": 0.02, + "learning_rate": 4.996930721172537e-05, + "loss": 0.9223, + "step": 15873 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969303320585014e-05, + "loss": 0.8829, + "step": 15874 + }, + { + "epoch": 0.02, + "learning_rate": 4.996929942919818e-05, + "loss": 1.1984, + "step": 15875 + }, + { + "epoch": 0.02, + "learning_rate": 4.996929553756486e-05, + "loss": 0.9723, + "step": 15876 + }, + { + "epoch": 0.02, + "learning_rate": 4.996929164568505e-05, + "loss": 0.7124, + "step": 15877 + }, + { + "epoch": 0.02, + "learning_rate": 4.996928775355875e-05, + "loss": 1.1402, + "step": 15878 + }, + { + "epoch": 0.02, + "learning_rate": 4.996928386118597e-05, + "loss": 0.9664, + "step": 15879 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969279968566704e-05, + "loss": 1.0777, + "step": 15880 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969276075700944e-05, + "loss": 1.0117, + "step": 15881 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969272182588705e-05, + "loss": 0.7138, + "step": 15882 + }, + { + "epoch": 0.02, + "learning_rate": 4.996926828922997e-05, + "loss": 0.6949, + "step": 15883 + }, + { + "epoch": 0.02, + "learning_rate": 4.996926439562477e-05, + "loss": 0.6052, + "step": 15884 + }, + { + "epoch": 0.02, + "learning_rate": 4.996926050177306e-05, + "loss": 0.6187, + "step": 15885 + }, + { + "epoch": 0.02, + "learning_rate": 4.996925660767488e-05, + "loss": 0.6995, + "step": 15886 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969252713330204e-05, + "loss": 0.6548, + "step": 15887 + }, + { + "epoch": 0.02, + "learning_rate": 4.996924881873905e-05, + "loss": 0.5436, + "step": 15888 + }, + { + "epoch": 0.02, + "learning_rate": 4.996924492390141e-05, + "loss": 0.5614, + "step": 15889 + }, + { + "epoch": 0.02, + "learning_rate": 4.996924102881728e-05, + "loss": 0.4978, + "step": 15890 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969237133486664e-05, + "loss": 0.6292, + "step": 15891 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969233237909565e-05, + "loss": 1.0817, + "step": 15892 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969229342085974e-05, + "loss": 0.889, + "step": 15893 + }, + { + "epoch": 0.02, + "learning_rate": 4.99692254460159e-05, + "loss": 1.2768, + "step": 15894 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969221549699345e-05, + "loss": 1.5298, + "step": 15895 + }, + { + "epoch": 0.02, + "learning_rate": 4.99692176531363e-05, + "loss": 2.4553, + "step": 15896 + }, + { + "epoch": 0.02, + "learning_rate": 4.996921375632677e-05, + "loss": 1.4702, + "step": 15897 + }, + { + "epoch": 0.02, + "learning_rate": 4.996920985927076e-05, + "loss": 0.6525, + "step": 15898 + }, + { + "epoch": 0.02, + "learning_rate": 4.996920596196826e-05, + "loss": 1.0691, + "step": 15899 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969202064419276e-05, + "loss": 1.0772, + "step": 15900 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969198166623805e-05, + "loss": 0.3766, + "step": 15901 + }, + { + "epoch": 0.02, + "learning_rate": 4.996919426858185e-05, + "loss": 0.4076, + "step": 15902 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969190370293404e-05, + "loss": 0.8458, + "step": 15903 + }, + { + "epoch": 0.02, + "learning_rate": 4.996918647175848e-05, + "loss": 1.4799, + "step": 15904 + }, + { + "epoch": 0.02, + "learning_rate": 4.996918257297707e-05, + "loss": 0.9496, + "step": 15905 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969178673949167e-05, + "loss": 0.9569, + "step": 15906 + }, + { + "epoch": 0.02, + "learning_rate": 4.996917477467479e-05, + "loss": 1.0882, + "step": 15907 + }, + { + "epoch": 0.02, + "learning_rate": 4.996917087515391e-05, + "loss": 1.2019, + "step": 15908 + }, + { + "epoch": 0.02, + "learning_rate": 4.996916697538656e-05, + "loss": 1.1008, + "step": 15909 + }, + { + "epoch": 0.02, + "learning_rate": 4.996916307537273e-05, + "loss": 0.9841, + "step": 15910 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969159175112404e-05, + "loss": 2.6722, + "step": 15911 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969155274605594e-05, + "loss": 1.1447, + "step": 15912 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969151373852305e-05, + "loss": 0.8918, + "step": 15913 + }, + { + "epoch": 0.02, + "learning_rate": 4.996914747285252e-05, + "loss": 0.1826, + "step": 15914 + }, + { + "epoch": 0.02, + "learning_rate": 4.996914357160626e-05, + "loss": 0.1544, + "step": 15915 + }, + { + "epoch": 0.02, + "learning_rate": 4.996913967011351e-05, + "loss": 0.1236, + "step": 15916 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969135768374274e-05, + "loss": 0.0733, + "step": 15917 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969131866388565e-05, + "loss": 0.1476, + "step": 15918 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969127964156356e-05, + "loss": 0.2543, + "step": 15919 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969124061677674e-05, + "loss": 0.4413, + "step": 15920 + }, + { + "epoch": 0.02, + "learning_rate": 4.99691201589525e-05, + "loss": 1.0583, + "step": 15921 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969116255980844e-05, + "loss": 1.0857, + "step": 15922 + }, + { + "epoch": 0.02, + "learning_rate": 4.99691123527627e-05, + "loss": 1.2017, + "step": 15923 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969108449298074e-05, + "loss": 1.2358, + "step": 15924 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969104545586966e-05, + "loss": 1.0742, + "step": 15925 + }, + { + "epoch": 0.02, + "learning_rate": 4.996910064162937e-05, + "loss": 2.2286, + "step": 15926 + }, + { + "epoch": 0.02, + "learning_rate": 4.996909673742529e-05, + "loss": 3.0932, + "step": 15927 + }, + { + "epoch": 0.02, + "learning_rate": 4.996909283297473e-05, + "loss": 3.0961, + "step": 15928 + }, + { + "epoch": 0.02, + "learning_rate": 4.996908892827769e-05, + "loss": 3.0471, + "step": 15929 + }, + { + "epoch": 0.02, + "learning_rate": 4.996908502333415e-05, + "loss": 3.085, + "step": 15930 + }, + { + "epoch": 0.02, + "learning_rate": 4.996908111814413e-05, + "loss": 2.2995, + "step": 15931 + }, + { + "epoch": 0.02, + "learning_rate": 4.996907721270764e-05, + "loss": 1.6166, + "step": 15932 + }, + { + "epoch": 0.02, + "learning_rate": 4.996907330702465e-05, + "loss": 0.9383, + "step": 15933 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969069401095184e-05, + "loss": 1.0632, + "step": 15934 + }, + { + "epoch": 0.02, + "learning_rate": 4.996906549491923e-05, + "loss": 1.0955, + "step": 15935 + }, + { + "epoch": 0.02, + "learning_rate": 4.996906158849679e-05, + "loss": 1.2428, + "step": 15936 + }, + { + "epoch": 0.02, + "learning_rate": 4.996905768182787e-05, + "loss": 1.0717, + "step": 15937 + }, + { + "epoch": 0.02, + "learning_rate": 4.996905377491247e-05, + "loss": 1.1718, + "step": 15938 + }, + { + "epoch": 0.02, + "learning_rate": 4.996904986775057e-05, + "loss": 1.1196, + "step": 15939 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969045960342197e-05, + "loss": 1.2579, + "step": 15940 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969042052687344e-05, + "loss": 0.9889, + "step": 15941 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969038144786004e-05, + "loss": 0.9782, + "step": 15942 + }, + { + "epoch": 0.02, + "learning_rate": 4.996903423663818e-05, + "loss": 1.1278, + "step": 15943 + }, + { + "epoch": 0.02, + "learning_rate": 4.996903032824387e-05, + "loss": 0.9426, + "step": 15944 + }, + { + "epoch": 0.02, + "learning_rate": 4.996902641960308e-05, + "loss": 1.1588, + "step": 15945 + }, + { + "epoch": 0.02, + "learning_rate": 4.99690225107158e-05, + "loss": 1.3812, + "step": 15946 + }, + { + "epoch": 0.02, + "learning_rate": 4.996901860158204e-05, + "loss": 1.137, + "step": 15947 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969014692201796e-05, + "loss": 1.0239, + "step": 15948 + }, + { + "epoch": 0.02, + "learning_rate": 4.996901078257508e-05, + "loss": 0.9451, + "step": 15949 + }, + { + "epoch": 0.02, + "learning_rate": 4.996900687270186e-05, + "loss": 1.1793, + "step": 15950 + }, + { + "epoch": 0.02, + "learning_rate": 4.996900296258217e-05, + "loss": 1.1752, + "step": 15951 + }, + { + "epoch": 0.02, + "learning_rate": 4.996899905221599e-05, + "loss": 1.1459, + "step": 15952 + }, + { + "epoch": 0.02, + "learning_rate": 4.996899514160333e-05, + "loss": 0.7601, + "step": 15953 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968991230744186e-05, + "loss": 0.7365, + "step": 15954 + }, + { + "epoch": 0.02, + "learning_rate": 4.996898731963856e-05, + "loss": 1.0842, + "step": 15955 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968983408286444e-05, + "loss": 1.3112, + "step": 15956 + }, + { + "epoch": 0.02, + "learning_rate": 4.996897949668785e-05, + "loss": 1.1491, + "step": 15957 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968975584842776e-05, + "loss": 1.12, + "step": 15958 + }, + { + "epoch": 0.02, + "learning_rate": 4.996897167275121e-05, + "loss": 1.1248, + "step": 15959 + }, + { + "epoch": 0.02, + "learning_rate": 4.996896776041317e-05, + "loss": 0.9892, + "step": 15960 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968963847828644e-05, + "loss": 1.2619, + "step": 15961 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968959934997634e-05, + "loss": 0.9759, + "step": 15962 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968956021920144e-05, + "loss": 1.103, + "step": 15963 + }, + { + "epoch": 0.02, + "learning_rate": 4.996895210859617e-05, + "loss": 1.113, + "step": 15964 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968948195025704e-05, + "loss": 1.1596, + "step": 15965 + }, + { + "epoch": 0.02, + "learning_rate": 4.996894428120876e-05, + "loss": 1.0598, + "step": 15966 + }, + { + "epoch": 0.02, + "learning_rate": 4.996894036714534e-05, + "loss": 1.316, + "step": 15967 + }, + { + "epoch": 0.02, + "learning_rate": 4.996893645283543e-05, + "loss": 2.0774, + "step": 15968 + }, + { + "epoch": 0.02, + "learning_rate": 4.996893253827904e-05, + "loss": 0.9468, + "step": 15969 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968928623476163e-05, + "loss": 1.0569, + "step": 15970 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968924708426815e-05, + "loss": 1.1094, + "step": 15971 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968920793130965e-05, + "loss": 1.1638, + "step": 15972 + }, + { + "epoch": 0.02, + "learning_rate": 4.996891687758865e-05, + "loss": 1.1114, + "step": 15973 + }, + { + "epoch": 0.02, + "learning_rate": 4.996891296179985e-05, + "loss": 1.0219, + "step": 15974 + }, + { + "epoch": 0.02, + "learning_rate": 4.996890904576455e-05, + "loss": 1.0367, + "step": 15975 + }, + { + "epoch": 0.02, + "learning_rate": 4.996890512948279e-05, + "loss": 1.0037, + "step": 15976 + }, + { + "epoch": 0.02, + "learning_rate": 4.996890121295454e-05, + "loss": 1.224, + "step": 15977 + }, + { + "epoch": 0.02, + "learning_rate": 4.996889729617981e-05, + "loss": 0.7068, + "step": 15978 + }, + { + "epoch": 0.02, + "learning_rate": 4.996889337915859e-05, + "loss": 0.9655, + "step": 15979 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968889461890886e-05, + "loss": 1.5248, + "step": 15980 + }, + { + "epoch": 0.02, + "learning_rate": 4.996888554437671e-05, + "loss": 1.0857, + "step": 15981 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968881626616045e-05, + "loss": 1.252, + "step": 15982 + }, + { + "epoch": 0.02, + "learning_rate": 4.99688777086089e-05, + "loss": 0.8674, + "step": 15983 + }, + { + "epoch": 0.02, + "learning_rate": 4.996887379035527e-05, + "loss": 1.0696, + "step": 15984 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968869871855164e-05, + "loss": 1.2208, + "step": 15985 + }, + { + "epoch": 0.02, + "learning_rate": 4.996886595310857e-05, + "loss": 1.0645, + "step": 15986 + }, + { + "epoch": 0.02, + "learning_rate": 4.99688620341155e-05, + "loss": 0.9622, + "step": 15987 + }, + { + "epoch": 0.02, + "learning_rate": 4.996885811487594e-05, + "loss": 0.8759, + "step": 15988 + }, + { + "epoch": 0.02, + "learning_rate": 4.99688541953899e-05, + "loss": 1.0974, + "step": 15989 + }, + { + "epoch": 0.02, + "learning_rate": 4.996885027565738e-05, + "loss": 1.2132, + "step": 15990 + }, + { + "epoch": 0.02, + "learning_rate": 4.996884635567837e-05, + "loss": 0.9603, + "step": 15991 + }, + { + "epoch": 0.02, + "learning_rate": 4.996884243545289e-05, + "loss": 0.777, + "step": 15992 + }, + { + "epoch": 0.02, + "learning_rate": 4.996883851498092e-05, + "loss": 0.8661, + "step": 15993 + }, + { + "epoch": 0.02, + "learning_rate": 4.996883459426247e-05, + "loss": 0.9645, + "step": 15994 + }, + { + "epoch": 0.02, + "learning_rate": 4.996883067329754e-05, + "loss": 0.9234, + "step": 15995 + }, + { + "epoch": 0.02, + "learning_rate": 4.996882675208613e-05, + "loss": 1.0664, + "step": 15996 + }, + { + "epoch": 0.02, + "learning_rate": 4.996882283062824e-05, + "loss": 1.2544, + "step": 15997 + }, + { + "epoch": 0.02, + "learning_rate": 4.996881890892387e-05, + "loss": 0.7092, + "step": 15998 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968814986973e-05, + "loss": 1.0547, + "step": 15999 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968811064775664e-05, + "loss": 1.257, + "step": 16000 + }, + { + "epoch": 0.02, + "eval_loss": 1.0286349058151245, + "eval_runtime": 85.5057, + "eval_samples_per_second": 16.198, + "eval_steps_per_second": 4.058, + "step": 16000 + }, + { + "epoch": 0.02, + "learning_rate": 4.996880714233184e-05, + "loss": 1.335, + "step": 16001 + }, + { + "epoch": 0.02, + "learning_rate": 4.996880321964155e-05, + "loss": 1.232, + "step": 16002 + }, + { + "epoch": 0.02, + "learning_rate": 4.996879929670476e-05, + "loss": 0.966, + "step": 16003 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968795373521494e-05, + "loss": 1.0119, + "step": 16004 + }, + { + "epoch": 0.02, + "learning_rate": 4.996879145009174e-05, + "loss": 1.1361, + "step": 16005 + }, + { + "epoch": 0.02, + "learning_rate": 4.996878752641552e-05, + "loss": 1.0948, + "step": 16006 + }, + { + "epoch": 0.02, + "learning_rate": 4.996878360249281e-05, + "loss": 1.1098, + "step": 16007 + }, + { + "epoch": 0.02, + "learning_rate": 4.996877967832362e-05, + "loss": 1.0399, + "step": 16008 + }, + { + "epoch": 0.02, + "learning_rate": 4.996877575390795e-05, + "loss": 1.1335, + "step": 16009 + }, + { + "epoch": 0.02, + "learning_rate": 4.99687718292458e-05, + "loss": 1.0527, + "step": 16010 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968767904337164e-05, + "loss": 1.038, + "step": 16011 + }, + { + "epoch": 0.02, + "learning_rate": 4.996876397918205e-05, + "loss": 0.973, + "step": 16012 + }, + { + "epoch": 0.02, + "learning_rate": 4.996876005378045e-05, + "loss": 1.1094, + "step": 16013 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968756128132376e-05, + "loss": 0.95, + "step": 16014 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968752202237814e-05, + "loss": 1.0416, + "step": 16015 + }, + { + "epoch": 0.02, + "learning_rate": 4.996874827609678e-05, + "loss": 1.2394, + "step": 16016 + }, + { + "epoch": 0.02, + "learning_rate": 4.996874434970925e-05, + "loss": 1.0531, + "step": 16017 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968740423075255e-05, + "loss": 1.0012, + "step": 16018 + }, + { + "epoch": 0.02, + "learning_rate": 4.996873649619477e-05, + "loss": 0.9496, + "step": 16019 + }, + { + "epoch": 0.02, + "learning_rate": 4.996873256906781e-05, + "loss": 1.1404, + "step": 16020 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968728641694364e-05, + "loss": 0.7881, + "step": 16021 + }, + { + "epoch": 0.02, + "learning_rate": 4.996872471407444e-05, + "loss": 0.9775, + "step": 16022 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968720786208036e-05, + "loss": 1.135, + "step": 16023 + }, + { + "epoch": 0.02, + "learning_rate": 4.996871685809515e-05, + "loss": 0.7833, + "step": 16024 + }, + { + "epoch": 0.02, + "learning_rate": 4.996871292973578e-05, + "loss": 1.0859, + "step": 16025 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968709001129935e-05, + "loss": 1.2412, + "step": 16026 + }, + { + "epoch": 0.02, + "learning_rate": 4.996870507227761e-05, + "loss": 1.5017, + "step": 16027 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968701143178795e-05, + "loss": 0.8793, + "step": 16028 + }, + { + "epoch": 0.02, + "learning_rate": 4.996869721383351e-05, + "loss": 1.0542, + "step": 16029 + }, + { + "epoch": 0.02, + "learning_rate": 4.996869328424174e-05, + "loss": 0.9377, + "step": 16030 + }, + { + "epoch": 0.02, + "learning_rate": 4.996868935440349e-05, + "loss": 0.8212, + "step": 16031 + }, + { + "epoch": 0.02, + "learning_rate": 4.996868542431876e-05, + "loss": 0.9972, + "step": 16032 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968681493987544e-05, + "loss": 1.0806, + "step": 16033 + }, + { + "epoch": 0.02, + "learning_rate": 4.996867756340986e-05, + "loss": 1.1353, + "step": 16034 + }, + { + "epoch": 0.02, + "learning_rate": 4.996867363258569e-05, + "loss": 1.1027, + "step": 16035 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968669701515035e-05, + "loss": 0.988, + "step": 16036 + }, + { + "epoch": 0.02, + "learning_rate": 4.99686657701979e-05, + "loss": 1.2436, + "step": 16037 + }, + { + "epoch": 0.02, + "learning_rate": 4.996866183863429e-05, + "loss": 1.0596, + "step": 16038 + }, + { + "epoch": 0.02, + "learning_rate": 4.99686579068242e-05, + "loss": 1.1205, + "step": 16039 + }, + { + "epoch": 0.02, + "learning_rate": 4.996865397476763e-05, + "loss": 1.6028, + "step": 16040 + }, + { + "epoch": 0.02, + "learning_rate": 4.996865004246458e-05, + "loss": 1.0554, + "step": 16041 + }, + { + "epoch": 0.02, + "learning_rate": 4.996864610991505e-05, + "loss": 1.0465, + "step": 16042 + }, + { + "epoch": 0.02, + "learning_rate": 4.996864217711904e-05, + "loss": 1.2438, + "step": 16043 + }, + { + "epoch": 0.02, + "learning_rate": 4.996863824407654e-05, + "loss": 0.8443, + "step": 16044 + }, + { + "epoch": 0.02, + "learning_rate": 4.996863431078757e-05, + "loss": 1.7163, + "step": 16045 + }, + { + "epoch": 0.02, + "learning_rate": 4.996863037725212e-05, + "loss": 2.501, + "step": 16046 + }, + { + "epoch": 0.02, + "learning_rate": 4.996862644347019e-05, + "loss": 1.183, + "step": 16047 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968622509441774e-05, + "loss": 1.074, + "step": 16048 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968618575166885e-05, + "loss": 1.0039, + "step": 16049 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968614640645516e-05, + "loss": 1.2442, + "step": 16050 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968610705877667e-05, + "loss": 1.2344, + "step": 16051 + }, + { + "epoch": 0.02, + "learning_rate": 4.996860677086334e-05, + "loss": 1.0345, + "step": 16052 + }, + { + "epoch": 0.02, + "learning_rate": 4.996860283560253e-05, + "loss": 1.1195, + "step": 16053 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968598900095234e-05, + "loss": 1.2332, + "step": 16054 + }, + { + "epoch": 0.02, + "learning_rate": 4.996859496434147e-05, + "loss": 1.3668, + "step": 16055 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968591028341224e-05, + "loss": 1.223, + "step": 16056 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968587092094496e-05, + "loss": 0.9158, + "step": 16057 + }, + { + "epoch": 0.02, + "learning_rate": 4.996858315560129e-05, + "loss": 0.7889, + "step": 16058 + }, + { + "epoch": 0.02, + "learning_rate": 4.99685792188616e-05, + "loss": 0.831, + "step": 16059 + }, + { + "epoch": 0.02, + "learning_rate": 4.996857528187544e-05, + "loss": 0.6155, + "step": 16060 + }, + { + "epoch": 0.02, + "learning_rate": 4.996857134464279e-05, + "loss": 0.4096, + "step": 16061 + }, + { + "epoch": 0.02, + "learning_rate": 4.996856740716367e-05, + "loss": 0.7652, + "step": 16062 + }, + { + "epoch": 0.02, + "learning_rate": 4.996856346943807e-05, + "loss": 1.124, + "step": 16063 + }, + { + "epoch": 0.02, + "learning_rate": 4.996855953146599e-05, + "loss": 1.152, + "step": 16064 + }, + { + "epoch": 0.02, + "learning_rate": 4.996855559324742e-05, + "loss": 1.7036, + "step": 16065 + }, + { + "epoch": 0.02, + "learning_rate": 4.996855165478238e-05, + "loss": 1.3949, + "step": 16066 + }, + { + "epoch": 0.02, + "learning_rate": 4.996854771607087e-05, + "loss": 0.9946, + "step": 16067 + }, + { + "epoch": 0.02, + "learning_rate": 4.996854377711286e-05, + "loss": 1.1107, + "step": 16068 + }, + { + "epoch": 0.02, + "learning_rate": 4.996853983790839e-05, + "loss": 1.0923, + "step": 16069 + }, + { + "epoch": 0.02, + "learning_rate": 4.996853589845744e-05, + "loss": 0.9937, + "step": 16070 + }, + { + "epoch": 0.02, + "learning_rate": 4.996853195876e-05, + "loss": 1.1859, + "step": 16071 + }, + { + "epoch": 0.02, + "learning_rate": 4.996852801881608e-05, + "loss": 0.9965, + "step": 16072 + }, + { + "epoch": 0.02, + "learning_rate": 4.99685240786257e-05, + "loss": 0.9947, + "step": 16073 + }, + { + "epoch": 0.02, + "learning_rate": 4.996852013818883e-05, + "loss": 0.8839, + "step": 16074 + }, + { + "epoch": 0.02, + "learning_rate": 4.996851619750547e-05, + "loss": 1.1034, + "step": 16075 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968512256575645e-05, + "loss": 0.9629, + "step": 16076 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968508315399335e-05, + "loss": 0.7939, + "step": 16077 + }, + { + "epoch": 0.02, + "learning_rate": 4.996850437397656e-05, + "loss": 1.1657, + "step": 16078 + }, + { + "epoch": 0.02, + "learning_rate": 4.996850043230729e-05, + "loss": 0.9332, + "step": 16079 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968496490391544e-05, + "loss": 1.1233, + "step": 16080 + }, + { + "epoch": 0.02, + "learning_rate": 4.996849254822932e-05, + "loss": 1.1291, + "step": 16081 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968488605820625e-05, + "loss": 1.1487, + "step": 16082 + }, + { + "epoch": 0.02, + "learning_rate": 4.996848466316545e-05, + "loss": 1.0192, + "step": 16083 + }, + { + "epoch": 0.02, + "learning_rate": 4.996848072026379e-05, + "loss": 1.3341, + "step": 16084 + }, + { + "epoch": 0.02, + "learning_rate": 4.996847677711566e-05, + "loss": 1.1141, + "step": 16085 + }, + { + "epoch": 0.02, + "learning_rate": 4.996847283372104e-05, + "loss": 1.0351, + "step": 16086 + }, + { + "epoch": 0.02, + "learning_rate": 4.996846889007995e-05, + "loss": 0.921, + "step": 16087 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968464946192387e-05, + "loss": 1.1743, + "step": 16088 + }, + { + "epoch": 0.02, + "learning_rate": 4.996846100205833e-05, + "loss": 1.0212, + "step": 16089 + }, + { + "epoch": 0.02, + "learning_rate": 4.996845705767781e-05, + "loss": 1.0725, + "step": 16090 + }, + { + "epoch": 0.02, + "learning_rate": 4.99684531130508e-05, + "loss": 1.1326, + "step": 16091 + }, + { + "epoch": 0.02, + "learning_rate": 4.996844916817732e-05, + "loss": 1.0872, + "step": 16092 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968445223057355e-05, + "loss": 1.4123, + "step": 16093 + }, + { + "epoch": 0.02, + "learning_rate": 4.996844127769092e-05, + "loss": 1.2065, + "step": 16094 + }, + { + "epoch": 0.02, + "learning_rate": 4.996843733207801e-05, + "loss": 1.1871, + "step": 16095 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968433386218616e-05, + "loss": 0.8718, + "step": 16096 + }, + { + "epoch": 0.02, + "learning_rate": 4.996842944011274e-05, + "loss": 1.1697, + "step": 16097 + }, + { + "epoch": 0.02, + "learning_rate": 4.99684254937604e-05, + "loss": 1.0986, + "step": 16098 + }, + { + "epoch": 0.02, + "learning_rate": 4.996842154716157e-05, + "loss": 1.1475, + "step": 16099 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968417600316266e-05, + "loss": 1.0599, + "step": 16100 + }, + { + "epoch": 0.02, + "learning_rate": 4.996841365322448e-05, + "loss": 1.2229, + "step": 16101 + }, + { + "epoch": 0.02, + "learning_rate": 4.996840970588622e-05, + "loss": 1.251, + "step": 16102 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968405758301484e-05, + "loss": 0.683, + "step": 16103 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968401810470266e-05, + "loss": 1.1209, + "step": 16104 + }, + { + "epoch": 0.02, + "learning_rate": 4.996839786239257e-05, + "loss": 0.8694, + "step": 16105 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968393914068404e-05, + "loss": 1.3749, + "step": 16106 + }, + { + "epoch": 0.02, + "learning_rate": 4.996838996549775e-05, + "loss": 1.0793, + "step": 16107 + }, + { + "epoch": 0.02, + "learning_rate": 4.996838601668063e-05, + "loss": 1.1458, + "step": 16108 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968382067617026e-05, + "loss": 1.2545, + "step": 16109 + }, + { + "epoch": 0.02, + "learning_rate": 4.996837811830695e-05, + "loss": 0.9319, + "step": 16110 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968374168750386e-05, + "loss": 0.6395, + "step": 16111 + }, + { + "epoch": 0.02, + "learning_rate": 4.996837021894735e-05, + "loss": 1.0393, + "step": 16112 + }, + { + "epoch": 0.02, + "learning_rate": 4.996836626889784e-05, + "loss": 0.9462, + "step": 16113 + }, + { + "epoch": 0.02, + "learning_rate": 4.996836231860185e-05, + "loss": 0.8878, + "step": 16114 + }, + { + "epoch": 0.02, + "learning_rate": 4.996835836805939e-05, + "loss": 1.6842, + "step": 16115 + }, + { + "epoch": 0.02, + "learning_rate": 4.996835441727045e-05, + "loss": 1.9949, + "step": 16116 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968350466235026e-05, + "loss": 0.9831, + "step": 16117 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968346514953124e-05, + "loss": 1.1943, + "step": 16118 + }, + { + "epoch": 0.02, + "learning_rate": 4.996834256342475e-05, + "loss": 0.9621, + "step": 16119 + }, + { + "epoch": 0.02, + "learning_rate": 4.99683386116499e-05, + "loss": 1.0894, + "step": 16120 + }, + { + "epoch": 0.02, + "learning_rate": 4.996833465962857e-05, + "loss": 1.1831, + "step": 16121 + }, + { + "epoch": 0.02, + "learning_rate": 4.996833070736077e-05, + "loss": 1.0437, + "step": 16122 + }, + { + "epoch": 0.02, + "learning_rate": 4.996832675484648e-05, + "loss": 0.6832, + "step": 16123 + }, + { + "epoch": 0.02, + "learning_rate": 4.996832280208572e-05, + "loss": 0.7591, + "step": 16124 + }, + { + "epoch": 0.02, + "learning_rate": 4.996831884907849e-05, + "loss": 0.977, + "step": 16125 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968314895824775e-05, + "loss": 0.7077, + "step": 16126 + }, + { + "epoch": 0.02, + "learning_rate": 4.996831094232459e-05, + "loss": 0.6646, + "step": 16127 + }, + { + "epoch": 0.02, + "learning_rate": 4.996830698857792e-05, + "loss": 0.7334, + "step": 16128 + }, + { + "epoch": 0.02, + "learning_rate": 4.996830303458478e-05, + "loss": 1.3824, + "step": 16129 + }, + { + "epoch": 0.02, + "learning_rate": 4.996829908034516e-05, + "loss": 0.8863, + "step": 16130 + }, + { + "epoch": 0.02, + "learning_rate": 4.996829512585907e-05, + "loss": 1.0294, + "step": 16131 + }, + { + "epoch": 0.02, + "learning_rate": 4.996829117112649e-05, + "loss": 1.0788, + "step": 16132 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968287216147445e-05, + "loss": 1.2313, + "step": 16133 + }, + { + "epoch": 0.02, + "learning_rate": 4.996828326092192e-05, + "loss": 0.791, + "step": 16134 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968279305449916e-05, + "loss": 1.3411, + "step": 16135 + }, + { + "epoch": 0.02, + "learning_rate": 4.996827534973144e-05, + "loss": 1.1637, + "step": 16136 + }, + { + "epoch": 0.02, + "learning_rate": 4.996827139376649e-05, + "loss": 1.1389, + "step": 16137 + }, + { + "epoch": 0.02, + "learning_rate": 4.996826743755506e-05, + "loss": 1.0491, + "step": 16138 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968263481097156e-05, + "loss": 0.9974, + "step": 16139 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968259524392767e-05, + "loss": 1.1931, + "step": 16140 + }, + { + "epoch": 0.02, + "learning_rate": 4.996825556744191e-05, + "loss": 1.14, + "step": 16141 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968251610244575e-05, + "loss": 1.0189, + "step": 16142 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968247652800766e-05, + "loss": 1.1904, + "step": 16143 + }, + { + "epoch": 0.02, + "learning_rate": 4.996824369511048e-05, + "loss": 1.0098, + "step": 16144 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968239737173716e-05, + "loss": 1.048, + "step": 16145 + }, + { + "epoch": 0.02, + "learning_rate": 4.996823577899048e-05, + "loss": 1.0062, + "step": 16146 + }, + { + "epoch": 0.02, + "learning_rate": 4.996823182056076e-05, + "loss": 1.0512, + "step": 16147 + }, + { + "epoch": 0.02, + "learning_rate": 4.996822786188457e-05, + "loss": 1.1053, + "step": 16148 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968223902961906e-05, + "loss": 1.4602, + "step": 16149 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968219943792766e-05, + "loss": 0.3209, + "step": 16150 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968215984377146e-05, + "loss": 0.2716, + "step": 16151 + }, + { + "epoch": 0.02, + "learning_rate": 4.996821202471505e-05, + "loss": 0.1903, + "step": 16152 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968208064806486e-05, + "loss": 1.2917, + "step": 16153 + }, + { + "epoch": 0.02, + "learning_rate": 4.996820410465144e-05, + "loss": 1.2429, + "step": 16154 + }, + { + "epoch": 0.02, + "learning_rate": 4.996820014424992e-05, + "loss": 1.0873, + "step": 16155 + }, + { + "epoch": 0.02, + "learning_rate": 4.996819618360192e-05, + "loss": 0.9876, + "step": 16156 + }, + { + "epoch": 0.02, + "learning_rate": 4.996819222270746e-05, + "loss": 1.0216, + "step": 16157 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968188261566506e-05, + "loss": 1.1061, + "step": 16158 + }, + { + "epoch": 0.02, + "learning_rate": 4.996818430017908e-05, + "loss": 1.0066, + "step": 16159 + }, + { + "epoch": 0.02, + "learning_rate": 4.996818033854519e-05, + "loss": 1.181, + "step": 16160 + }, + { + "epoch": 0.02, + "learning_rate": 4.996817637666481e-05, + "loss": 1.3481, + "step": 16161 + }, + { + "epoch": 0.02, + "learning_rate": 4.996817241453796e-05, + "loss": 1.9089, + "step": 16162 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968168452164645e-05, + "loss": 1.3536, + "step": 16163 + }, + { + "epoch": 0.02, + "learning_rate": 4.996816448954484e-05, + "loss": 1.3729, + "step": 16164 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968160526678565e-05, + "loss": 1.4777, + "step": 16165 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968156563565816e-05, + "loss": 1.051, + "step": 16166 + }, + { + "epoch": 0.02, + "learning_rate": 4.99681526002066e-05, + "loss": 1.4189, + "step": 16167 + }, + { + "epoch": 0.02, + "learning_rate": 4.99681486366009e-05, + "loss": 1.304, + "step": 16168 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968144672748716e-05, + "loss": 0.6471, + "step": 16169 + }, + { + "epoch": 0.02, + "learning_rate": 4.996814070865007e-05, + "loss": 1.0248, + "step": 16170 + }, + { + "epoch": 0.02, + "learning_rate": 4.996813674430495e-05, + "loss": 1.2299, + "step": 16171 + }, + { + "epoch": 0.02, + "learning_rate": 4.996813277971335e-05, + "loss": 0.8333, + "step": 16172 + }, + { + "epoch": 0.02, + "learning_rate": 4.996812881487527e-05, + "loss": 0.8823, + "step": 16173 + }, + { + "epoch": 0.02, + "learning_rate": 4.996812484979072e-05, + "loss": 1.1194, + "step": 16174 + }, + { + "epoch": 0.02, + "learning_rate": 4.99681208844597e-05, + "loss": 1.1598, + "step": 16175 + }, + { + "epoch": 0.02, + "learning_rate": 4.99681169188822e-05, + "loss": 1.0874, + "step": 16176 + }, + { + "epoch": 0.02, + "learning_rate": 4.996811295305823e-05, + "loss": 0.8971, + "step": 16177 + }, + { + "epoch": 0.02, + "learning_rate": 4.996810898698778e-05, + "loss": 1.2511, + "step": 16178 + }, + { + "epoch": 0.02, + "learning_rate": 4.996810502067086e-05, + "loss": 1.2893, + "step": 16179 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968101054107465e-05, + "loss": 0.9393, + "step": 16180 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968097087297586e-05, + "loss": 1.1149, + "step": 16181 + }, + { + "epoch": 0.02, + "learning_rate": 4.996809312024124e-05, + "loss": 1.958, + "step": 16182 + }, + { + "epoch": 0.02, + "learning_rate": 4.996808915293842e-05, + "loss": 2.5813, + "step": 16183 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968085185389125e-05, + "loss": 2.4732, + "step": 16184 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968081217593354e-05, + "loss": 2.4139, + "step": 16185 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968077249551116e-05, + "loss": 2.3772, + "step": 16186 + }, + { + "epoch": 0.02, + "learning_rate": 4.996807328126239e-05, + "loss": 1.754, + "step": 16187 + }, + { + "epoch": 0.02, + "learning_rate": 4.99680693127272e-05, + "loss": 0.9631, + "step": 16188 + }, + { + "epoch": 0.02, + "learning_rate": 4.996806534394554e-05, + "loss": 1.1956, + "step": 16189 + }, + { + "epoch": 0.02, + "learning_rate": 4.996806137491739e-05, + "loss": 1.2091, + "step": 16190 + }, + { + "epoch": 0.02, + "learning_rate": 4.996805740564277e-05, + "loss": 1.1417, + "step": 16191 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968053436121684e-05, + "loss": 1.1485, + "step": 16192 + }, + { + "epoch": 0.02, + "learning_rate": 4.996804946635412e-05, + "loss": 1.3441, + "step": 16193 + }, + { + "epoch": 0.02, + "learning_rate": 4.996804549634009e-05, + "loss": 1.2406, + "step": 16194 + }, + { + "epoch": 0.02, + "learning_rate": 4.996804152607957e-05, + "loss": 1.259, + "step": 16195 + }, + { + "epoch": 0.02, + "learning_rate": 4.996803755557259e-05, + "loss": 0.7639, + "step": 16196 + }, + { + "epoch": 0.02, + "learning_rate": 4.996803358481913e-05, + "loss": 1.5595, + "step": 16197 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968029613819196e-05, + "loss": 0.9994, + "step": 16198 + }, + { + "epoch": 0.02, + "learning_rate": 4.996802564257279e-05, + "loss": 0.9487, + "step": 16199 + }, + { + "epoch": 0.02, + "learning_rate": 4.99680216710799e-05, + "loss": 1.0912, + "step": 16200 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968017699340555e-05, + "loss": 1.1955, + "step": 16201 + }, + { + "epoch": 0.02, + "learning_rate": 4.996801372735472e-05, + "loss": 0.8499, + "step": 16202 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968009755122416e-05, + "loss": 1.0375, + "step": 16203 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968005782643644e-05, + "loss": 1.1882, + "step": 16204 + }, + { + "epoch": 0.02, + "learning_rate": 4.996800180991839e-05, + "loss": 0.7487, + "step": 16205 + }, + { + "epoch": 0.02, + "learning_rate": 4.996799783694667e-05, + "loss": 1.0927, + "step": 16206 + }, + { + "epoch": 0.02, + "learning_rate": 4.996799386372847e-05, + "loss": 1.2371, + "step": 16207 + }, + { + "epoch": 0.02, + "learning_rate": 4.99679898902638e-05, + "loss": 1.1254, + "step": 16208 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967985916552654e-05, + "loss": 1.0465, + "step": 16209 + }, + { + "epoch": 0.02, + "learning_rate": 4.996798194259504e-05, + "loss": 1.1878, + "step": 16210 + }, + { + "epoch": 0.02, + "learning_rate": 4.996797796839095e-05, + "loss": 1.2273, + "step": 16211 + }, + { + "epoch": 0.02, + "learning_rate": 4.996797399394039e-05, + "loss": 1.1758, + "step": 16212 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967970019243346e-05, + "loss": 0.9587, + "step": 16213 + }, + { + "epoch": 0.02, + "learning_rate": 4.996796604429984e-05, + "loss": 1.2403, + "step": 16214 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967962069109855e-05, + "loss": 1.0609, + "step": 16215 + }, + { + "epoch": 0.02, + "learning_rate": 4.996795809367339e-05, + "loss": 0.8989, + "step": 16216 + }, + { + "epoch": 0.02, + "learning_rate": 4.996795411799047e-05, + "loss": 0.9804, + "step": 16217 + }, + { + "epoch": 0.02, + "learning_rate": 4.996795014206106e-05, + "loss": 0.4521, + "step": 16218 + }, + { + "epoch": 0.02, + "learning_rate": 4.996794616588518e-05, + "loss": 0.3247, + "step": 16219 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967942189462833e-05, + "loss": 0.4387, + "step": 16220 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967938212794006e-05, + "loss": 0.5697, + "step": 16221 + }, + { + "epoch": 0.02, + "learning_rate": 4.996793423587872e-05, + "loss": 0.4907, + "step": 16222 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967930258716946e-05, + "loss": 0.3862, + "step": 16223 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967926281308706e-05, + "loss": 0.8819, + "step": 16224 + }, + { + "epoch": 0.02, + "learning_rate": 4.996792230365399e-05, + "loss": 1.2234, + "step": 16225 + }, + { + "epoch": 0.02, + "learning_rate": 4.996791832575281e-05, + "loss": 0.7997, + "step": 16226 + }, + { + "epoch": 0.02, + "learning_rate": 4.996791434760514e-05, + "loss": 1.0663, + "step": 16227 + }, + { + "epoch": 0.02, + "learning_rate": 4.996791036921101e-05, + "loss": 1.0899, + "step": 16228 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967906390570405e-05, + "loss": 1.0533, + "step": 16229 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967902411683334e-05, + "loss": 1.3995, + "step": 16230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967898432549776e-05, + "loss": 1.369, + "step": 16231 + }, + { + "epoch": 0.02, + "learning_rate": 4.996789445316976e-05, + "loss": 1.1766, + "step": 16232 + }, + { + "epoch": 0.02, + "learning_rate": 4.996789047354326e-05, + "loss": 0.9267, + "step": 16233 + }, + { + "epoch": 0.02, + "learning_rate": 4.996788649367029e-05, + "loss": 1.1148, + "step": 16234 + }, + { + "epoch": 0.02, + "learning_rate": 4.996788251355085e-05, + "loss": 1.1133, + "step": 16235 + }, + { + "epoch": 0.02, + "learning_rate": 4.996787853318494e-05, + "loss": 1.035, + "step": 16236 + }, + { + "epoch": 0.02, + "learning_rate": 4.996787455257255e-05, + "loss": 0.9821, + "step": 16237 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967870571713695e-05, + "loss": 1.168, + "step": 16238 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967866590608366e-05, + "loss": 1.21, + "step": 16239 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967862609256565e-05, + "loss": 0.9015, + "step": 16240 + }, + { + "epoch": 0.02, + "learning_rate": 4.996785862765829e-05, + "loss": 1.1564, + "step": 16241 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967854645813536e-05, + "loss": 0.9021, + "step": 16242 + }, + { + "epoch": 0.02, + "learning_rate": 4.996785066372232e-05, + "loss": 0.7426, + "step": 16243 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967846681384635e-05, + "loss": 0.6433, + "step": 16244 + }, + { + "epoch": 0.02, + "learning_rate": 4.996784269880047e-05, + "loss": 0.66, + "step": 16245 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967838715969835e-05, + "loss": 0.5208, + "step": 16246 + }, + { + "epoch": 0.02, + "learning_rate": 4.996783473289273e-05, + "loss": 1.1492, + "step": 16247 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967830749569143e-05, + "loss": 0.9436, + "step": 16248 + }, + { + "epoch": 0.02, + "learning_rate": 4.996782676599909e-05, + "loss": 0.9296, + "step": 16249 + }, + { + "epoch": 0.02, + "learning_rate": 4.996782278218257e-05, + "loss": 1.1591, + "step": 16250 + }, + { + "epoch": 0.02, + "learning_rate": 4.996781879811958e-05, + "loss": 1.0893, + "step": 16251 + }, + { + "epoch": 0.02, + "learning_rate": 4.996781481381011e-05, + "loss": 1.0829, + "step": 16252 + }, + { + "epoch": 0.02, + "learning_rate": 4.996781082925417e-05, + "loss": 1.1987, + "step": 16253 + }, + { + "epoch": 0.02, + "learning_rate": 4.996780684445176e-05, + "loss": 0.7229, + "step": 16254 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967802859402886e-05, + "loss": 1.1515, + "step": 16255 + }, + { + "epoch": 0.02, + "learning_rate": 4.996779887410753e-05, + "loss": 1.0667, + "step": 16256 + }, + { + "epoch": 0.02, + "learning_rate": 4.996779488856571e-05, + "loss": 1.4895, + "step": 16257 + }, + { + "epoch": 0.02, + "learning_rate": 4.996779090277741e-05, + "loss": 0.9377, + "step": 16258 + }, + { + "epoch": 0.02, + "learning_rate": 4.996778691674264e-05, + "loss": 1.115, + "step": 16259 + }, + { + "epoch": 0.02, + "learning_rate": 4.996778293046141e-05, + "loss": 1.0619, + "step": 16260 + }, + { + "epoch": 0.02, + "learning_rate": 4.996777894393369e-05, + "loss": 1.3029, + "step": 16261 + }, + { + "epoch": 0.02, + "learning_rate": 4.996777495715951e-05, + "loss": 1.0012, + "step": 16262 + }, + { + "epoch": 0.02, + "learning_rate": 4.996777097013886e-05, + "loss": 1.0984, + "step": 16263 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967766982871736e-05, + "loss": 1.1476, + "step": 16264 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967762995358136e-05, + "loss": 1.2302, + "step": 16265 + }, + { + "epoch": 0.02, + "learning_rate": 4.996775900759807e-05, + "loss": 1.5285, + "step": 16266 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967755019591536e-05, + "loss": 1.3113, + "step": 16267 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967751031338524e-05, + "loss": 1.1252, + "step": 16268 + }, + { + "epoch": 0.02, + "learning_rate": 4.996774704283904e-05, + "loss": 1.0883, + "step": 16269 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967743054093086e-05, + "loss": 1.1315, + "step": 16270 + }, + { + "epoch": 0.02, + "learning_rate": 4.996773906510067e-05, + "loss": 0.8545, + "step": 16271 + }, + { + "epoch": 0.02, + "learning_rate": 4.996773507586177e-05, + "loss": 1.2811, + "step": 16272 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967731086376406e-05, + "loss": 1.0656, + "step": 16273 + }, + { + "epoch": 0.02, + "learning_rate": 4.996772709664457e-05, + "loss": 0.9933, + "step": 16274 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967723106666265e-05, + "loss": 1.0854, + "step": 16275 + }, + { + "epoch": 0.02, + "learning_rate": 4.996771911644149e-05, + "loss": 1.7743, + "step": 16276 + }, + { + "epoch": 0.02, + "learning_rate": 4.996771512597024e-05, + "loss": 1.2574, + "step": 16277 + }, + { + "epoch": 0.02, + "learning_rate": 4.996771113525252e-05, + "loss": 1.0404, + "step": 16278 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967707144288334e-05, + "loss": 1.3512, + "step": 16279 + }, + { + "epoch": 0.02, + "learning_rate": 4.996770315307767e-05, + "loss": 0.8853, + "step": 16280 + }, + { + "epoch": 0.02, + "learning_rate": 4.996769916162054e-05, + "loss": 0.3981, + "step": 16281 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967695169916936e-05, + "loss": 0.5679, + "step": 16282 + }, + { + "epoch": 0.02, + "learning_rate": 4.996769117796687e-05, + "loss": 1.0688, + "step": 16283 + }, + { + "epoch": 0.02, + "learning_rate": 4.996768718577032e-05, + "loss": 1.0377, + "step": 16284 + }, + { + "epoch": 0.02, + "learning_rate": 4.996768319332731e-05, + "loss": 1.1405, + "step": 16285 + }, + { + "epoch": 0.02, + "learning_rate": 4.996767920063783e-05, + "loss": 1.1835, + "step": 16286 + }, + { + "epoch": 0.02, + "learning_rate": 4.996767520770187e-05, + "loss": 1.2494, + "step": 16287 + }, + { + "epoch": 0.02, + "learning_rate": 4.996767121451945e-05, + "loss": 0.864, + "step": 16288 + }, + { + "epoch": 0.02, + "learning_rate": 4.996766722109055e-05, + "loss": 1.0897, + "step": 16289 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967663227415185e-05, + "loss": 1.0804, + "step": 16290 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967659233493353e-05, + "loss": 1.0939, + "step": 16291 + }, + { + "epoch": 0.02, + "learning_rate": 4.996765523932505e-05, + "loss": 0.8912, + "step": 16292 + }, + { + "epoch": 0.02, + "learning_rate": 4.996765124491027e-05, + "loss": 1.1055, + "step": 16293 + }, + { + "epoch": 0.02, + "learning_rate": 4.996764725024903e-05, + "loss": 1.3556, + "step": 16294 + }, + { + "epoch": 0.02, + "learning_rate": 4.996764325534131e-05, + "loss": 1.1317, + "step": 16295 + }, + { + "epoch": 0.02, + "learning_rate": 4.996763926018713e-05, + "loss": 1.0213, + "step": 16296 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967635264786465e-05, + "loss": 1.0258, + "step": 16297 + }, + { + "epoch": 0.02, + "learning_rate": 4.996763126913934e-05, + "loss": 0.9683, + "step": 16298 + }, + { + "epoch": 0.02, + "learning_rate": 4.996762727324574e-05, + "loss": 1.2127, + "step": 16299 + }, + { + "epoch": 0.02, + "learning_rate": 4.996762327710568e-05, + "loss": 1.1685, + "step": 16300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967619280719144e-05, + "loss": 1.1319, + "step": 16301 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967615284086136e-05, + "loss": 1.0097, + "step": 16302 + }, + { + "epoch": 0.02, + "learning_rate": 4.996761128720666e-05, + "loss": 1.0898, + "step": 16303 + }, + { + "epoch": 0.02, + "learning_rate": 4.996760729008072e-05, + "loss": 1.5117, + "step": 16304 + }, + { + "epoch": 0.02, + "learning_rate": 4.99676032927083e-05, + "loss": 1.26, + "step": 16305 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967599295089416e-05, + "loss": 1.4611, + "step": 16306 + }, + { + "epoch": 0.02, + "learning_rate": 4.996759529722407e-05, + "loss": 1.0977, + "step": 16307 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967591299112244e-05, + "loss": 0.9306, + "step": 16308 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967587300753946e-05, + "loss": 0.998, + "step": 16309 + }, + { + "epoch": 0.02, + "learning_rate": 4.996758330214919e-05, + "loss": 1.0757, + "step": 16310 + }, + { + "epoch": 0.02, + "learning_rate": 4.996757930329795e-05, + "loss": 1.0549, + "step": 16311 + }, + { + "epoch": 0.02, + "learning_rate": 4.996757530420025e-05, + "loss": 1.2741, + "step": 16312 + }, + { + "epoch": 0.02, + "learning_rate": 4.996757130485609e-05, + "loss": 1.2743, + "step": 16313 + }, + { + "epoch": 0.02, + "learning_rate": 4.996756730526544e-05, + "loss": 1.3359, + "step": 16314 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967563305428335e-05, + "loss": 0.9481, + "step": 16315 + }, + { + "epoch": 0.02, + "learning_rate": 4.996755930534476e-05, + "loss": 1.0658, + "step": 16316 + }, + { + "epoch": 0.02, + "learning_rate": 4.996755530501471e-05, + "loss": 0.5864, + "step": 16317 + }, + { + "epoch": 0.02, + "learning_rate": 4.996755130443819e-05, + "loss": 0.5213, + "step": 16318 + }, + { + "epoch": 0.02, + "learning_rate": 4.99675473036152e-05, + "loss": 0.3932, + "step": 16319 + }, + { + "epoch": 0.02, + "learning_rate": 4.996754330254575e-05, + "loss": 0.4016, + "step": 16320 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967539301229825e-05, + "loss": 0.4387, + "step": 16321 + }, + { + "epoch": 0.02, + "learning_rate": 4.996753529966743e-05, + "loss": 0.3618, + "step": 16322 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967531297858564e-05, + "loss": 0.3892, + "step": 16323 + }, + { + "epoch": 0.02, + "learning_rate": 4.996752729580324e-05, + "loss": 0.4412, + "step": 16324 + }, + { + "epoch": 0.02, + "learning_rate": 4.996752329350144e-05, + "loss": 0.3351, + "step": 16325 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967519290953165e-05, + "loss": 0.7273, + "step": 16326 + }, + { + "epoch": 0.02, + "learning_rate": 4.996751528815843e-05, + "loss": 0.9889, + "step": 16327 + }, + { + "epoch": 0.02, + "learning_rate": 4.996751128511722e-05, + "loss": 1.0277, + "step": 16328 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967507281829544e-05, + "loss": 0.964, + "step": 16329 + }, + { + "epoch": 0.02, + "learning_rate": 4.99675032782954e-05, + "loss": 1.108, + "step": 16330 + }, + { + "epoch": 0.02, + "learning_rate": 4.996749927451479e-05, + "loss": 1.089, + "step": 16331 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967495270487705e-05, + "loss": 1.061, + "step": 16332 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967491266214155e-05, + "loss": 1.014, + "step": 16333 + }, + { + "epoch": 0.02, + "learning_rate": 4.996748726169413e-05, + "loss": 1.1837, + "step": 16334 + }, + { + "epoch": 0.02, + "learning_rate": 4.996748325692765e-05, + "loss": 1.0725, + "step": 16335 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967479251914695e-05, + "loss": 1.2409, + "step": 16336 + }, + { + "epoch": 0.02, + "learning_rate": 4.996747524665527e-05, + "loss": 1.2574, + "step": 16337 + }, + { + "epoch": 0.02, + "learning_rate": 4.996747124114938e-05, + "loss": 1.4221, + "step": 16338 + }, + { + "epoch": 0.02, + "learning_rate": 4.996746723539702e-05, + "loss": 1.1872, + "step": 16339 + }, + { + "epoch": 0.02, + "learning_rate": 4.996746322939819e-05, + "loss": 0.8682, + "step": 16340 + }, + { + "epoch": 0.02, + "learning_rate": 4.996745922315289e-05, + "loss": 1.2092, + "step": 16341 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967455216661124e-05, + "loss": 1.0144, + "step": 16342 + }, + { + "epoch": 0.02, + "learning_rate": 4.996745120992289e-05, + "loss": 0.8675, + "step": 16343 + }, + { + "epoch": 0.02, + "learning_rate": 4.996744720293819e-05, + "loss": 0.9412, + "step": 16344 + }, + { + "epoch": 0.02, + "learning_rate": 4.996744319570702e-05, + "loss": 1.0969, + "step": 16345 + }, + { + "epoch": 0.02, + "learning_rate": 4.996743918822938e-05, + "loss": 1.1246, + "step": 16346 + }, + { + "epoch": 0.02, + "learning_rate": 4.996743518050527e-05, + "loss": 0.9383, + "step": 16347 + }, + { + "epoch": 0.02, + "learning_rate": 4.99674311725347e-05, + "loss": 1.54, + "step": 16348 + }, + { + "epoch": 0.02, + "learning_rate": 4.996742716431766e-05, + "loss": 1.1927, + "step": 16349 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967423155854145e-05, + "loss": 0.9948, + "step": 16350 + }, + { + "epoch": 0.02, + "learning_rate": 4.996741914714417e-05, + "loss": 1.1343, + "step": 16351 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967415138187726e-05, + "loss": 1.1142, + "step": 16352 + }, + { + "epoch": 0.02, + "learning_rate": 4.996741112898481e-05, + "loss": 1.0497, + "step": 16353 + }, + { + "epoch": 0.02, + "learning_rate": 4.996740711953543e-05, + "loss": 1.0757, + "step": 16354 + }, + { + "epoch": 0.02, + "learning_rate": 4.996740310983958e-05, + "loss": 1.3099, + "step": 16355 + }, + { + "epoch": 0.02, + "learning_rate": 4.996739909989726e-05, + "loss": 1.2023, + "step": 16356 + }, + { + "epoch": 0.02, + "learning_rate": 4.996739508970848e-05, + "loss": 0.9294, + "step": 16357 + }, + { + "epoch": 0.02, + "learning_rate": 4.996739107927323e-05, + "loss": 0.7495, + "step": 16358 + }, + { + "epoch": 0.02, + "learning_rate": 4.996738706859151e-05, + "loss": 0.7693, + "step": 16359 + }, + { + "epoch": 0.02, + "learning_rate": 4.996738305766332e-05, + "loss": 1.0183, + "step": 16360 + }, + { + "epoch": 0.02, + "learning_rate": 4.996737904648866e-05, + "loss": 0.9784, + "step": 16361 + }, + { + "epoch": 0.02, + "learning_rate": 4.996737503506754e-05, + "loss": 1.1849, + "step": 16362 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967371023399955e-05, + "loss": 1.1728, + "step": 16363 + }, + { + "epoch": 0.02, + "learning_rate": 4.99673670114859e-05, + "loss": 1.0427, + "step": 16364 + }, + { + "epoch": 0.02, + "learning_rate": 4.996736299932537e-05, + "loss": 0.9394, + "step": 16365 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967358986918376e-05, + "loss": 1.0391, + "step": 16366 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967354974264926e-05, + "loss": 1.1171, + "step": 16367 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967350961364996e-05, + "loss": 0.9364, + "step": 16368 + }, + { + "epoch": 0.02, + "learning_rate": 4.99673469482186e-05, + "loss": 1.5185, + "step": 16369 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967342934825744e-05, + "loss": 1.0152, + "step": 16370 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967338921186415e-05, + "loss": 0.9826, + "step": 16371 + }, + { + "epoch": 0.02, + "learning_rate": 4.996733490730062e-05, + "loss": 0.9392, + "step": 16372 + }, + { + "epoch": 0.02, + "learning_rate": 4.996733089316836e-05, + "loss": 0.7348, + "step": 16373 + }, + { + "epoch": 0.02, + "learning_rate": 4.996732687878963e-05, + "loss": 1.1406, + "step": 16374 + }, + { + "epoch": 0.02, + "learning_rate": 4.996732286416443e-05, + "loss": 0.9823, + "step": 16375 + }, + { + "epoch": 0.02, + "learning_rate": 4.996731884929277e-05, + "loss": 1.1228, + "step": 16376 + }, + { + "epoch": 0.02, + "learning_rate": 4.996731483417464e-05, + "loss": 1.0975, + "step": 16377 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967310818810046e-05, + "loss": 1.0636, + "step": 16378 + }, + { + "epoch": 0.02, + "learning_rate": 4.996730680319899e-05, + "loss": 1.0302, + "step": 16379 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967302787341455e-05, + "loss": 0.9555, + "step": 16380 + }, + { + "epoch": 0.02, + "learning_rate": 4.996729877123746e-05, + "loss": 1.1268, + "step": 16381 + }, + { + "epoch": 0.02, + "learning_rate": 4.996729475488699e-05, + "loss": 0.9097, + "step": 16382 + }, + { + "epoch": 0.02, + "learning_rate": 4.996729073829007e-05, + "loss": 1.0198, + "step": 16383 + }, + { + "epoch": 0.02, + "learning_rate": 4.996728672144667e-05, + "loss": 1.0253, + "step": 16384 + }, + { + "epoch": 0.02, + "learning_rate": 4.996728270435681e-05, + "loss": 0.9535, + "step": 16385 + }, + { + "epoch": 0.02, + "learning_rate": 4.996727868702048e-05, + "loss": 1.2251, + "step": 16386 + }, + { + "epoch": 0.02, + "learning_rate": 4.996727466943768e-05, + "loss": 1.1155, + "step": 16387 + }, + { + "epoch": 0.02, + "learning_rate": 4.996727065160842e-05, + "loss": 1.1432, + "step": 16388 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967266633532694e-05, + "loss": 1.1097, + "step": 16389 + }, + { + "epoch": 0.02, + "learning_rate": 4.99672626152105e-05, + "loss": 1.2736, + "step": 16390 + }, + { + "epoch": 0.02, + "learning_rate": 4.996725859664184e-05, + "loss": 1.1754, + "step": 16391 + }, + { + "epoch": 0.02, + "learning_rate": 4.996725457782671e-05, + "loss": 1.1784, + "step": 16392 + }, + { + "epoch": 0.02, + "learning_rate": 4.996725055876512e-05, + "loss": 0.899, + "step": 16393 + }, + { + "epoch": 0.02, + "learning_rate": 4.996724653945706e-05, + "loss": 0.9968, + "step": 16394 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967242519902534e-05, + "loss": 1.4226, + "step": 16395 + }, + { + "epoch": 0.02, + "learning_rate": 4.996723850010154e-05, + "loss": 1.2693, + "step": 16396 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967234480054084e-05, + "loss": 1.063, + "step": 16397 + }, + { + "epoch": 0.02, + "learning_rate": 4.996723045976016e-05, + "loss": 0.9308, + "step": 16398 + }, + { + "epoch": 0.02, + "learning_rate": 4.996722643921977e-05, + "loss": 0.7395, + "step": 16399 + }, + { + "epoch": 0.02, + "learning_rate": 4.996722241843291e-05, + "loss": 0.8999, + "step": 16400 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967218397399596e-05, + "loss": 1.2738, + "step": 16401 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967214376119806e-05, + "loss": 1.0037, + "step": 16402 + }, + { + "epoch": 0.02, + "learning_rate": 4.996721035459355e-05, + "loss": 1.0335, + "step": 16403 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967206332820836e-05, + "loss": 1.4803, + "step": 16404 + }, + { + "epoch": 0.02, + "learning_rate": 4.996720231080165e-05, + "loss": 1.2902, + "step": 16405 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967198288536e-05, + "loss": 1.1419, + "step": 16406 + }, + { + "epoch": 0.02, + "learning_rate": 4.996719426602388e-05, + "loss": 1.2684, + "step": 16407 + }, + { + "epoch": 0.02, + "learning_rate": 4.99671902432653e-05, + "loss": 1.1004, + "step": 16408 + }, + { + "epoch": 0.02, + "learning_rate": 4.996718622026025e-05, + "loss": 1.6123, + "step": 16409 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967182197008746e-05, + "loss": 1.0305, + "step": 16410 + }, + { + "epoch": 0.02, + "learning_rate": 4.996717817351077e-05, + "loss": 1.3689, + "step": 16411 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967174149766315e-05, + "loss": 1.0398, + "step": 16412 + }, + { + "epoch": 0.02, + "learning_rate": 4.996717012577541e-05, + "loss": 1.0676, + "step": 16413 + }, + { + "epoch": 0.02, + "learning_rate": 4.996716610153803e-05, + "loss": 1.1456, + "step": 16414 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967162077054195e-05, + "loss": 1.1777, + "step": 16415 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967158052323885e-05, + "loss": 0.9724, + "step": 16416 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967154027347115e-05, + "loss": 0.9042, + "step": 16417 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967150002123886e-05, + "loss": 0.8798, + "step": 16418 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967145976654183e-05, + "loss": 1.0784, + "step": 16419 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967141950938015e-05, + "loss": 1.6644, + "step": 16420 + }, + { + "epoch": 0.02, + "learning_rate": 4.996713792497538e-05, + "loss": 1.7673, + "step": 16421 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967133898766286e-05, + "loss": 1.38, + "step": 16422 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967129872310725e-05, + "loss": 1.1686, + "step": 16423 + }, + { + "epoch": 0.02, + "learning_rate": 4.99671258456087e-05, + "loss": 1.0706, + "step": 16424 + }, + { + "epoch": 0.02, + "learning_rate": 4.996712181866021e-05, + "loss": 1.1724, + "step": 16425 + }, + { + "epoch": 0.02, + "learning_rate": 4.996711779146525e-05, + "loss": 0.9545, + "step": 16426 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967113764023834e-05, + "loss": 1.1589, + "step": 16427 + }, + { + "epoch": 0.02, + "learning_rate": 4.996710973633595e-05, + "loss": 0.7277, + "step": 16428 + }, + { + "epoch": 0.02, + "learning_rate": 4.99671057084016e-05, + "loss": 0.7543, + "step": 16429 + }, + { + "epoch": 0.02, + "learning_rate": 4.996710168022078e-05, + "loss": 0.7892, + "step": 16430 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967097651793496e-05, + "loss": 1.1139, + "step": 16431 + }, + { + "epoch": 0.02, + "learning_rate": 4.996709362311975e-05, + "loss": 1.0567, + "step": 16432 + }, + { + "epoch": 0.02, + "learning_rate": 4.996708959419954e-05, + "loss": 1.16, + "step": 16433 + }, + { + "epoch": 0.02, + "learning_rate": 4.996708556503287e-05, + "loss": 1.2519, + "step": 16434 + }, + { + "epoch": 0.02, + "learning_rate": 4.996708153561973e-05, + "loss": 0.7287, + "step": 16435 + }, + { + "epoch": 0.02, + "learning_rate": 4.996707750596013e-05, + "loss": 0.4836, + "step": 16436 + }, + { + "epoch": 0.02, + "learning_rate": 4.996707347605406e-05, + "loss": 0.4589, + "step": 16437 + }, + { + "epoch": 0.02, + "learning_rate": 4.996706944590153e-05, + "loss": 0.6768, + "step": 16438 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967065415502535e-05, + "loss": 1.0248, + "step": 16439 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967061384857075e-05, + "loss": 1.2461, + "step": 16440 + }, + { + "epoch": 0.02, + "learning_rate": 4.996705735396515e-05, + "loss": 1.1995, + "step": 16441 + }, + { + "epoch": 0.02, + "learning_rate": 4.996705332282676e-05, + "loss": 1.167, + "step": 16442 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967049291441904e-05, + "loss": 1.1454, + "step": 16443 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967045259810586e-05, + "loss": 1.2731, + "step": 16444 + }, + { + "epoch": 0.02, + "learning_rate": 4.996704122793281e-05, + "loss": 1.1783, + "step": 16445 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967037195808565e-05, + "loss": 1.2154, + "step": 16446 + }, + { + "epoch": 0.02, + "learning_rate": 4.996703316343785e-05, + "loss": 2.1484, + "step": 16447 + }, + { + "epoch": 0.02, + "learning_rate": 4.996702913082068e-05, + "loss": 1.5237, + "step": 16448 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967025097957036e-05, + "loss": 1.147, + "step": 16449 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967021064846934e-05, + "loss": 0.8201, + "step": 16450 + }, + { + "epoch": 0.02, + "learning_rate": 4.996701703149037e-05, + "loss": 0.9415, + "step": 16451 + }, + { + "epoch": 0.02, + "learning_rate": 4.9967012997887344e-05, + "loss": 1.1502, + "step": 16452 + }, + { + "epoch": 0.02, + "learning_rate": 4.996700896403784e-05, + "loss": 1.1597, + "step": 16453 + }, + { + "epoch": 0.02, + "learning_rate": 4.996700492994188e-05, + "loss": 1.2252, + "step": 16454 + }, + { + "epoch": 0.02, + "learning_rate": 4.996700089559947e-05, + "loss": 1.3019, + "step": 16455 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966996861010584e-05, + "loss": 1.1005, + "step": 16456 + }, + { + "epoch": 0.02, + "learning_rate": 4.996699282617524e-05, + "loss": 1.3154, + "step": 16457 + }, + { + "epoch": 0.02, + "learning_rate": 4.996698879109342e-05, + "loss": 0.9608, + "step": 16458 + }, + { + "epoch": 0.02, + "learning_rate": 4.996698475576515e-05, + "loss": 1.5192, + "step": 16459 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966980720190404e-05, + "loss": 1.0495, + "step": 16460 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966976684369206e-05, + "loss": 0.9472, + "step": 16461 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966972648301536e-05, + "loss": 1.4851, + "step": 16462 + }, + { + "epoch": 0.02, + "learning_rate": 4.996696861198741e-05, + "loss": 1.2061, + "step": 16463 + }, + { + "epoch": 0.02, + "learning_rate": 4.996696457542682e-05, + "loss": 0.9333, + "step": 16464 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966960538619756e-05, + "loss": 1.0153, + "step": 16465 + }, + { + "epoch": 0.02, + "learning_rate": 4.996695650156624e-05, + "loss": 0.9272, + "step": 16466 + }, + { + "epoch": 0.02, + "learning_rate": 4.996695246426626e-05, + "loss": 1.1944, + "step": 16467 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966948426719805e-05, + "loss": 1.1198, + "step": 16468 + }, + { + "epoch": 0.02, + "learning_rate": 4.99669443889269e-05, + "loss": 1.1995, + "step": 16469 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966940350887526e-05, + "loss": 1.0167, + "step": 16470 + }, + { + "epoch": 0.02, + "learning_rate": 4.996693631260169e-05, + "loss": 0.9834, + "step": 16471 + }, + { + "epoch": 0.02, + "learning_rate": 4.996693227406939e-05, + "loss": 1.259, + "step": 16472 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966928235290636e-05, + "loss": 1.9653, + "step": 16473 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966924196265404e-05, + "loss": 1.8612, + "step": 16474 + }, + { + "epoch": 0.02, + "learning_rate": 4.996692015699372e-05, + "loss": 1.0861, + "step": 16475 + }, + { + "epoch": 0.02, + "learning_rate": 4.996691611747557e-05, + "loss": 1.076, + "step": 16476 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966912077710953e-05, + "loss": 1.2206, + "step": 16477 + }, + { + "epoch": 0.02, + "learning_rate": 4.996690803769988e-05, + "loss": 0.8671, + "step": 16478 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966903997442335e-05, + "loss": 0.8373, + "step": 16479 + }, + { + "epoch": 0.02, + "learning_rate": 4.996689995693834e-05, + "loss": 0.9271, + "step": 16480 + }, + { + "epoch": 0.02, + "learning_rate": 4.996689591618787e-05, + "loss": 1.0112, + "step": 16481 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966891875190944e-05, + "loss": 1.0359, + "step": 16482 + }, + { + "epoch": 0.02, + "learning_rate": 4.996688783394755e-05, + "loss": 1.1535, + "step": 16483 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966883792457704e-05, + "loss": 1.0136, + "step": 16484 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966879750721385e-05, + "loss": 1.2872, + "step": 16485 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966875708738606e-05, + "loss": 1.115, + "step": 16486 + }, + { + "epoch": 0.02, + "learning_rate": 4.996687166650937e-05, + "loss": 1.1417, + "step": 16487 + }, + { + "epoch": 0.02, + "learning_rate": 4.996686762403366e-05, + "loss": 1.2457, + "step": 16488 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966863581311505e-05, + "loss": 1.0294, + "step": 16489 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966859538342874e-05, + "loss": 0.5416, + "step": 16490 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966855495127784e-05, + "loss": 0.5301, + "step": 16491 + }, + { + "epoch": 0.02, + "learning_rate": 4.996685145166623e-05, + "loss": 0.5015, + "step": 16492 + }, + { + "epoch": 0.02, + "learning_rate": 4.996684740795822e-05, + "loss": 0.4428, + "step": 16493 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966843364003744e-05, + "loss": 0.4028, + "step": 16494 + }, + { + "epoch": 0.02, + "learning_rate": 4.99668393198028e-05, + "loss": 0.4598, + "step": 16495 + }, + { + "epoch": 0.02, + "learning_rate": 4.996683527535541e-05, + "loss": 0.3943, + "step": 16496 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966831230661534e-05, + "loss": 0.395, + "step": 16497 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966827185721214e-05, + "loss": 0.4142, + "step": 16498 + }, + { + "epoch": 0.02, + "learning_rate": 4.996682314053442e-05, + "loss": 0.7986, + "step": 16499 + }, + { + "epoch": 0.02, + "learning_rate": 4.996681909510118e-05, + "loss": 1.1246, + "step": 16500 + }, + { + "epoch": 0.02, + "eval_loss": 1.0717031955718994, + "eval_runtime": 83.8078, + "eval_samples_per_second": 16.526, + "eval_steps_per_second": 4.14, + "step": 16500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966815049421464e-05, + "loss": 0.852, + "step": 16501 + }, + { + "epoch": 0.02, + "learning_rate": 4.996681100349529e-05, + "loss": 0.6896, + "step": 16502 + }, + { + "epoch": 0.02, + "learning_rate": 4.996680695732266e-05, + "loss": 0.9153, + "step": 16503 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966802910903564e-05, + "loss": 1.1552, + "step": 16504 + }, + { + "epoch": 0.02, + "learning_rate": 4.996679886423801e-05, + "loss": 1.3038, + "step": 16505 + }, + { + "epoch": 0.02, + "learning_rate": 4.996679481732599e-05, + "loss": 1.1098, + "step": 16506 + }, + { + "epoch": 0.02, + "learning_rate": 4.99667907701675e-05, + "loss": 1.4375, + "step": 16507 + }, + { + "epoch": 0.02, + "learning_rate": 4.996678672276257e-05, + "loss": 1.2337, + "step": 16508 + }, + { + "epoch": 0.02, + "learning_rate": 4.996678267511116e-05, + "loss": 1.1692, + "step": 16509 + }, + { + "epoch": 0.02, + "learning_rate": 4.99667786272133e-05, + "loss": 0.9022, + "step": 16510 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966774579068965e-05, + "loss": 1.5421, + "step": 16511 + }, + { + "epoch": 0.02, + "learning_rate": 4.996677053067818e-05, + "loss": 2.2308, + "step": 16512 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966766482040925e-05, + "loss": 0.8242, + "step": 16513 + }, + { + "epoch": 0.02, + "learning_rate": 4.996676243315721e-05, + "loss": 0.8223, + "step": 16514 + }, + { + "epoch": 0.02, + "learning_rate": 4.996675838402704e-05, + "loss": 0.7621, + "step": 16515 + }, + { + "epoch": 0.02, + "learning_rate": 4.99667543346504e-05, + "loss": 0.7414, + "step": 16516 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966750285027306e-05, + "loss": 0.8153, + "step": 16517 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966746235157755e-05, + "loss": 1.3378, + "step": 16518 + }, + { + "epoch": 0.02, + "learning_rate": 4.996674218504174e-05, + "loss": 1.1553, + "step": 16519 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966738134679256e-05, + "loss": 1.1391, + "step": 16520 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966734084070314e-05, + "loss": 0.887, + "step": 16521 + }, + { + "epoch": 0.02, + "learning_rate": 4.996673003321491e-05, + "loss": 1.9101, + "step": 16522 + }, + { + "epoch": 0.02, + "learning_rate": 4.996672598211305e-05, + "loss": 1.0085, + "step": 16523 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966721930764724e-05, + "loss": 1.2954, + "step": 16524 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966717879169943e-05, + "loss": 1.31, + "step": 16525 + }, + { + "epoch": 0.02, + "learning_rate": 4.996671382732869e-05, + "loss": 1.0428, + "step": 16526 + }, + { + "epoch": 0.02, + "learning_rate": 4.996670977524099e-05, + "loss": 1.2657, + "step": 16527 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966705722906826e-05, + "loss": 1.2665, + "step": 16528 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966701670326194e-05, + "loss": 1.1931, + "step": 16529 + }, + { + "epoch": 0.02, + "learning_rate": 4.99666976174991e-05, + "loss": 1.201, + "step": 16530 + }, + { + "epoch": 0.02, + "learning_rate": 4.996669356442556e-05, + "loss": 1.0277, + "step": 16531 + }, + { + "epoch": 0.02, + "learning_rate": 4.996668951110555e-05, + "loss": 0.9173, + "step": 16532 + }, + { + "epoch": 0.02, + "learning_rate": 4.996668545753907e-05, + "loss": 1.1702, + "step": 16533 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966681403726144e-05, + "loss": 1.2528, + "step": 16534 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966677349666755e-05, + "loss": 1.4963, + "step": 16535 + }, + { + "epoch": 0.02, + "learning_rate": 4.99666732953609e-05, + "loss": 1.2674, + "step": 16536 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966669240808586e-05, + "loss": 0.9046, + "step": 16537 + }, + { + "epoch": 0.02, + "learning_rate": 4.996666518600981e-05, + "loss": 0.8433, + "step": 16538 + }, + { + "epoch": 0.02, + "learning_rate": 4.996666113096458e-05, + "loss": 0.9715, + "step": 16539 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966657075672886e-05, + "loss": 1.0459, + "step": 16540 + }, + { + "epoch": 0.02, + "learning_rate": 4.996665302013473e-05, + "loss": 1.1493, + "step": 16541 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966648964350116e-05, + "loss": 1.2271, + "step": 16542 + }, + { + "epoch": 0.02, + "learning_rate": 4.996664490831904e-05, + "loss": 1.3862, + "step": 16543 + }, + { + "epoch": 0.02, + "learning_rate": 4.99666408520415e-05, + "loss": 1.4713, + "step": 16544 + }, + { + "epoch": 0.02, + "learning_rate": 4.996663679551751e-05, + "loss": 1.3913, + "step": 16545 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966632738747054e-05, + "loss": 1.3553, + "step": 16546 + }, + { + "epoch": 0.02, + "learning_rate": 4.996662868173014e-05, + "loss": 1.0174, + "step": 16547 + }, + { + "epoch": 0.02, + "learning_rate": 4.996662462446676e-05, + "loss": 1.1201, + "step": 16548 + }, + { + "epoch": 0.02, + "learning_rate": 4.996662056695692e-05, + "loss": 1.2895, + "step": 16549 + }, + { + "epoch": 0.02, + "learning_rate": 4.996661650920063e-05, + "loss": 1.1312, + "step": 16550 + }, + { + "epoch": 0.02, + "learning_rate": 4.996661245119787e-05, + "loss": 1.0577, + "step": 16551 + }, + { + "epoch": 0.02, + "learning_rate": 4.996660839294866e-05, + "loss": 1.1619, + "step": 16552 + }, + { + "epoch": 0.02, + "learning_rate": 4.996660433445298e-05, + "loss": 1.2603, + "step": 16553 + }, + { + "epoch": 0.02, + "learning_rate": 4.996660027571085e-05, + "loss": 1.1966, + "step": 16554 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966596216722256e-05, + "loss": 0.9881, + "step": 16555 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966592157487205e-05, + "loss": 1.0043, + "step": 16556 + }, + { + "epoch": 0.02, + "learning_rate": 4.996658809800569e-05, + "loss": 0.9814, + "step": 16557 + }, + { + "epoch": 0.02, + "learning_rate": 4.996658403827771e-05, + "loss": 0.8964, + "step": 16558 + }, + { + "epoch": 0.02, + "learning_rate": 4.996657997830328e-05, + "loss": 1.0718, + "step": 16559 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966575918082385e-05, + "loss": 1.0221, + "step": 16560 + }, + { + "epoch": 0.02, + "learning_rate": 4.996657185761503e-05, + "loss": 1.3086, + "step": 16561 + }, + { + "epoch": 0.02, + "learning_rate": 4.996656779690122e-05, + "loss": 1.2142, + "step": 16562 + }, + { + "epoch": 0.02, + "learning_rate": 4.996656373594095e-05, + "loss": 1.3019, + "step": 16563 + }, + { + "epoch": 0.02, + "learning_rate": 4.996655967473422e-05, + "loss": 1.0613, + "step": 16564 + }, + { + "epoch": 0.02, + "learning_rate": 4.996655561328103e-05, + "loss": 1.2345, + "step": 16565 + }, + { + "epoch": 0.02, + "learning_rate": 4.996655155158138e-05, + "loss": 0.9696, + "step": 16566 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966547489635275e-05, + "loss": 0.7926, + "step": 16567 + }, + { + "epoch": 0.02, + "learning_rate": 4.99665434274427e-05, + "loss": 1.2845, + "step": 16568 + }, + { + "epoch": 0.02, + "learning_rate": 4.996653936500367e-05, + "loss": 0.9566, + "step": 16569 + }, + { + "epoch": 0.02, + "learning_rate": 4.996653530231819e-05, + "loss": 0.9945, + "step": 16570 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966531239386244e-05, + "loss": 0.7139, + "step": 16571 + }, + { + "epoch": 0.02, + "learning_rate": 4.996652717620784e-05, + "loss": 0.9812, + "step": 16572 + }, + { + "epoch": 0.02, + "learning_rate": 4.996652311278298e-05, + "loss": 1.2039, + "step": 16573 + }, + { + "epoch": 0.02, + "learning_rate": 4.996651904911166e-05, + "loss": 1.1899, + "step": 16574 + }, + { + "epoch": 0.02, + "learning_rate": 4.996651498519387e-05, + "loss": 0.7483, + "step": 16575 + }, + { + "epoch": 0.02, + "learning_rate": 4.996651092102963e-05, + "loss": 1.2013, + "step": 16576 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966506856618934e-05, + "loss": 0.8949, + "step": 16577 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966502791961774e-05, + "loss": 1.3335, + "step": 16578 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966498727058155e-05, + "loss": 1.0725, + "step": 16579 + }, + { + "epoch": 0.02, + "learning_rate": 4.996649466190808e-05, + "loss": 1.1775, + "step": 16580 + }, + { + "epoch": 0.02, + "learning_rate": 4.996649059651155e-05, + "loss": 1.1097, + "step": 16581 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966486530868554e-05, + "loss": 1.0837, + "step": 16582 + }, + { + "epoch": 0.02, + "learning_rate": 4.99664824649791e-05, + "loss": 1.0946, + "step": 16583 + }, + { + "epoch": 0.02, + "learning_rate": 4.996647839884319e-05, + "loss": 1.1962, + "step": 16584 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966474332460824e-05, + "loss": 0.7366, + "step": 16585 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966470265831995e-05, + "loss": 0.7874, + "step": 16586 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966466198956706e-05, + "loss": 1.0273, + "step": 16587 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966462131834965e-05, + "loss": 0.928, + "step": 16588 + }, + { + "epoch": 0.02, + "learning_rate": 4.996645806446676e-05, + "loss": 1.034, + "step": 16589 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966453996852105e-05, + "loss": 1.093, + "step": 16590 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966449928990985e-05, + "loss": 1.3554, + "step": 16591 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966445860883406e-05, + "loss": 0.9442, + "step": 16592 + }, + { + "epoch": 0.02, + "learning_rate": 4.996644179252937e-05, + "loss": 0.7757, + "step": 16593 + }, + { + "epoch": 0.02, + "learning_rate": 4.996643772392887e-05, + "loss": 1.1583, + "step": 16594 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966433655081926e-05, + "loss": 1.064, + "step": 16595 + }, + { + "epoch": 0.02, + "learning_rate": 4.996642958598851e-05, + "loss": 1.2097, + "step": 16596 + }, + { + "epoch": 0.02, + "learning_rate": 4.996642551664864e-05, + "loss": 0.7976, + "step": 16597 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966421447062316e-05, + "loss": 1.1266, + "step": 16598 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966417377229535e-05, + "loss": 1.1103, + "step": 16599 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966413307150286e-05, + "loss": 1.1689, + "step": 16600 + }, + { + "epoch": 0.02, + "learning_rate": 4.996640923682459e-05, + "loss": 1.0684, + "step": 16601 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966405166252425e-05, + "loss": 1.1045, + "step": 16602 + }, + { + "epoch": 0.02, + "learning_rate": 4.996640109543381e-05, + "loss": 1.0252, + "step": 16603 + }, + { + "epoch": 0.02, + "learning_rate": 4.996639702436874e-05, + "loss": 0.9911, + "step": 16604 + }, + { + "epoch": 0.02, + "learning_rate": 4.996639295305721e-05, + "loss": 1.2316, + "step": 16605 + }, + { + "epoch": 0.02, + "learning_rate": 4.996638888149922e-05, + "loss": 0.8862, + "step": 16606 + }, + { + "epoch": 0.02, + "learning_rate": 4.996638480969478e-05, + "loss": 1.215, + "step": 16607 + }, + { + "epoch": 0.02, + "learning_rate": 4.996638073764387e-05, + "loss": 0.9901, + "step": 16608 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966376665346505e-05, + "loss": 1.2669, + "step": 16609 + }, + { + "epoch": 0.02, + "learning_rate": 4.996637259280269e-05, + "loss": 1.2011, + "step": 16610 + }, + { + "epoch": 0.02, + "learning_rate": 4.996636852001241e-05, + "loss": 0.6976, + "step": 16611 + }, + { + "epoch": 0.02, + "learning_rate": 4.996636444697568e-05, + "loss": 0.9118, + "step": 16612 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966360373692485e-05, + "loss": 0.9129, + "step": 16613 + }, + { + "epoch": 0.02, + "learning_rate": 4.996635630016283e-05, + "loss": 0.9218, + "step": 16614 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966352226386725e-05, + "loss": 0.974, + "step": 16615 + }, + { + "epoch": 0.02, + "learning_rate": 4.996634815236416e-05, + "loss": 1.1337, + "step": 16616 + }, + { + "epoch": 0.02, + "learning_rate": 4.996634407809514e-05, + "loss": 0.8914, + "step": 16617 + }, + { + "epoch": 0.02, + "learning_rate": 4.996634000357966e-05, + "loss": 0.9413, + "step": 16618 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966335928817724e-05, + "loss": 1.0699, + "step": 16619 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966331853809334e-05, + "loss": 1.0181, + "step": 16620 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966327778554484e-05, + "loss": 1.0375, + "step": 16621 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966323703053175e-05, + "loss": 0.974, + "step": 16622 + }, + { + "epoch": 0.02, + "learning_rate": 4.996631962730541e-05, + "loss": 1.0828, + "step": 16623 + }, + { + "epoch": 0.02, + "learning_rate": 4.996631555131119e-05, + "loss": 1.3124, + "step": 16624 + }, + { + "epoch": 0.02, + "learning_rate": 4.996631147507052e-05, + "loss": 0.9741, + "step": 16625 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966307398583377e-05, + "loss": 0.8479, + "step": 16626 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966303321849783e-05, + "loss": 1.0124, + "step": 16627 + }, + { + "epoch": 0.02, + "learning_rate": 4.996629924486974e-05, + "loss": 1.3266, + "step": 16628 + }, + { + "epoch": 0.02, + "learning_rate": 4.996629516764323e-05, + "loss": 0.6632, + "step": 16629 + }, + { + "epoch": 0.02, + "learning_rate": 4.996629109017027e-05, + "loss": 0.3679, + "step": 16630 + }, + { + "epoch": 0.02, + "learning_rate": 4.996628701245085e-05, + "loss": 0.3498, + "step": 16631 + }, + { + "epoch": 0.02, + "learning_rate": 4.996628293448497e-05, + "loss": 0.3288, + "step": 16632 + }, + { + "epoch": 0.02, + "learning_rate": 4.996627885627264e-05, + "loss": 0.6746, + "step": 16633 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966274777813854e-05, + "loss": 0.9577, + "step": 16634 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966270699108605e-05, + "loss": 1.0087, + "step": 16635 + }, + { + "epoch": 0.02, + "learning_rate": 4.996626662015691e-05, + "loss": 1.2065, + "step": 16636 + }, + { + "epoch": 0.02, + "learning_rate": 4.996626254095875e-05, + "loss": 1.3431, + "step": 16637 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966258461514136e-05, + "loss": 1.0444, + "step": 16638 + }, + { + "epoch": 0.02, + "learning_rate": 4.996625438182306e-05, + "loss": 1.3579, + "step": 16639 + }, + { + "epoch": 0.02, + "learning_rate": 4.996625030188553e-05, + "loss": 1.3195, + "step": 16640 + }, + { + "epoch": 0.02, + "learning_rate": 4.996624622170155e-05, + "loss": 1.3443, + "step": 16641 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966242141271114e-05, + "loss": 1.2297, + "step": 16642 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966238060594216e-05, + "loss": 1.1234, + "step": 16643 + }, + { + "epoch": 0.02, + "learning_rate": 4.996623397967086e-05, + "loss": 1.2622, + "step": 16644 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966229898501057e-05, + "loss": 1.1222, + "step": 16645 + }, + { + "epoch": 0.02, + "learning_rate": 4.996622581708479e-05, + "loss": 1.1789, + "step": 16646 + }, + { + "epoch": 0.02, + "learning_rate": 4.996622173542207e-05, + "loss": 1.1328, + "step": 16647 + }, + { + "epoch": 0.02, + "learning_rate": 4.996621765351289e-05, + "loss": 1.1746, + "step": 16648 + }, + { + "epoch": 0.02, + "learning_rate": 4.996621357135726e-05, + "loss": 0.8202, + "step": 16649 + }, + { + "epoch": 0.02, + "learning_rate": 4.996620948895517e-05, + "loss": 0.868, + "step": 16650 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966205406306624e-05, + "loss": 0.8718, + "step": 16651 + }, + { + "epoch": 0.02, + "learning_rate": 4.996620132341162e-05, + "loss": 1.286, + "step": 16652 + }, + { + "epoch": 0.02, + "learning_rate": 4.996619724027017e-05, + "loss": 1.1304, + "step": 16653 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966193156882256e-05, + "loss": 0.8905, + "step": 16654 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966189073247885e-05, + "loss": 0.6736, + "step": 16655 + }, + { + "epoch": 0.02, + "learning_rate": 4.996618498936707e-05, + "loss": 0.6963, + "step": 16656 + }, + { + "epoch": 0.02, + "learning_rate": 4.996618090523979e-05, + "loss": 1.2097, + "step": 16657 + }, + { + "epoch": 0.02, + "learning_rate": 4.996617682086605e-05, + "loss": 1.2617, + "step": 16658 + }, + { + "epoch": 0.02, + "learning_rate": 4.996617273624586e-05, + "loss": 0.8113, + "step": 16659 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966168651379214e-05, + "loss": 0.402, + "step": 16660 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966164566266114e-05, + "loss": 0.392, + "step": 16661 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966160480906554e-05, + "loss": 0.4594, + "step": 16662 + }, + { + "epoch": 0.02, + "learning_rate": 4.996615639530055e-05, + "loss": 1.0284, + "step": 16663 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966152309448076e-05, + "loss": 1.4787, + "step": 16664 + }, + { + "epoch": 0.02, + "learning_rate": 4.996614822334915e-05, + "loss": 0.842, + "step": 16665 + }, + { + "epoch": 0.02, + "learning_rate": 4.996614413700378e-05, + "loss": 0.5603, + "step": 16666 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966140050411944e-05, + "loss": 0.983, + "step": 16667 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966135963573654e-05, + "loss": 1.1018, + "step": 16668 + }, + { + "epoch": 0.02, + "learning_rate": 4.996613187648891e-05, + "loss": 0.7569, + "step": 16669 + }, + { + "epoch": 0.02, + "learning_rate": 4.996612778915771e-05, + "loss": 1.3882, + "step": 16670 + }, + { + "epoch": 0.02, + "learning_rate": 4.996612370158006e-05, + "loss": 1.4836, + "step": 16671 + }, + { + "epoch": 0.02, + "learning_rate": 4.996611961375595e-05, + "loss": 0.8851, + "step": 16672 + }, + { + "epoch": 0.02, + "learning_rate": 4.996611552568539e-05, + "loss": 0.5777, + "step": 16673 + }, + { + "epoch": 0.02, + "learning_rate": 4.996611143736836e-05, + "loss": 0.5935, + "step": 16674 + }, + { + "epoch": 0.02, + "learning_rate": 4.996610734880489e-05, + "loss": 1.2951, + "step": 16675 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966103259994965e-05, + "loss": 1.2895, + "step": 16676 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966099170938574e-05, + "loss": 1.1234, + "step": 16677 + }, + { + "epoch": 0.02, + "learning_rate": 4.996609508163574e-05, + "loss": 1.0689, + "step": 16678 + }, + { + "epoch": 0.02, + "learning_rate": 4.996609099208645e-05, + "loss": 1.0915, + "step": 16679 + }, + { + "epoch": 0.02, + "learning_rate": 4.99660869022907e-05, + "loss": 1.3714, + "step": 16680 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966082812248496e-05, + "loss": 1.0338, + "step": 16681 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966078721959834e-05, + "loss": 0.708, + "step": 16682 + }, + { + "epoch": 0.02, + "learning_rate": 4.996607463142473e-05, + "loss": 0.779, + "step": 16683 + }, + { + "epoch": 0.02, + "learning_rate": 4.996607054064315e-05, + "loss": 1.1155, + "step": 16684 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966066449615134e-05, + "loss": 1.1101, + "step": 16685 + }, + { + "epoch": 0.02, + "learning_rate": 4.996606235834066e-05, + "loss": 1.0737, + "step": 16686 + }, + { + "epoch": 0.02, + "learning_rate": 4.996605826681972e-05, + "loss": 0.8443, + "step": 16687 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966054175052345e-05, + "loss": 0.979, + "step": 16688 + }, + { + "epoch": 0.02, + "learning_rate": 4.996605008303851e-05, + "loss": 1.174, + "step": 16689 + }, + { + "epoch": 0.02, + "learning_rate": 4.996604599077821e-05, + "loss": 1.2915, + "step": 16690 + }, + { + "epoch": 0.02, + "learning_rate": 4.996604189827146e-05, + "loss": 0.6875, + "step": 16691 + }, + { + "epoch": 0.02, + "learning_rate": 4.996603780551826e-05, + "loss": 1.2094, + "step": 16692 + }, + { + "epoch": 0.02, + "learning_rate": 4.996603371251861e-05, + "loss": 0.9497, + "step": 16693 + }, + { + "epoch": 0.02, + "learning_rate": 4.99660296192725e-05, + "loss": 0.8918, + "step": 16694 + }, + { + "epoch": 0.02, + "learning_rate": 4.996602552577994e-05, + "loss": 0.9831, + "step": 16695 + }, + { + "epoch": 0.02, + "learning_rate": 4.996602143204092e-05, + "loss": 1.2135, + "step": 16696 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966017338055446e-05, + "loss": 1.0891, + "step": 16697 + }, + { + "epoch": 0.02, + "learning_rate": 4.996601324382352e-05, + "loss": 0.9437, + "step": 16698 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966009149345136e-05, + "loss": 0.9093, + "step": 16699 + }, + { + "epoch": 0.02, + "learning_rate": 4.99660050546203e-05, + "loss": 0.9861, + "step": 16700 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966000959649016e-05, + "loss": 1.2344, + "step": 16701 + }, + { + "epoch": 0.02, + "learning_rate": 4.996599686443127e-05, + "loss": 1.0995, + "step": 16702 + }, + { + "epoch": 0.02, + "learning_rate": 4.996599276896707e-05, + "loss": 1.1801, + "step": 16703 + }, + { + "epoch": 0.02, + "learning_rate": 4.996598867325642e-05, + "loss": 1.3899, + "step": 16704 + }, + { + "epoch": 0.02, + "learning_rate": 4.996598457729932e-05, + "loss": 0.9331, + "step": 16705 + }, + { + "epoch": 0.02, + "learning_rate": 4.996598048109576e-05, + "loss": 1.2834, + "step": 16706 + }, + { + "epoch": 0.02, + "learning_rate": 4.996597638464575e-05, + "loss": 1.0422, + "step": 16707 + }, + { + "epoch": 0.02, + "learning_rate": 4.996597228794929e-05, + "loss": 1.0223, + "step": 16708 + }, + { + "epoch": 0.02, + "learning_rate": 4.996596819100637e-05, + "loss": 1.4528, + "step": 16709 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965964093817e-05, + "loss": 0.9167, + "step": 16710 + }, + { + "epoch": 0.02, + "learning_rate": 4.996595999638118e-05, + "loss": 1.2883, + "step": 16711 + }, + { + "epoch": 0.02, + "learning_rate": 4.99659558986989e-05, + "loss": 1.2901, + "step": 16712 + }, + { + "epoch": 0.02, + "learning_rate": 4.996595180077017e-05, + "loss": 1.1457, + "step": 16713 + }, + { + "epoch": 0.02, + "learning_rate": 4.996594770259498e-05, + "loss": 1.4019, + "step": 16714 + }, + { + "epoch": 0.02, + "learning_rate": 4.996594360417335e-05, + "loss": 1.08, + "step": 16715 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965939505505254e-05, + "loss": 1.0866, + "step": 16716 + }, + { + "epoch": 0.02, + "learning_rate": 4.996593540659071e-05, + "loss": 0.8027, + "step": 16717 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965931307429714e-05, + "loss": 1.0548, + "step": 16718 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965927208022255e-05, + "loss": 0.5563, + "step": 16719 + }, + { + "epoch": 0.02, + "learning_rate": 4.996592310836835e-05, + "loss": 1.0494, + "step": 16720 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965919008468e-05, + "loss": 1.0373, + "step": 16721 + }, + { + "epoch": 0.02, + "learning_rate": 4.996591490832119e-05, + "loss": 1.2256, + "step": 16722 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965910807927926e-05, + "loss": 0.939, + "step": 16723 + }, + { + "epoch": 0.02, + "learning_rate": 4.996590670728821e-05, + "loss": 0.9157, + "step": 16724 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965902606402035e-05, + "loss": 1.4165, + "step": 16725 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965898505269414e-05, + "loss": 0.8425, + "step": 16726 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965894403890347e-05, + "loss": 1.3648, + "step": 16727 + }, + { + "epoch": 0.02, + "learning_rate": 4.996589030226482e-05, + "loss": 1.3553, + "step": 16728 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965886200392834e-05, + "loss": 1.3747, + "step": 16729 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965882098274395e-05, + "loss": 0.946, + "step": 16730 + }, + { + "epoch": 0.02, + "learning_rate": 4.996587799590952e-05, + "loss": 1.4941, + "step": 16731 + }, + { + "epoch": 0.02, + "learning_rate": 4.996587389329817e-05, + "loss": 1.3225, + "step": 16732 + }, + { + "epoch": 0.02, + "learning_rate": 4.996586979044039e-05, + "loss": 1.0894, + "step": 16733 + }, + { + "epoch": 0.02, + "learning_rate": 4.996586568733615e-05, + "loss": 1.0873, + "step": 16734 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965861583985445e-05, + "loss": 0.9497, + "step": 16735 + }, + { + "epoch": 0.02, + "learning_rate": 4.99658574803883e-05, + "loss": 1.3325, + "step": 16736 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965853376544695e-05, + "loss": 1.0551, + "step": 16737 + }, + { + "epoch": 0.02, + "learning_rate": 4.996584927245465e-05, + "loss": 1.0396, + "step": 16738 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965845168118135e-05, + "loss": 1.1581, + "step": 16739 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965841063535176e-05, + "loss": 0.9999, + "step": 16740 + }, + { + "epoch": 0.02, + "learning_rate": 4.996583695870577e-05, + "loss": 1.0565, + "step": 16741 + }, + { + "epoch": 0.02, + "learning_rate": 4.996583285362991e-05, + "loss": 1.0459, + "step": 16742 + }, + { + "epoch": 0.02, + "learning_rate": 4.996582874830759e-05, + "loss": 1.0718, + "step": 16743 + }, + { + "epoch": 0.02, + "learning_rate": 4.996582464273882e-05, + "loss": 1.1664, + "step": 16744 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965820536923604e-05, + "loss": 1.1634, + "step": 16745 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965816430861936e-05, + "loss": 1.6196, + "step": 16746 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965812324553814e-05, + "loss": 1.2276, + "step": 16747 + }, + { + "epoch": 0.02, + "learning_rate": 4.996580821799924e-05, + "loss": 0.9878, + "step": 16748 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965804111198214e-05, + "loss": 1.1393, + "step": 16749 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965800004150734e-05, + "loss": 0.6872, + "step": 16750 + }, + { + "epoch": 0.02, + "learning_rate": 4.99657958968568e-05, + "loss": 1.0776, + "step": 16751 + }, + { + "epoch": 0.02, + "learning_rate": 4.996579178931642e-05, + "loss": 1.2382, + "step": 16752 + }, + { + "epoch": 0.02, + "learning_rate": 4.996578768152959e-05, + "loss": 1.1693, + "step": 16753 + }, + { + "epoch": 0.02, + "learning_rate": 4.99657835734963e-05, + "loss": 1.1698, + "step": 16754 + }, + { + "epoch": 0.02, + "learning_rate": 4.996577946521656e-05, + "loss": 1.1649, + "step": 16755 + }, + { + "epoch": 0.02, + "learning_rate": 4.996577535669037e-05, + "loss": 1.3233, + "step": 16756 + }, + { + "epoch": 0.02, + "learning_rate": 4.996577124791773e-05, + "loss": 1.095, + "step": 16757 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965767138898636e-05, + "loss": 1.08, + "step": 16758 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965763029633096e-05, + "loss": 1.0797, + "step": 16759 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965758920121096e-05, + "loss": 1.0174, + "step": 16760 + }, + { + "epoch": 0.02, + "learning_rate": 4.996575481036265e-05, + "loss": 1.3959, + "step": 16761 + }, + { + "epoch": 0.02, + "learning_rate": 4.996575070035775e-05, + "loss": 1.0393, + "step": 16762 + }, + { + "epoch": 0.02, + "learning_rate": 4.99657465901064e-05, + "loss": 1.3243, + "step": 16763 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965742479608605e-05, + "loss": 1.252, + "step": 16764 + }, + { + "epoch": 0.02, + "learning_rate": 4.996573836886435e-05, + "loss": 0.9156, + "step": 16765 + }, + { + "epoch": 0.02, + "learning_rate": 4.996573425787364e-05, + "loss": 1.3264, + "step": 16766 + }, + { + "epoch": 0.02, + "learning_rate": 4.996573014663649e-05, + "loss": 1.0443, + "step": 16767 + }, + { + "epoch": 0.02, + "learning_rate": 4.996572603515288e-05, + "loss": 1.0448, + "step": 16768 + }, + { + "epoch": 0.02, + "learning_rate": 4.996572192342282e-05, + "loss": 1.1402, + "step": 16769 + }, + { + "epoch": 0.02, + "learning_rate": 4.996571781144632e-05, + "loss": 1.2106, + "step": 16770 + }, + { + "epoch": 0.02, + "learning_rate": 4.996571369922336e-05, + "loss": 1.1338, + "step": 16771 + }, + { + "epoch": 0.02, + "learning_rate": 4.996570958675395e-05, + "loss": 1.1623, + "step": 16772 + }, + { + "epoch": 0.02, + "learning_rate": 4.996570547403809e-05, + "loss": 1.3325, + "step": 16773 + }, + { + "epoch": 0.02, + "learning_rate": 4.996570136107578e-05, + "loss": 1.0579, + "step": 16774 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965697247867014e-05, + "loss": 1.0937, + "step": 16775 + }, + { + "epoch": 0.02, + "learning_rate": 4.99656931344118e-05, + "loss": 1.0413, + "step": 16776 + }, + { + "epoch": 0.02, + "learning_rate": 4.996568902071014e-05, + "loss": 0.9278, + "step": 16777 + }, + { + "epoch": 0.02, + "learning_rate": 4.996568490676202e-05, + "loss": 1.0191, + "step": 16778 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965680792567455e-05, + "loss": 1.1559, + "step": 16779 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965676678126435e-05, + "loss": 0.8959, + "step": 16780 + }, + { + "epoch": 0.02, + "learning_rate": 4.996567256343897e-05, + "loss": 1.3292, + "step": 16781 + }, + { + "epoch": 0.02, + "learning_rate": 4.996566844850505e-05, + "loss": 0.9531, + "step": 16782 + }, + { + "epoch": 0.02, + "learning_rate": 4.996566433332468e-05, + "loss": 1.1523, + "step": 16783 + }, + { + "epoch": 0.02, + "learning_rate": 4.996566021789786e-05, + "loss": 1.0218, + "step": 16784 + }, + { + "epoch": 0.02, + "learning_rate": 4.996565610222459e-05, + "loss": 1.3074, + "step": 16785 + }, + { + "epoch": 0.02, + "learning_rate": 4.996565198630487e-05, + "loss": 1.2885, + "step": 16786 + }, + { + "epoch": 0.02, + "learning_rate": 4.99656478701387e-05, + "loss": 1.1237, + "step": 16787 + }, + { + "epoch": 0.02, + "learning_rate": 4.996564375372608e-05, + "loss": 0.9325, + "step": 16788 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965639637067007e-05, + "loss": 0.8641, + "step": 16789 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965635520161494e-05, + "loss": 1.0525, + "step": 16790 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965631403009514e-05, + "loss": 1.2073, + "step": 16791 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965627285611096e-05, + "loss": 1.2401, + "step": 16792 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965623167966225e-05, + "loss": 0.9488, + "step": 16793 + }, + { + "epoch": 0.02, + "learning_rate": 4.99656190500749e-05, + "loss": 0.9807, + "step": 16794 + }, + { + "epoch": 0.02, + "learning_rate": 4.996561493193713e-05, + "loss": 1.0958, + "step": 16795 + }, + { + "epoch": 0.02, + "learning_rate": 4.996561081355291e-05, + "loss": 1.199, + "step": 16796 + }, + { + "epoch": 0.02, + "learning_rate": 4.996560669492223e-05, + "loss": 1.0049, + "step": 16797 + }, + { + "epoch": 0.02, + "learning_rate": 4.996560257604511e-05, + "loss": 1.1125, + "step": 16798 + }, + { + "epoch": 0.02, + "learning_rate": 4.996559845692154e-05, + "loss": 1.0018, + "step": 16799 + }, + { + "epoch": 0.02, + "learning_rate": 4.996559433755152e-05, + "loss": 0.8581, + "step": 16800 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965590217935046e-05, + "loss": 1.2083, + "step": 16801 + }, + { + "epoch": 0.02, + "learning_rate": 4.996558609807213e-05, + "loss": 1.1268, + "step": 16802 + }, + { + "epoch": 0.02, + "learning_rate": 4.996558197796275e-05, + "loss": 1.0259, + "step": 16803 + }, + { + "epoch": 0.02, + "learning_rate": 4.996557785760693e-05, + "loss": 1.074, + "step": 16804 + }, + { + "epoch": 0.02, + "learning_rate": 4.996557373700466e-05, + "loss": 0.5176, + "step": 16805 + }, + { + "epoch": 0.02, + "learning_rate": 4.996556961615594e-05, + "loss": 0.4992, + "step": 16806 + }, + { + "epoch": 0.02, + "learning_rate": 4.996556549506077e-05, + "loss": 0.4515, + "step": 16807 + }, + { + "epoch": 0.02, + "learning_rate": 4.996556137371915e-05, + "loss": 0.6775, + "step": 16808 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965557252131084e-05, + "loss": 0.9663, + "step": 16809 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965553130296564e-05, + "loss": 0.9702, + "step": 16810 + }, + { + "epoch": 0.02, + "learning_rate": 4.99655490082156e-05, + "loss": 1.4939, + "step": 16811 + }, + { + "epoch": 0.02, + "learning_rate": 4.996554488588818e-05, + "loss": 1.1516, + "step": 16812 + }, + { + "epoch": 0.02, + "learning_rate": 4.996554076331431e-05, + "loss": 1.2307, + "step": 16813 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965536640494e-05, + "loss": 1.1261, + "step": 16814 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965532517427235e-05, + "loss": 1.2522, + "step": 16815 + }, + { + "epoch": 0.02, + "learning_rate": 4.996552839411401e-05, + "loss": 1.0733, + "step": 16816 + }, + { + "epoch": 0.02, + "learning_rate": 4.996552427055435e-05, + "loss": 0.4757, + "step": 16817 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965520146748244e-05, + "loss": 0.3949, + "step": 16818 + }, + { + "epoch": 0.02, + "learning_rate": 4.996551602269568e-05, + "loss": 0.3809, + "step": 16819 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965511898396664e-05, + "loss": 0.3144, + "step": 16820 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965507773851205e-05, + "loss": 0.4285, + "step": 16821 + }, + { + "epoch": 0.02, + "learning_rate": 4.99655036490593e-05, + "loss": 0.3382, + "step": 16822 + }, + { + "epoch": 0.02, + "learning_rate": 4.996549952402094e-05, + "loss": 0.2479, + "step": 16823 + }, + { + "epoch": 0.02, + "learning_rate": 4.996549539873614e-05, + "loss": 0.2332, + "step": 16824 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965491273204877e-05, + "loss": 0.4393, + "step": 16825 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965487147427174e-05, + "loss": 0.4781, + "step": 16826 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965483021403026e-05, + "loss": 0.7115, + "step": 16827 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965478895132426e-05, + "loss": 0.898, + "step": 16828 + }, + { + "epoch": 0.02, + "learning_rate": 4.996547476861537e-05, + "loss": 0.9597, + "step": 16829 + }, + { + "epoch": 0.02, + "learning_rate": 4.996547064185187e-05, + "loss": 1.1202, + "step": 16830 + }, + { + "epoch": 0.02, + "learning_rate": 4.996546651484192e-05, + "loss": 1.0522, + "step": 16831 + }, + { + "epoch": 0.02, + "learning_rate": 4.996546238758553e-05, + "loss": 1.3063, + "step": 16832 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965458260082686e-05, + "loss": 1.0041, + "step": 16833 + }, + { + "epoch": 0.02, + "learning_rate": 4.996545413233339e-05, + "loss": 1.3297, + "step": 16834 + }, + { + "epoch": 0.02, + "learning_rate": 4.996545000433765e-05, + "loss": 1.1705, + "step": 16835 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965445876095465e-05, + "loss": 0.9359, + "step": 16836 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965441747606824e-05, + "loss": 1.1187, + "step": 16837 + }, + { + "epoch": 0.02, + "learning_rate": 4.996543761887174e-05, + "loss": 1.0203, + "step": 16838 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965433489890204e-05, + "loss": 1.0938, + "step": 16839 + }, + { + "epoch": 0.02, + "learning_rate": 4.996542936066222e-05, + "loss": 1.2317, + "step": 16840 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965425231187786e-05, + "loss": 1.1463, + "step": 16841 + }, + { + "epoch": 0.02, + "learning_rate": 4.99654211014669e-05, + "loss": 1.1069, + "step": 16842 + }, + { + "epoch": 0.02, + "learning_rate": 4.996541697149958e-05, + "loss": 0.9595, + "step": 16843 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965412841285795e-05, + "loss": 1.0811, + "step": 16844 + }, + { + "epoch": 0.02, + "learning_rate": 4.996540871082558e-05, + "loss": 1.2748, + "step": 16845 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965404580118905e-05, + "loss": 1.1745, + "step": 16846 + }, + { + "epoch": 0.02, + "learning_rate": 4.996540044916579e-05, + "loss": 1.1534, + "step": 16847 + }, + { + "epoch": 0.02, + "learning_rate": 4.996539631796622e-05, + "loss": 0.5495, + "step": 16848 + }, + { + "epoch": 0.02, + "learning_rate": 4.99653921865202e-05, + "loss": 0.8761, + "step": 16849 + }, + { + "epoch": 0.02, + "learning_rate": 4.996538805482774e-05, + "loss": 0.9127, + "step": 16850 + }, + { + "epoch": 0.02, + "learning_rate": 4.996538392288883e-05, + "loss": 1.0061, + "step": 16851 + }, + { + "epoch": 0.02, + "learning_rate": 4.996537979070347e-05, + "loss": 1.2454, + "step": 16852 + }, + { + "epoch": 0.02, + "learning_rate": 4.996537565827166e-05, + "loss": 1.378, + "step": 16853 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965371525593405e-05, + "loss": 0.9582, + "step": 16854 + }, + { + "epoch": 0.02, + "learning_rate": 4.99653673926687e-05, + "loss": 1.0881, + "step": 16855 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965363259497556e-05, + "loss": 1.0177, + "step": 16856 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965359126079955e-05, + "loss": 1.1509, + "step": 16857 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965354992415916e-05, + "loss": 0.972, + "step": 16858 + }, + { + "epoch": 0.02, + "learning_rate": 4.996535085850542e-05, + "loss": 1.0542, + "step": 16859 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965346724348485e-05, + "loss": 0.17, + "step": 16860 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965342589945094e-05, + "loss": 0.6616, + "step": 16861 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965338455295264e-05, + "loss": 1.6308, + "step": 16862 + }, + { + "epoch": 0.02, + "learning_rate": 4.996533432039898e-05, + "loss": 1.3101, + "step": 16863 + }, + { + "epoch": 0.02, + "learning_rate": 4.996533018525625e-05, + "loss": 1.015, + "step": 16864 + }, + { + "epoch": 0.02, + "learning_rate": 4.996532604986707e-05, + "loss": 0.8646, + "step": 16865 + }, + { + "epoch": 0.02, + "learning_rate": 4.996532191423145e-05, + "loss": 0.749, + "step": 16866 + }, + { + "epoch": 0.02, + "learning_rate": 4.996531777834938e-05, + "loss": 0.7537, + "step": 16867 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965313642220865e-05, + "loss": 1.1897, + "step": 16868 + }, + { + "epoch": 0.02, + "learning_rate": 4.99653095058459e-05, + "loss": 1.0762, + "step": 16869 + }, + { + "epoch": 0.02, + "learning_rate": 4.996530536922449e-05, + "loss": 1.1929, + "step": 16870 + }, + { + "epoch": 0.02, + "learning_rate": 4.996530123235663e-05, + "loss": 1.1974, + "step": 16871 + }, + { + "epoch": 0.02, + "learning_rate": 4.996529709524232e-05, + "loss": 0.9872, + "step": 16872 + }, + { + "epoch": 0.02, + "learning_rate": 4.996529295788157e-05, + "loss": 0.8774, + "step": 16873 + }, + { + "epoch": 0.02, + "learning_rate": 4.996528882027437e-05, + "loss": 0.8457, + "step": 16874 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965284682420724e-05, + "loss": 1.0507, + "step": 16875 + }, + { + "epoch": 0.02, + "learning_rate": 4.996528054432064e-05, + "loss": 1.1527, + "step": 16876 + }, + { + "epoch": 0.02, + "learning_rate": 4.996527640597409e-05, + "loss": 1.0335, + "step": 16877 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965272267381106e-05, + "loss": 0.7664, + "step": 16878 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965268128541675e-05, + "loss": 0.7212, + "step": 16879 + }, + { + "epoch": 0.02, + "learning_rate": 4.99652639894558e-05, + "loss": 0.9155, + "step": 16880 + }, + { + "epoch": 0.02, + "learning_rate": 4.996525985012347e-05, + "loss": 1.0865, + "step": 16881 + }, + { + "epoch": 0.02, + "learning_rate": 4.99652557105447e-05, + "loss": 1.294, + "step": 16882 + }, + { + "epoch": 0.02, + "learning_rate": 4.996525157071948e-05, + "loss": 0.2576, + "step": 16883 + }, + { + "epoch": 0.02, + "learning_rate": 4.996524743064781e-05, + "loss": 0.2085, + "step": 16884 + }, + { + "epoch": 0.02, + "learning_rate": 4.99652432903297e-05, + "loss": 0.124, + "step": 16885 + }, + { + "epoch": 0.02, + "learning_rate": 4.996523914976514e-05, + "loss": 0.6439, + "step": 16886 + }, + { + "epoch": 0.02, + "learning_rate": 4.996523500895414e-05, + "loss": 1.0005, + "step": 16887 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965230867896686e-05, + "loss": 0.9706, + "step": 16888 + }, + { + "epoch": 0.02, + "learning_rate": 4.996522672659279e-05, + "loss": 1.0865, + "step": 16889 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965222585042446e-05, + "loss": 1.2426, + "step": 16890 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965218443245656e-05, + "loss": 1.1496, + "step": 16891 + }, + { + "epoch": 0.02, + "learning_rate": 4.996521430120242e-05, + "loss": 1.3892, + "step": 16892 + }, + { + "epoch": 0.02, + "learning_rate": 4.996521015891274e-05, + "loss": 0.9313, + "step": 16893 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965206016376607e-05, + "loss": 1.0959, + "step": 16894 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965201873594034e-05, + "loss": 1.2665, + "step": 16895 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965197730565015e-05, + "loss": 1.1766, + "step": 16896 + }, + { + "epoch": 0.02, + "learning_rate": 4.996519358728955e-05, + "loss": 0.8894, + "step": 16897 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965189443767646e-05, + "loss": 1.3634, + "step": 16898 + }, + { + "epoch": 0.02, + "learning_rate": 4.996518529999928e-05, + "loss": 1.0216, + "step": 16899 + }, + { + "epoch": 0.02, + "learning_rate": 4.996518115598448e-05, + "loss": 0.8901, + "step": 16900 + }, + { + "epoch": 0.02, + "learning_rate": 4.996517701172323e-05, + "loss": 1.251, + "step": 16901 + }, + { + "epoch": 0.02, + "learning_rate": 4.996517286721554e-05, + "loss": 1.6033, + "step": 16902 + }, + { + "epoch": 0.02, + "learning_rate": 4.99651687224614e-05, + "loss": 1.3355, + "step": 16903 + }, + { + "epoch": 0.02, + "learning_rate": 4.996516457746081e-05, + "loss": 1.1677, + "step": 16904 + }, + { + "epoch": 0.02, + "learning_rate": 4.996516043221378e-05, + "loss": 0.9257, + "step": 16905 + }, + { + "epoch": 0.02, + "learning_rate": 4.996515628672031e-05, + "loss": 1.0758, + "step": 16906 + }, + { + "epoch": 0.02, + "learning_rate": 4.996515214098038e-05, + "loss": 1.1511, + "step": 16907 + }, + { + "epoch": 0.02, + "learning_rate": 4.996514799499401e-05, + "loss": 1.0877, + "step": 16908 + }, + { + "epoch": 0.02, + "learning_rate": 4.99651438487612e-05, + "loss": 1.1926, + "step": 16909 + }, + { + "epoch": 0.02, + "learning_rate": 4.996513970228194e-05, + "loss": 1.0493, + "step": 16910 + }, + { + "epoch": 0.02, + "learning_rate": 4.996513555555624e-05, + "loss": 1.2535, + "step": 16911 + }, + { + "epoch": 0.02, + "learning_rate": 4.996513140858409e-05, + "loss": 1.2452, + "step": 16912 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965127261365495e-05, + "loss": 1.4611, + "step": 16913 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965123113900456e-05, + "loss": 1.1301, + "step": 16914 + }, + { + "epoch": 0.02, + "learning_rate": 4.996511896618897e-05, + "loss": 1.1435, + "step": 16915 + }, + { + "epoch": 0.02, + "learning_rate": 4.996511481823104e-05, + "loss": 1.1146, + "step": 16916 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965110670026664e-05, + "loss": 1.1103, + "step": 16917 + }, + { + "epoch": 0.02, + "learning_rate": 4.996510652157584e-05, + "loss": 0.9252, + "step": 16918 + }, + { + "epoch": 0.02, + "learning_rate": 4.996510237287858e-05, + "loss": 0.7727, + "step": 16919 + }, + { + "epoch": 0.02, + "learning_rate": 4.996509822393487e-05, + "loss": 0.9401, + "step": 16920 + }, + { + "epoch": 0.02, + "learning_rate": 4.996509407474471e-05, + "loss": 1.0591, + "step": 16921 + }, + { + "epoch": 0.02, + "learning_rate": 4.996508992530812e-05, + "loss": 1.1756, + "step": 16922 + }, + { + "epoch": 0.02, + "learning_rate": 4.996508577562507e-05, + "loss": 0.8581, + "step": 16923 + }, + { + "epoch": 0.02, + "learning_rate": 4.996508162569558e-05, + "loss": 1.0644, + "step": 16924 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965077475519646e-05, + "loss": 1.1265, + "step": 16925 + }, + { + "epoch": 0.02, + "learning_rate": 4.996507332509727e-05, + "loss": 1.4137, + "step": 16926 + }, + { + "epoch": 0.02, + "learning_rate": 4.996506917442844e-05, + "loss": 1.1044, + "step": 16927 + }, + { + "epoch": 0.02, + "learning_rate": 4.996506502351317e-05, + "loss": 1.2725, + "step": 16928 + }, + { + "epoch": 0.02, + "learning_rate": 4.996506087235146e-05, + "loss": 0.8745, + "step": 16929 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965056720943304e-05, + "loss": 0.9583, + "step": 16930 + }, + { + "epoch": 0.02, + "learning_rate": 4.99650525692887e-05, + "loss": 0.8965, + "step": 16931 + }, + { + "epoch": 0.02, + "learning_rate": 4.996504841738765e-05, + "loss": 0.8921, + "step": 16932 + }, + { + "epoch": 0.02, + "learning_rate": 4.996504426524016e-05, + "loss": 0.9241, + "step": 16933 + }, + { + "epoch": 0.02, + "learning_rate": 4.996504011284623e-05, + "loss": 1.0459, + "step": 16934 + }, + { + "epoch": 0.02, + "learning_rate": 4.996503596020585e-05, + "loss": 1.1032, + "step": 16935 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965031807319027e-05, + "loss": 1.2464, + "step": 16936 + }, + { + "epoch": 0.02, + "learning_rate": 4.996502765418576e-05, + "loss": 0.8342, + "step": 16937 + }, + { + "epoch": 0.02, + "learning_rate": 4.996502350080604e-05, + "loss": 1.0984, + "step": 16938 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965019347179885e-05, + "loss": 1.4428, + "step": 16939 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965015193307285e-05, + "loss": 1.1296, + "step": 16940 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965011039188234e-05, + "loss": 1.182, + "step": 16941 + }, + { + "epoch": 0.02, + "learning_rate": 4.996500688482275e-05, + "loss": 1.1627, + "step": 16942 + }, + { + "epoch": 0.02, + "learning_rate": 4.996500273021082e-05, + "loss": 1.0323, + "step": 16943 + }, + { + "epoch": 0.02, + "learning_rate": 4.996499857535244e-05, + "loss": 0.8979, + "step": 16944 + }, + { + "epoch": 0.02, + "learning_rate": 4.996499442024762e-05, + "loss": 0.943, + "step": 16945 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964990264896354e-05, + "loss": 1.1286, + "step": 16946 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964986109298647e-05, + "loss": 1.0072, + "step": 16947 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964981953454494e-05, + "loss": 1.1529, + "step": 16948 + }, + { + "epoch": 0.02, + "learning_rate": 4.99649777973639e-05, + "loss": 1.1849, + "step": 16949 + }, + { + "epoch": 0.02, + "learning_rate": 4.996497364102686e-05, + "loss": 1.1461, + "step": 16950 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964969484443366e-05, + "loss": 0.7396, + "step": 16951 + }, + { + "epoch": 0.02, + "learning_rate": 4.996496532761344e-05, + "loss": 1.1525, + "step": 16952 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964961170537074e-05, + "loss": 1.3184, + "step": 16953 + }, + { + "epoch": 0.02, + "learning_rate": 4.996495701321426e-05, + "loss": 0.9543, + "step": 16954 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964952855645e-05, + "loss": 1.0231, + "step": 16955 + }, + { + "epoch": 0.02, + "learning_rate": 4.99649486978293e-05, + "loss": 0.8666, + "step": 16956 + }, + { + "epoch": 0.02, + "learning_rate": 4.996494453976716e-05, + "loss": 1.1742, + "step": 16957 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964940381458567e-05, + "loss": 1.1606, + "step": 16958 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964936222903535e-05, + "loss": 1.1058, + "step": 16959 + }, + { + "epoch": 0.02, + "learning_rate": 4.996493206410205e-05, + "loss": 1.1045, + "step": 16960 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964927905054135e-05, + "loss": 0.7528, + "step": 16961 + }, + { + "epoch": 0.02, + "learning_rate": 4.996492374575977e-05, + "loss": 0.9455, + "step": 16962 + }, + { + "epoch": 0.02, + "learning_rate": 4.996491958621897e-05, + "loss": 1.0045, + "step": 16963 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964915426431725e-05, + "loss": 0.8128, + "step": 16964 + }, + { + "epoch": 0.02, + "learning_rate": 4.996491126639803e-05, + "loss": 1.019, + "step": 16965 + }, + { + "epoch": 0.02, + "learning_rate": 4.99649071061179e-05, + "loss": 1.0857, + "step": 16966 + }, + { + "epoch": 0.02, + "learning_rate": 4.996490294559132e-05, + "loss": 1.3725, + "step": 16967 + }, + { + "epoch": 0.02, + "learning_rate": 4.99648987848183e-05, + "loss": 1.0572, + "step": 16968 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964894623798834e-05, + "loss": 1.2566, + "step": 16969 + }, + { + "epoch": 0.02, + "learning_rate": 4.996489046253293e-05, + "loss": 1.0518, + "step": 16970 + }, + { + "epoch": 0.02, + "learning_rate": 4.996488630102058e-05, + "loss": 0.9972, + "step": 16971 + }, + { + "epoch": 0.02, + "learning_rate": 4.996488213926179e-05, + "loss": 0.872, + "step": 16972 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964877977256555e-05, + "loss": 1.3396, + "step": 16973 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964873815004876e-05, + "loss": 1.1596, + "step": 16974 + }, + { + "epoch": 0.02, + "learning_rate": 4.996486965250675e-05, + "loss": 1.3249, + "step": 16975 + }, + { + "epoch": 0.02, + "learning_rate": 4.996486548976219e-05, + "loss": 1.1562, + "step": 16976 + }, + { + "epoch": 0.02, + "learning_rate": 4.996486132677118e-05, + "loss": 1.0276, + "step": 16977 + }, + { + "epoch": 0.02, + "learning_rate": 4.996485716353374e-05, + "loss": 0.8831, + "step": 16978 + }, + { + "epoch": 0.02, + "learning_rate": 4.996485300004985e-05, + "loss": 0.7286, + "step": 16979 + }, + { + "epoch": 0.02, + "learning_rate": 4.996484883631951e-05, + "loss": 0.8235, + "step": 16980 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964844672342736e-05, + "loss": 1.1816, + "step": 16981 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964840508119516e-05, + "loss": 1.514, + "step": 16982 + }, + { + "epoch": 0.02, + "learning_rate": 4.996483634364986e-05, + "loss": 1.2372, + "step": 16983 + }, + { + "epoch": 0.02, + "learning_rate": 4.996483217893375e-05, + "loss": 1.0974, + "step": 16984 + }, + { + "epoch": 0.02, + "learning_rate": 4.996482801397121e-05, + "loss": 0.9436, + "step": 16985 + }, + { + "epoch": 0.02, + "learning_rate": 4.996482384876222e-05, + "loss": 1.1224, + "step": 16986 + }, + { + "epoch": 0.02, + "learning_rate": 4.996481968330679e-05, + "loss": 1.2766, + "step": 16987 + }, + { + "epoch": 0.02, + "learning_rate": 4.996481551760492e-05, + "loss": 1.1842, + "step": 16988 + }, + { + "epoch": 0.02, + "learning_rate": 4.996481135165661e-05, + "loss": 0.9383, + "step": 16989 + }, + { + "epoch": 0.02, + "learning_rate": 4.996480718546185e-05, + "loss": 1.036, + "step": 16990 + }, + { + "epoch": 0.02, + "learning_rate": 4.996480301902065e-05, + "loss": 1.1436, + "step": 16991 + }, + { + "epoch": 0.02, + "learning_rate": 4.996479885233301e-05, + "loss": 1.1322, + "step": 16992 + }, + { + "epoch": 0.02, + "learning_rate": 4.996479468539893e-05, + "loss": 1.0676, + "step": 16993 + }, + { + "epoch": 0.02, + "learning_rate": 4.996479051821841e-05, + "loss": 1.0792, + "step": 16994 + }, + { + "epoch": 0.02, + "learning_rate": 4.996478635079144e-05, + "loss": 0.6835, + "step": 16995 + }, + { + "epoch": 0.02, + "learning_rate": 4.996478218311803e-05, + "loss": 0.9158, + "step": 16996 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964778015198176e-05, + "loss": 1.0694, + "step": 16997 + }, + { + "epoch": 0.02, + "learning_rate": 4.996477384703189e-05, + "loss": 1.1936, + "step": 16998 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964769678619157e-05, + "loss": 1.3911, + "step": 16999 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964765509959985e-05, + "loss": 0.9993, + "step": 17000 + }, + { + "epoch": 0.02, + "eval_loss": 1.0266118049621582, + "eval_runtime": 85.6314, + "eval_samples_per_second": 16.174, + "eval_steps_per_second": 4.052, + "step": 17000 + }, + { + "epoch": 0.02, + "learning_rate": 4.996476134105437e-05, + "loss": 1.3432, + "step": 17001 + }, + { + "epoch": 0.02, + "learning_rate": 4.996475717190231e-05, + "loss": 1.0194, + "step": 17002 + }, + { + "epoch": 0.02, + "learning_rate": 4.996475300250381e-05, + "loss": 1.1707, + "step": 17003 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964748832858865e-05, + "loss": 1.2612, + "step": 17004 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964744662967484e-05, + "loss": 1.1914, + "step": 17005 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964740492829656e-05, + "loss": 1.149, + "step": 17006 + }, + { + "epoch": 0.02, + "learning_rate": 4.99647363224454e-05, + "loss": 1.1162, + "step": 17007 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964732151814685e-05, + "loss": 1.111, + "step": 17008 + }, + { + "epoch": 0.02, + "learning_rate": 4.996472798093754e-05, + "loss": 1.2897, + "step": 17009 + }, + { + "epoch": 0.02, + "learning_rate": 4.996472380981395e-05, + "loss": 1.0675, + "step": 17010 + }, + { + "epoch": 0.02, + "learning_rate": 4.996471963844391e-05, + "loss": 1.0948, + "step": 17011 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964715466827444e-05, + "loss": 1.1462, + "step": 17012 + }, + { + "epoch": 0.02, + "learning_rate": 4.996471129496453e-05, + "loss": 1.2166, + "step": 17013 + }, + { + "epoch": 0.02, + "learning_rate": 4.996470712285518e-05, + "loss": 1.055, + "step": 17014 + }, + { + "epoch": 0.02, + "learning_rate": 4.996470295049938e-05, + "loss": 1.1979, + "step": 17015 + }, + { + "epoch": 0.02, + "learning_rate": 4.996469877789714e-05, + "loss": 1.0299, + "step": 17016 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964694605048465e-05, + "loss": 1.0215, + "step": 17017 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964690431953354e-05, + "loss": 0.7385, + "step": 17018 + }, + { + "epoch": 0.02, + "learning_rate": 4.996468625861179e-05, + "loss": 1.0382, + "step": 17019 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964682085023795e-05, + "loss": 1.2014, + "step": 17020 + }, + { + "epoch": 0.02, + "learning_rate": 4.996467791118935e-05, + "loss": 0.9783, + "step": 17021 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964673737108466e-05, + "loss": 0.6325, + "step": 17022 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964669562781146e-05, + "loss": 0.8298, + "step": 17023 + }, + { + "epoch": 0.02, + "learning_rate": 4.996466538820738e-05, + "loss": 1.088, + "step": 17024 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964661213387176e-05, + "loss": 0.8899, + "step": 17025 + }, + { + "epoch": 0.02, + "learning_rate": 4.996465703832054e-05, + "loss": 1.2099, + "step": 17026 + }, + { + "epoch": 0.02, + "learning_rate": 4.996465286300745e-05, + "loss": 1.2739, + "step": 17027 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964648687447926e-05, + "loss": 1.3037, + "step": 17028 + }, + { + "epoch": 0.02, + "learning_rate": 4.996464451164196e-05, + "loss": 1.1223, + "step": 17029 + }, + { + "epoch": 0.02, + "learning_rate": 4.996464033558955e-05, + "loss": 1.3314, + "step": 17030 + }, + { + "epoch": 0.02, + "learning_rate": 4.99646361592907e-05, + "loss": 1.0791, + "step": 17031 + }, + { + "epoch": 0.02, + "learning_rate": 4.996463198274542e-05, + "loss": 0.9956, + "step": 17032 + }, + { + "epoch": 0.02, + "learning_rate": 4.996462780595369e-05, + "loss": 1.0603, + "step": 17033 + }, + { + "epoch": 0.02, + "learning_rate": 4.996462362891552e-05, + "loss": 1.3253, + "step": 17034 + }, + { + "epoch": 0.02, + "learning_rate": 4.996461945163091e-05, + "loss": 1.123, + "step": 17035 + }, + { + "epoch": 0.02, + "learning_rate": 4.996461527409987e-05, + "loss": 0.5553, + "step": 17036 + }, + { + "epoch": 0.02, + "learning_rate": 4.996461109632238e-05, + "loss": 0.9056, + "step": 17037 + }, + { + "epoch": 0.02, + "learning_rate": 4.996460691829845e-05, + "loss": 1.1559, + "step": 17038 + }, + { + "epoch": 0.02, + "learning_rate": 4.996460274002808e-05, + "loss": 1.3572, + "step": 17039 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964598561511274e-05, + "loss": 1.1788, + "step": 17040 + }, + { + "epoch": 0.02, + "learning_rate": 4.996459438274803e-05, + "loss": 1.3355, + "step": 17041 + }, + { + "epoch": 0.02, + "learning_rate": 4.996459020373834e-05, + "loss": 0.7532, + "step": 17042 + }, + { + "epoch": 0.02, + "learning_rate": 4.996458602448221e-05, + "loss": 1.2198, + "step": 17043 + }, + { + "epoch": 0.02, + "learning_rate": 4.996458184497964e-05, + "loss": 0.9523, + "step": 17044 + }, + { + "epoch": 0.02, + "learning_rate": 4.996457766523063e-05, + "loss": 1.1045, + "step": 17045 + }, + { + "epoch": 0.02, + "learning_rate": 4.996457348523518e-05, + "loss": 1.1337, + "step": 17046 + }, + { + "epoch": 0.02, + "learning_rate": 4.996456930499329e-05, + "loss": 1.0515, + "step": 17047 + }, + { + "epoch": 0.02, + "learning_rate": 4.996456512450497e-05, + "loss": 1.319, + "step": 17048 + }, + { + "epoch": 0.02, + "learning_rate": 4.99645609437702e-05, + "loss": 1.2448, + "step": 17049 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964556762788994e-05, + "loss": 1.2194, + "step": 17050 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964552581561344e-05, + "loss": 1.087, + "step": 17051 + }, + { + "epoch": 0.02, + "learning_rate": 4.996454840008726e-05, + "loss": 1.3277, + "step": 17052 + }, + { + "epoch": 0.02, + "learning_rate": 4.996454421836674e-05, + "loss": 0.4406, + "step": 17053 + }, + { + "epoch": 0.02, + "learning_rate": 4.996454003639977e-05, + "loss": 1.022, + "step": 17054 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964535854186366e-05, + "loss": 1.3239, + "step": 17055 + }, + { + "epoch": 0.02, + "learning_rate": 4.996453167172652e-05, + "loss": 1.2189, + "step": 17056 + }, + { + "epoch": 0.02, + "learning_rate": 4.996452748902024e-05, + "loss": 1.2144, + "step": 17057 + }, + { + "epoch": 0.02, + "learning_rate": 4.996452330606752e-05, + "loss": 0.9751, + "step": 17058 + }, + { + "epoch": 0.02, + "learning_rate": 4.996451912286836e-05, + "loss": 1.2124, + "step": 17059 + }, + { + "epoch": 0.02, + "learning_rate": 4.996451493942276e-05, + "loss": 0.9633, + "step": 17060 + }, + { + "epoch": 0.02, + "learning_rate": 4.996451075573072e-05, + "loss": 1.3018, + "step": 17061 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964506571792236e-05, + "loss": 1.5028, + "step": 17062 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964502387607316e-05, + "loss": 1.2864, + "step": 17063 + }, + { + "epoch": 0.02, + "learning_rate": 4.996449820317596e-05, + "loss": 1.2757, + "step": 17064 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964494018498166e-05, + "loss": 0.9664, + "step": 17065 + }, + { + "epoch": 0.02, + "learning_rate": 4.996448983357393e-05, + "loss": 1.1671, + "step": 17066 + }, + { + "epoch": 0.02, + "learning_rate": 4.996448564840326e-05, + "loss": 1.2138, + "step": 17067 + }, + { + "epoch": 0.02, + "learning_rate": 4.996448146298614e-05, + "loss": 0.9621, + "step": 17068 + }, + { + "epoch": 0.02, + "learning_rate": 4.996447727732259e-05, + "loss": 0.6476, + "step": 17069 + }, + { + "epoch": 0.02, + "learning_rate": 4.99644730914126e-05, + "loss": 0.881, + "step": 17070 + }, + { + "epoch": 0.02, + "learning_rate": 4.996446890525617e-05, + "loss": 1.2651, + "step": 17071 + }, + { + "epoch": 0.02, + "learning_rate": 4.99644647188533e-05, + "loss": 1.1601, + "step": 17072 + }, + { + "epoch": 0.02, + "learning_rate": 4.996446053220399e-05, + "loss": 1.3367, + "step": 17073 + }, + { + "epoch": 0.02, + "learning_rate": 4.996445634530825e-05, + "loss": 1.032, + "step": 17074 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964452158166064e-05, + "loss": 1.2032, + "step": 17075 + }, + { + "epoch": 0.02, + "learning_rate": 4.996444797077745e-05, + "loss": 1.0557, + "step": 17076 + }, + { + "epoch": 0.02, + "learning_rate": 4.996444378314239e-05, + "loss": 1.2644, + "step": 17077 + }, + { + "epoch": 0.02, + "learning_rate": 4.996443959526088e-05, + "loss": 1.0396, + "step": 17078 + }, + { + "epoch": 0.02, + "learning_rate": 4.996443540713295e-05, + "loss": 1.4458, + "step": 17079 + }, + { + "epoch": 0.02, + "learning_rate": 4.996443121875857e-05, + "loss": 1.0424, + "step": 17080 + }, + { + "epoch": 0.02, + "learning_rate": 4.996442703013776e-05, + "loss": 1.0034, + "step": 17081 + }, + { + "epoch": 0.02, + "learning_rate": 4.99644228412705e-05, + "loss": 1.1232, + "step": 17082 + }, + { + "epoch": 0.02, + "learning_rate": 4.996441865215681e-05, + "loss": 1.1754, + "step": 17083 + }, + { + "epoch": 0.02, + "learning_rate": 4.996441446279668e-05, + "loss": 0.9483, + "step": 17084 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964410273190114e-05, + "loss": 0.9419, + "step": 17085 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964406083337114e-05, + "loss": 0.9803, + "step": 17086 + }, + { + "epoch": 0.02, + "learning_rate": 4.996440189323767e-05, + "loss": 0.963, + "step": 17087 + }, + { + "epoch": 0.02, + "learning_rate": 4.996439770289178e-05, + "loss": 0.4295, + "step": 17088 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964393512299465e-05, + "loss": 0.9823, + "step": 17089 + }, + { + "epoch": 0.02, + "learning_rate": 4.99643893214607e-05, + "loss": 0.6973, + "step": 17090 + }, + { + "epoch": 0.02, + "learning_rate": 4.996438513037551e-05, + "loss": 0.9756, + "step": 17091 + }, + { + "epoch": 0.02, + "learning_rate": 4.996438093904388e-05, + "loss": 1.2441, + "step": 17092 + }, + { + "epoch": 0.02, + "learning_rate": 4.99643767474658e-05, + "loss": 1.1462, + "step": 17093 + }, + { + "epoch": 0.02, + "learning_rate": 4.996437255564129e-05, + "loss": 1.0648, + "step": 17094 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964368363570344e-05, + "loss": 1.1123, + "step": 17095 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964364171252966e-05, + "loss": 1.2947, + "step": 17096 + }, + { + "epoch": 0.02, + "learning_rate": 4.996435997868914e-05, + "loss": 1.2598, + "step": 17097 + }, + { + "epoch": 0.02, + "learning_rate": 4.996435578587888e-05, + "loss": 1.3251, + "step": 17098 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964351592822176e-05, + "loss": 0.9466, + "step": 17099 + }, + { + "epoch": 0.02, + "learning_rate": 4.996434739951905e-05, + "loss": 0.7017, + "step": 17100 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964343205969474e-05, + "loss": 1.0579, + "step": 17101 + }, + { + "epoch": 0.02, + "learning_rate": 4.996433901217347e-05, + "loss": 1.1884, + "step": 17102 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964334818131016e-05, + "loss": 0.9634, + "step": 17103 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964330623842125e-05, + "loss": 0.8608, + "step": 17104 + }, + { + "epoch": 0.02, + "learning_rate": 4.996432642930681e-05, + "loss": 1.0821, + "step": 17105 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964322234525045e-05, + "loss": 1.1591, + "step": 17106 + }, + { + "epoch": 0.02, + "learning_rate": 4.996431803949685e-05, + "loss": 1.1362, + "step": 17107 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964313844222215e-05, + "loss": 1.2582, + "step": 17108 + }, + { + "epoch": 0.02, + "learning_rate": 4.996430964870115e-05, + "loss": 1.0, + "step": 17109 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964305452933636e-05, + "loss": 0.9845, + "step": 17110 + }, + { + "epoch": 0.02, + "learning_rate": 4.996430125691969e-05, + "loss": 1.1918, + "step": 17111 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964297060659306e-05, + "loss": 1.1198, + "step": 17112 + }, + { + "epoch": 0.02, + "learning_rate": 4.996429286415248e-05, + "loss": 1.141, + "step": 17113 + }, + { + "epoch": 0.02, + "learning_rate": 4.996428866739923e-05, + "loss": 1.5284, + "step": 17114 + }, + { + "epoch": 0.02, + "learning_rate": 4.996428447039954e-05, + "loss": 1.396, + "step": 17115 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964280273153405e-05, + "loss": 1.1226, + "step": 17116 + }, + { + "epoch": 0.02, + "learning_rate": 4.996427607566084e-05, + "loss": 1.2073, + "step": 17117 + }, + { + "epoch": 0.02, + "learning_rate": 4.996427187792183e-05, + "loss": 1.0523, + "step": 17118 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964267679936395e-05, + "loss": 0.8472, + "step": 17119 + }, + { + "epoch": 0.02, + "learning_rate": 4.996426348170452e-05, + "loss": 0.7644, + "step": 17120 + }, + { + "epoch": 0.02, + "learning_rate": 4.99642592832262e-05, + "loss": 1.1019, + "step": 17121 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964255084501446e-05, + "loss": 1.0891, + "step": 17122 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964250885530265e-05, + "loss": 1.1578, + "step": 17123 + }, + { + "epoch": 0.02, + "learning_rate": 4.996424668631264e-05, + "loss": 1.1067, + "step": 17124 + }, + { + "epoch": 0.02, + "learning_rate": 4.996424248684858e-05, + "loss": 1.0518, + "step": 17125 + }, + { + "epoch": 0.02, + "learning_rate": 4.996423828713808e-05, + "loss": 1.1473, + "step": 17126 + }, + { + "epoch": 0.02, + "learning_rate": 4.996423408718114e-05, + "loss": 1.0619, + "step": 17127 + }, + { + "epoch": 0.02, + "learning_rate": 4.996422988697778e-05, + "loss": 1.2103, + "step": 17128 + }, + { + "epoch": 0.02, + "learning_rate": 4.996422568652796e-05, + "loss": 0.8704, + "step": 17129 + }, + { + "epoch": 0.02, + "learning_rate": 4.996422148583173e-05, + "loss": 1.5559, + "step": 17130 + }, + { + "epoch": 0.02, + "learning_rate": 4.996421728488905e-05, + "loss": 1.4091, + "step": 17131 + }, + { + "epoch": 0.02, + "learning_rate": 4.996421308369993e-05, + "loss": 1.4804, + "step": 17132 + }, + { + "epoch": 0.02, + "learning_rate": 4.996420888226438e-05, + "loss": 1.5789, + "step": 17133 + }, + { + "epoch": 0.02, + "learning_rate": 4.99642046805824e-05, + "loss": 1.7038, + "step": 17134 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964200478653967e-05, + "loss": 1.5351, + "step": 17135 + }, + { + "epoch": 0.02, + "learning_rate": 4.996419627647911e-05, + "loss": 1.5582, + "step": 17136 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964192074057815e-05, + "loss": 1.9338, + "step": 17137 + }, + { + "epoch": 0.02, + "learning_rate": 4.996418787139008e-05, + "loss": 1.1169, + "step": 17138 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964183668475914e-05, + "loss": 1.0119, + "step": 17139 + }, + { + "epoch": 0.02, + "learning_rate": 4.996417946531531e-05, + "loss": 0.8378, + "step": 17140 + }, + { + "epoch": 0.02, + "learning_rate": 4.996417526190827e-05, + "loss": 1.1842, + "step": 17141 + }, + { + "epoch": 0.02, + "learning_rate": 4.996417105825479e-05, + "loss": 0.8782, + "step": 17142 + }, + { + "epoch": 0.02, + "learning_rate": 4.996416685435488e-05, + "loss": 0.9028, + "step": 17143 + }, + { + "epoch": 0.02, + "learning_rate": 4.996416265020853e-05, + "loss": 0.3882, + "step": 17144 + }, + { + "epoch": 0.02, + "learning_rate": 4.996415844581575e-05, + "loss": 0.6216, + "step": 17145 + }, + { + "epoch": 0.02, + "learning_rate": 4.996415424117653e-05, + "loss": 1.2481, + "step": 17146 + }, + { + "epoch": 0.02, + "learning_rate": 4.996415003629088e-05, + "loss": 0.5825, + "step": 17147 + }, + { + "epoch": 0.02, + "learning_rate": 4.996414583115879e-05, + "loss": 0.6968, + "step": 17148 + }, + { + "epoch": 0.02, + "learning_rate": 4.996414162578026e-05, + "loss": 1.1528, + "step": 17149 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964137420155304e-05, + "loss": 1.0865, + "step": 17150 + }, + { + "epoch": 0.02, + "learning_rate": 4.996413321428391e-05, + "loss": 0.8462, + "step": 17151 + }, + { + "epoch": 0.02, + "learning_rate": 4.996412900816608e-05, + "loss": 1.1814, + "step": 17152 + }, + { + "epoch": 0.02, + "learning_rate": 4.996412480180181e-05, + "loss": 1.1778, + "step": 17153 + }, + { + "epoch": 0.02, + "learning_rate": 4.996412059519111e-05, + "loss": 0.781, + "step": 17154 + }, + { + "epoch": 0.02, + "learning_rate": 4.996411638833397e-05, + "loss": 1.3372, + "step": 17155 + }, + { + "epoch": 0.02, + "learning_rate": 4.99641121812304e-05, + "loss": 0.9577, + "step": 17156 + }, + { + "epoch": 0.02, + "learning_rate": 4.996410797388039e-05, + "loss": 0.9539, + "step": 17157 + }, + { + "epoch": 0.02, + "learning_rate": 4.996410376628395e-05, + "loss": 1.015, + "step": 17158 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964099558441066e-05, + "loss": 0.6379, + "step": 17159 + }, + { + "epoch": 0.02, + "learning_rate": 4.996409535035176e-05, + "loss": 1.1556, + "step": 17160 + }, + { + "epoch": 0.02, + "learning_rate": 4.996409114201601e-05, + "loss": 1.1133, + "step": 17161 + }, + { + "epoch": 0.02, + "learning_rate": 4.996408693343383e-05, + "loss": 0.962, + "step": 17162 + }, + { + "epoch": 0.02, + "learning_rate": 4.996408272460521e-05, + "loss": 1.1877, + "step": 17163 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964078515530155e-05, + "loss": 1.0683, + "step": 17164 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964074306208665e-05, + "loss": 0.8889, + "step": 17165 + }, + { + "epoch": 0.02, + "learning_rate": 4.996407009664075e-05, + "loss": 0.7492, + "step": 17166 + }, + { + "epoch": 0.02, + "learning_rate": 4.996406588682638e-05, + "loss": 1.2761, + "step": 17167 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964061676765594e-05, + "loss": 0.9403, + "step": 17168 + }, + { + "epoch": 0.02, + "learning_rate": 4.996405746645837e-05, + "loss": 1.046, + "step": 17169 + }, + { + "epoch": 0.02, + "learning_rate": 4.996405325590471e-05, + "loss": 1.205, + "step": 17170 + }, + { + "epoch": 0.02, + "learning_rate": 4.996404904510461e-05, + "loss": 1.1947, + "step": 17171 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964044834058075e-05, + "loss": 1.1725, + "step": 17172 + }, + { + "epoch": 0.02, + "learning_rate": 4.996404062276512e-05, + "loss": 1.0102, + "step": 17173 + }, + { + "epoch": 0.02, + "learning_rate": 4.996403641122571e-05, + "loss": 1.4921, + "step": 17174 + }, + { + "epoch": 0.02, + "learning_rate": 4.996403219943988e-05, + "loss": 1.1074, + "step": 17175 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964027987407616e-05, + "loss": 1.2726, + "step": 17176 + }, + { + "epoch": 0.02, + "learning_rate": 4.996402377512891e-05, + "loss": 0.974, + "step": 17177 + }, + { + "epoch": 0.02, + "learning_rate": 4.996401956260377e-05, + "loss": 0.9307, + "step": 17178 + }, + { + "epoch": 0.02, + "learning_rate": 4.99640153498322e-05, + "loss": 1.0392, + "step": 17179 + }, + { + "epoch": 0.02, + "learning_rate": 4.996401113681419e-05, + "loss": 0.9007, + "step": 17180 + }, + { + "epoch": 0.02, + "learning_rate": 4.996400692354976e-05, + "loss": 0.8559, + "step": 17181 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964002710038883e-05, + "loss": 0.5702, + "step": 17182 + }, + { + "epoch": 0.02, + "learning_rate": 4.996399849628157e-05, + "loss": 0.6818, + "step": 17183 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963994282277824e-05, + "loss": 1.2182, + "step": 17184 + }, + { + "epoch": 0.02, + "learning_rate": 4.996399006802765e-05, + "loss": 1.3155, + "step": 17185 + }, + { + "epoch": 0.02, + "learning_rate": 4.996398585353104e-05, + "loss": 1.0473, + "step": 17186 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963981638788e-05, + "loss": 1.134, + "step": 17187 + }, + { + "epoch": 0.02, + "learning_rate": 4.996397742379852e-05, + "loss": 1.1505, + "step": 17188 + }, + { + "epoch": 0.02, + "learning_rate": 4.996397320856261e-05, + "loss": 1.2495, + "step": 17189 + }, + { + "epoch": 0.02, + "learning_rate": 4.996396899308026e-05, + "loss": 1.2216, + "step": 17190 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963964777351476e-05, + "loss": 1.4167, + "step": 17191 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963960561376265e-05, + "loss": 1.1755, + "step": 17192 + }, + { + "epoch": 0.02, + "learning_rate": 4.996395634515462e-05, + "loss": 1.3357, + "step": 17193 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963952128686544e-05, + "loss": 1.0604, + "step": 17194 + }, + { + "epoch": 0.02, + "learning_rate": 4.996394791197202e-05, + "loss": 1.2739, + "step": 17195 + }, + { + "epoch": 0.02, + "learning_rate": 4.996394369501108e-05, + "loss": 1.4028, + "step": 17196 + }, + { + "epoch": 0.02, + "learning_rate": 4.99639394778037e-05, + "loss": 1.4299, + "step": 17197 + }, + { + "epoch": 0.02, + "learning_rate": 4.996393526034988e-05, + "loss": 1.3966, + "step": 17198 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963931042649636e-05, + "loss": 1.132, + "step": 17199 + }, + { + "epoch": 0.02, + "learning_rate": 4.996392682470296e-05, + "loss": 1.253, + "step": 17200 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963922606509835e-05, + "loss": 1.5545, + "step": 17201 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963918388070286e-05, + "loss": 1.3841, + "step": 17202 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963914169384305e-05, + "loss": 0.8694, + "step": 17203 + }, + { + "epoch": 0.02, + "learning_rate": 4.996390995045189e-05, + "loss": 0.9877, + "step": 17204 + }, + { + "epoch": 0.02, + "learning_rate": 4.996390573127304e-05, + "loss": 0.9831, + "step": 17205 + }, + { + "epoch": 0.02, + "learning_rate": 4.996390151184776e-05, + "loss": 0.9441, + "step": 17206 + }, + { + "epoch": 0.02, + "learning_rate": 4.996389729217604e-05, + "loss": 1.1723, + "step": 17207 + }, + { + "epoch": 0.02, + "learning_rate": 4.99638930722579e-05, + "loss": 0.7799, + "step": 17208 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963888852093324e-05, + "loss": 1.1345, + "step": 17209 + }, + { + "epoch": 0.02, + "learning_rate": 4.99638846316823e-05, + "loss": 1.2506, + "step": 17210 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963880411024855e-05, + "loss": 1.2413, + "step": 17211 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963876190120975e-05, + "loss": 0.963, + "step": 17212 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963871968970664e-05, + "loss": 1.2428, + "step": 17213 + }, + { + "epoch": 0.02, + "learning_rate": 4.996386774757392e-05, + "loss": 1.1133, + "step": 17214 + }, + { + "epoch": 0.02, + "learning_rate": 4.996386352593074e-05, + "loss": 0.9141, + "step": 17215 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963859304041134e-05, + "loss": 1.0773, + "step": 17216 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963855081905086e-05, + "loss": 1.1935, + "step": 17217 + }, + { + "epoch": 0.02, + "learning_rate": 4.996385085952261e-05, + "loss": 0.822, + "step": 17218 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963846636893706e-05, + "loss": 0.9676, + "step": 17219 + }, + { + "epoch": 0.02, + "learning_rate": 4.996384241401836e-05, + "loss": 1.18, + "step": 17220 + }, + { + "epoch": 0.02, + "learning_rate": 4.996383819089659e-05, + "loss": 0.933, + "step": 17221 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963833967528386e-05, + "loss": 1.2033, + "step": 17222 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963829743913744e-05, + "loss": 0.9014, + "step": 17223 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963825520052676e-05, + "loss": 0.9146, + "step": 17224 + }, + { + "epoch": 0.02, + "learning_rate": 4.996382129594517e-05, + "loss": 1.1156, + "step": 17225 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963817071591236e-05, + "loss": 1.0826, + "step": 17226 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963812846990864e-05, + "loss": 0.9781, + "step": 17227 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963808622144066e-05, + "loss": 0.9874, + "step": 17228 + }, + { + "epoch": 0.02, + "learning_rate": 4.996380439705083e-05, + "loss": 1.2103, + "step": 17229 + }, + { + "epoch": 0.02, + "learning_rate": 4.996380017171117e-05, + "loss": 1.0287, + "step": 17230 + }, + { + "epoch": 0.02, + "learning_rate": 4.996379594612507e-05, + "loss": 1.2416, + "step": 17231 + }, + { + "epoch": 0.02, + "learning_rate": 4.996379172029254e-05, + "loss": 1.2759, + "step": 17232 + }, + { + "epoch": 0.02, + "learning_rate": 4.996378749421358e-05, + "loss": 1.0926, + "step": 17233 + }, + { + "epoch": 0.02, + "learning_rate": 4.996378326788819e-05, + "loss": 1.2005, + "step": 17234 + }, + { + "epoch": 0.02, + "learning_rate": 4.996377904131636e-05, + "loss": 0.7497, + "step": 17235 + }, + { + "epoch": 0.02, + "learning_rate": 4.996377481449811e-05, + "loss": 1.0177, + "step": 17236 + }, + { + "epoch": 0.02, + "learning_rate": 4.996377058743342e-05, + "loss": 1.2318, + "step": 17237 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963766360122296e-05, + "loss": 0.7603, + "step": 17238 + }, + { + "epoch": 0.02, + "learning_rate": 4.996376213256474e-05, + "loss": 1.1657, + "step": 17239 + }, + { + "epoch": 0.02, + "learning_rate": 4.996375790476076e-05, + "loss": 1.1482, + "step": 17240 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963753676710345e-05, + "loss": 1.1145, + "step": 17241 + }, + { + "epoch": 0.02, + "learning_rate": 4.99637494484135e-05, + "loss": 1.2936, + "step": 17242 + }, + { + "epoch": 0.02, + "learning_rate": 4.996374521987022e-05, + "loss": 1.0326, + "step": 17243 + }, + { + "epoch": 0.02, + "learning_rate": 4.996374099108051e-05, + "loss": 1.1578, + "step": 17244 + }, + { + "epoch": 0.02, + "learning_rate": 4.996373676204437e-05, + "loss": 1.2512, + "step": 17245 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963732532761794e-05, + "loss": 0.9417, + "step": 17246 + }, + { + "epoch": 0.02, + "learning_rate": 4.996372830323279e-05, + "loss": 1.1779, + "step": 17247 + }, + { + "epoch": 0.02, + "learning_rate": 4.996372407345735e-05, + "loss": 1.2913, + "step": 17248 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963719843435486e-05, + "loss": 1.5138, + "step": 17249 + }, + { + "epoch": 0.02, + "learning_rate": 4.996371561316718e-05, + "loss": 0.879, + "step": 17250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963711382652454e-05, + "loss": 1.0488, + "step": 17251 + }, + { + "epoch": 0.02, + "learning_rate": 4.996370715189129e-05, + "loss": 0.9471, + "step": 17252 + }, + { + "epoch": 0.02, + "learning_rate": 4.99637029208837e-05, + "loss": 1.1719, + "step": 17253 + }, + { + "epoch": 0.02, + "learning_rate": 4.996369868962968e-05, + "loss": 1.2307, + "step": 17254 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963694458129216e-05, + "loss": 0.8388, + "step": 17255 + }, + { + "epoch": 0.02, + "learning_rate": 4.996369022638233e-05, + "loss": 1.1033, + "step": 17256 + }, + { + "epoch": 0.02, + "learning_rate": 4.996368599438902e-05, + "loss": 0.6805, + "step": 17257 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963681762149274e-05, + "loss": 1.232, + "step": 17258 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963677529663086e-05, + "loss": 1.1412, + "step": 17259 + }, + { + "epoch": 0.02, + "learning_rate": 4.996367329693048e-05, + "loss": 1.1539, + "step": 17260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963669063951434e-05, + "loss": 1.0914, + "step": 17261 + }, + { + "epoch": 0.02, + "learning_rate": 4.996366483072596e-05, + "loss": 1.2681, + "step": 17262 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963660597254066e-05, + "loss": 1.0189, + "step": 17263 + }, + { + "epoch": 0.02, + "learning_rate": 4.996365636353573e-05, + "loss": 1.4464, + "step": 17264 + }, + { + "epoch": 0.02, + "learning_rate": 4.996365212957097e-05, + "loss": 1.2144, + "step": 17265 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963647895359774e-05, + "loss": 1.2996, + "step": 17266 + }, + { + "epoch": 0.02, + "learning_rate": 4.996364366090215e-05, + "loss": 1.2995, + "step": 17267 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963639426198096e-05, + "loss": 1.2946, + "step": 17268 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963635191247605e-05, + "loss": 1.4179, + "step": 17269 + }, + { + "epoch": 0.02, + "learning_rate": 4.996363095605069e-05, + "loss": 1.0535, + "step": 17270 + }, + { + "epoch": 0.02, + "learning_rate": 4.996362672060734e-05, + "loss": 1.051, + "step": 17271 + }, + { + "epoch": 0.02, + "learning_rate": 4.996362248491756e-05, + "loss": 1.2165, + "step": 17272 + }, + { + "epoch": 0.02, + "learning_rate": 4.996361824898136e-05, + "loss": 1.3238, + "step": 17273 + }, + { + "epoch": 0.02, + "learning_rate": 4.996361401279872e-05, + "loss": 0.9395, + "step": 17274 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963609776369646e-05, + "loss": 1.1245, + "step": 17275 + }, + { + "epoch": 0.02, + "learning_rate": 4.996360553969415e-05, + "loss": 1.1777, + "step": 17276 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963601302772225e-05, + "loss": 0.9452, + "step": 17277 + }, + { + "epoch": 0.02, + "learning_rate": 4.996359706560386e-05, + "loss": 1.0391, + "step": 17278 + }, + { + "epoch": 0.02, + "learning_rate": 4.996359282818907e-05, + "loss": 0.9944, + "step": 17279 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963588590527855e-05, + "loss": 1.0862, + "step": 17280 + }, + { + "epoch": 0.02, + "learning_rate": 4.99635843526202e-05, + "loss": 1.0277, + "step": 17281 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963580114466124e-05, + "loss": 0.9585, + "step": 17282 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963575876065614e-05, + "loss": 1.0803, + "step": 17283 + }, + { + "epoch": 0.02, + "learning_rate": 4.996357163741867e-05, + "loss": 0.9859, + "step": 17284 + }, + { + "epoch": 0.02, + "learning_rate": 4.996356739852531e-05, + "loss": 1.0695, + "step": 17285 + }, + { + "epoch": 0.02, + "learning_rate": 4.996356315938551e-05, + "loss": 0.7614, + "step": 17286 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963558919999275e-05, + "loss": 0.9558, + "step": 17287 + }, + { + "epoch": 0.02, + "learning_rate": 4.996355468036662e-05, + "loss": 0.9884, + "step": 17288 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963550440487525e-05, + "loss": 1.2689, + "step": 17289 + }, + { + "epoch": 0.02, + "learning_rate": 4.996354620036201e-05, + "loss": 0.9861, + "step": 17290 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963541959990065e-05, + "loss": 1.0806, + "step": 17291 + }, + { + "epoch": 0.02, + "learning_rate": 4.996353771937169e-05, + "loss": 0.5441, + "step": 17292 + }, + { + "epoch": 0.02, + "learning_rate": 4.996353347850688e-05, + "loss": 0.146, + "step": 17293 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963529237395644e-05, + "loss": 0.1402, + "step": 17294 + }, + { + "epoch": 0.02, + "learning_rate": 4.996352499603798e-05, + "loss": 0.9284, + "step": 17295 + }, + { + "epoch": 0.02, + "learning_rate": 4.996352075443389e-05, + "loss": 1.1727, + "step": 17296 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963516512583364e-05, + "loss": 0.6947, + "step": 17297 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963512270486416e-05, + "loss": 0.7262, + "step": 17298 + }, + { + "epoch": 0.02, + "learning_rate": 4.996350802814303e-05, + "loss": 0.4485, + "step": 17299 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963503785553214e-05, + "loss": 0.2741, + "step": 17300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963499542716975e-05, + "loss": 0.4738, + "step": 17301 + }, + { + "epoch": 0.02, + "learning_rate": 4.99634952996343e-05, + "loss": 0.7866, + "step": 17302 + }, + { + "epoch": 0.02, + "learning_rate": 4.99634910563052e-05, + "loss": 0.7192, + "step": 17303 + }, + { + "epoch": 0.02, + "learning_rate": 4.996348681272968e-05, + "loss": 0.6647, + "step": 17304 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963482568907715e-05, + "loss": 0.7507, + "step": 17305 + }, + { + "epoch": 0.02, + "learning_rate": 4.996347832483933e-05, + "loss": 1.1764, + "step": 17306 + }, + { + "epoch": 0.02, + "learning_rate": 4.996347408052452e-05, + "loss": 0.9444, + "step": 17307 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963469835963276e-05, + "loss": 1.1251, + "step": 17308 + }, + { + "epoch": 0.02, + "learning_rate": 4.99634655911556e-05, + "loss": 1.3977, + "step": 17309 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963461346101494e-05, + "loss": 1.1988, + "step": 17310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963457100800965e-05, + "loss": 1.2108, + "step": 17311 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963452855254004e-05, + "loss": 1.4584, + "step": 17312 + }, + { + "epoch": 0.02, + "learning_rate": 4.996344860946062e-05, + "loss": 0.8228, + "step": 17313 + }, + { + "epoch": 0.02, + "learning_rate": 4.99634443634208e-05, + "loss": 0.9375, + "step": 17314 + }, + { + "epoch": 0.02, + "learning_rate": 4.996344011713455e-05, + "loss": 1.0138, + "step": 17315 + }, + { + "epoch": 0.02, + "learning_rate": 4.996343587060188e-05, + "loss": 0.8541, + "step": 17316 + }, + { + "epoch": 0.02, + "learning_rate": 4.996343162382278e-05, + "loss": 1.1414, + "step": 17317 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963427376797244e-05, + "loss": 0.9219, + "step": 17318 + }, + { + "epoch": 0.02, + "learning_rate": 4.996342312952529e-05, + "loss": 1.2181, + "step": 17319 + }, + { + "epoch": 0.02, + "learning_rate": 4.99634188820069e-05, + "loss": 0.8825, + "step": 17320 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963414634242084e-05, + "loss": 0.9772, + "step": 17321 + }, + { + "epoch": 0.02, + "learning_rate": 4.996341038623084e-05, + "loss": 1.1053, + "step": 17322 + }, + { + "epoch": 0.02, + "learning_rate": 4.996340613797317e-05, + "loss": 1.2224, + "step": 17323 + }, + { + "epoch": 0.02, + "learning_rate": 4.996340188946907e-05, + "loss": 0.9532, + "step": 17324 + }, + { + "epoch": 0.02, + "learning_rate": 4.996339764071853e-05, + "loss": 0.891, + "step": 17325 + }, + { + "epoch": 0.02, + "learning_rate": 4.996339339172158e-05, + "loss": 0.9064, + "step": 17326 + }, + { + "epoch": 0.02, + "learning_rate": 4.996338914247819e-05, + "loss": 1.7453, + "step": 17327 + }, + { + "epoch": 0.02, + "learning_rate": 4.996338489298839e-05, + "loss": 1.1493, + "step": 17328 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963380643252137e-05, + "loss": 0.7362, + "step": 17329 + }, + { + "epoch": 0.02, + "learning_rate": 4.996337639326947e-05, + "loss": 1.2371, + "step": 17330 + }, + { + "epoch": 0.02, + "learning_rate": 4.996337214304038e-05, + "loss": 1.1131, + "step": 17331 + }, + { + "epoch": 0.02, + "learning_rate": 4.996336789256485e-05, + "loss": 0.9624, + "step": 17332 + }, + { + "epoch": 0.02, + "learning_rate": 4.99633636418429e-05, + "loss": 0.9961, + "step": 17333 + }, + { + "epoch": 0.02, + "learning_rate": 4.996335939087451e-05, + "loss": 0.8062, + "step": 17334 + }, + { + "epoch": 0.02, + "learning_rate": 4.996335513965971e-05, + "loss": 0.9296, + "step": 17335 + }, + { + "epoch": 0.02, + "learning_rate": 4.996335088819847e-05, + "loss": 0.9914, + "step": 17336 + }, + { + "epoch": 0.02, + "learning_rate": 4.99633466364908e-05, + "loss": 1.1337, + "step": 17337 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963342384536713e-05, + "loss": 1.2149, + "step": 17338 + }, + { + "epoch": 0.02, + "learning_rate": 4.996333813233619e-05, + "loss": 1.1082, + "step": 17339 + }, + { + "epoch": 0.02, + "learning_rate": 4.996333387988925e-05, + "loss": 1.0661, + "step": 17340 + }, + { + "epoch": 0.02, + "learning_rate": 4.996332962719587e-05, + "loss": 0.9451, + "step": 17341 + }, + { + "epoch": 0.02, + "learning_rate": 4.996332537425607e-05, + "loss": 1.2898, + "step": 17342 + }, + { + "epoch": 0.02, + "learning_rate": 4.996332112106984e-05, + "loss": 1.193, + "step": 17343 + }, + { + "epoch": 0.02, + "learning_rate": 4.996331686763718e-05, + "loss": 1.0381, + "step": 17344 + }, + { + "epoch": 0.02, + "learning_rate": 4.99633126139581e-05, + "loss": 1.0595, + "step": 17345 + }, + { + "epoch": 0.02, + "learning_rate": 4.996330836003259e-05, + "loss": 0.9709, + "step": 17346 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963304105860654e-05, + "loss": 1.5223, + "step": 17347 + }, + { + "epoch": 0.02, + "learning_rate": 4.996329985144228e-05, + "loss": 1.2276, + "step": 17348 + }, + { + "epoch": 0.02, + "learning_rate": 4.996329559677749e-05, + "loss": 1.1893, + "step": 17349 + }, + { + "epoch": 0.02, + "learning_rate": 4.996329134186627e-05, + "loss": 0.9225, + "step": 17350 + }, + { + "epoch": 0.02, + "learning_rate": 4.996328708670862e-05, + "loss": 1.0711, + "step": 17351 + }, + { + "epoch": 0.02, + "learning_rate": 4.996328283130455e-05, + "loss": 0.989, + "step": 17352 + }, + { + "epoch": 0.02, + "learning_rate": 4.996327857565405e-05, + "loss": 1.1306, + "step": 17353 + }, + { + "epoch": 0.02, + "learning_rate": 4.996327431975712e-05, + "loss": 1.1915, + "step": 17354 + }, + { + "epoch": 0.02, + "learning_rate": 4.996327006361377e-05, + "loss": 1.1522, + "step": 17355 + }, + { + "epoch": 0.02, + "learning_rate": 4.996326580722398e-05, + "loss": 0.9981, + "step": 17356 + }, + { + "epoch": 0.02, + "learning_rate": 4.996326155058777e-05, + "loss": 1.1472, + "step": 17357 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963257293705136e-05, + "loss": 1.1636, + "step": 17358 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963253036576077e-05, + "loss": 0.9603, + "step": 17359 + }, + { + "epoch": 0.02, + "learning_rate": 4.996324877920059e-05, + "loss": 0.9564, + "step": 17360 + }, + { + "epoch": 0.02, + "learning_rate": 4.996324452157867e-05, + "loss": 1.0739, + "step": 17361 + }, + { + "epoch": 0.02, + "learning_rate": 4.996324026371033e-05, + "loss": 0.8183, + "step": 17362 + }, + { + "epoch": 0.02, + "learning_rate": 4.996323600559556e-05, + "loss": 1.1497, + "step": 17363 + }, + { + "epoch": 0.02, + "learning_rate": 4.996323174723436e-05, + "loss": 0.8956, + "step": 17364 + }, + { + "epoch": 0.02, + "learning_rate": 4.996322748862674e-05, + "loss": 0.8879, + "step": 17365 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963223229772696e-05, + "loss": 0.7887, + "step": 17366 + }, + { + "epoch": 0.02, + "learning_rate": 4.996321897067222e-05, + "loss": 1.0428, + "step": 17367 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963214711325314e-05, + "loss": 1.2277, + "step": 17368 + }, + { + "epoch": 0.02, + "learning_rate": 4.996321045173199e-05, + "loss": 1.1084, + "step": 17369 + }, + { + "epoch": 0.02, + "learning_rate": 4.996320619189223e-05, + "loss": 0.7871, + "step": 17370 + }, + { + "epoch": 0.02, + "learning_rate": 4.996320193180606e-05, + "loss": 1.0237, + "step": 17371 + }, + { + "epoch": 0.02, + "learning_rate": 4.996319767147345e-05, + "loss": 0.7141, + "step": 17372 + }, + { + "epoch": 0.02, + "learning_rate": 4.996319341089441e-05, + "loss": 0.8453, + "step": 17373 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963189150068956e-05, + "loss": 1.4282, + "step": 17374 + }, + { + "epoch": 0.02, + "learning_rate": 4.996318488899707e-05, + "loss": 1.3896, + "step": 17375 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963180627678765e-05, + "loss": 0.9688, + "step": 17376 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963176366114024e-05, + "loss": 1.2528, + "step": 17377 + }, + { + "epoch": 0.02, + "learning_rate": 4.996317210430286e-05, + "loss": 1.1309, + "step": 17378 + }, + { + "epoch": 0.02, + "learning_rate": 4.996316784224527e-05, + "loss": 1.1008, + "step": 17379 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963163579941255e-05, + "loss": 1.172, + "step": 17380 + }, + { + "epoch": 0.02, + "learning_rate": 4.996315931739082e-05, + "loss": 1.0076, + "step": 17381 + }, + { + "epoch": 0.02, + "learning_rate": 4.996315505459395e-05, + "loss": 1.0946, + "step": 17382 + }, + { + "epoch": 0.02, + "learning_rate": 4.996315079155066e-05, + "loss": 1.2352, + "step": 17383 + }, + { + "epoch": 0.02, + "learning_rate": 4.996314652826094e-05, + "loss": 1.1527, + "step": 17384 + }, + { + "epoch": 0.02, + "learning_rate": 4.99631422647248e-05, + "loss": 1.0632, + "step": 17385 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963138000942226e-05, + "loss": 1.0584, + "step": 17386 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963133736913236e-05, + "loss": 0.9125, + "step": 17387 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963129472637814e-05, + "loss": 0.8828, + "step": 17388 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963125208115966e-05, + "loss": 1.3073, + "step": 17389 + }, + { + "epoch": 0.02, + "learning_rate": 4.99631209433477e-05, + "loss": 1.1409, + "step": 17390 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963116678333e-05, + "loss": 1.2063, + "step": 17391 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963112413071875e-05, + "loss": 1.0319, + "step": 17392 + }, + { + "epoch": 0.02, + "learning_rate": 4.996310814756433e-05, + "loss": 1.2216, + "step": 17393 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963103881810356e-05, + "loss": 1.2572, + "step": 17394 + }, + { + "epoch": 0.02, + "learning_rate": 4.996309961580996e-05, + "loss": 1.2577, + "step": 17395 + }, + { + "epoch": 0.02, + "learning_rate": 4.996309534956314e-05, + "loss": 1.3898, + "step": 17396 + }, + { + "epoch": 0.02, + "learning_rate": 4.996309108306989e-05, + "loss": 1.0667, + "step": 17397 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963086816330217e-05, + "loss": 1.2333, + "step": 17398 + }, + { + "epoch": 0.02, + "learning_rate": 4.996308254934412e-05, + "loss": 1.1824, + "step": 17399 + }, + { + "epoch": 0.02, + "learning_rate": 4.99630782821116e-05, + "loss": 1.2248, + "step": 17400 + }, + { + "epoch": 0.02, + "learning_rate": 4.996307401463265e-05, + "loss": 0.8983, + "step": 17401 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963069746907274e-05, + "loss": 0.6246, + "step": 17402 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963065478935475e-05, + "loss": 0.5794, + "step": 17403 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963061210717256e-05, + "loss": 0.6263, + "step": 17404 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963056942252606e-05, + "loss": 1.0083, + "step": 17405 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963052673541536e-05, + "loss": 0.9281, + "step": 17406 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963048404584034e-05, + "loss": 1.0358, + "step": 17407 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963044135380113e-05, + "loss": 1.281, + "step": 17408 + }, + { + "epoch": 0.02, + "learning_rate": 4.996303986592977e-05, + "loss": 1.1101, + "step": 17409 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963035596233e-05, + "loss": 1.0381, + "step": 17410 + }, + { + "epoch": 0.02, + "learning_rate": 4.99630313262898e-05, + "loss": 0.8644, + "step": 17411 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963027056100174e-05, + "loss": 1.4455, + "step": 17412 + }, + { + "epoch": 0.02, + "learning_rate": 4.996302278566414e-05, + "loss": 1.4179, + "step": 17413 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963018514981665e-05, + "loss": 1.4093, + "step": 17414 + }, + { + "epoch": 0.02, + "learning_rate": 4.996301424405278e-05, + "loss": 1.1314, + "step": 17415 + }, + { + "epoch": 0.02, + "learning_rate": 4.996300997287745e-05, + "loss": 1.0532, + "step": 17416 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963005701455715e-05, + "loss": 1.3066, + "step": 17417 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963001429787545e-05, + "loss": 1.1634, + "step": 17418 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962997157872956e-05, + "loss": 1.0959, + "step": 17419 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962992885711935e-05, + "loss": 1.076, + "step": 17420 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962988613304495e-05, + "loss": 1.2153, + "step": 17421 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962984340650636e-05, + "loss": 1.1288, + "step": 17422 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962980067750345e-05, + "loss": 0.9758, + "step": 17423 + }, + { + "epoch": 0.02, + "learning_rate": 4.996297579460364e-05, + "loss": 1.1316, + "step": 17424 + }, + { + "epoch": 0.02, + "learning_rate": 4.99629715212105e-05, + "loss": 1.2153, + "step": 17425 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962967247570945e-05, + "loss": 1.189, + "step": 17426 + }, + { + "epoch": 0.02, + "learning_rate": 4.996296297368496e-05, + "loss": 1.1286, + "step": 17427 + }, + { + "epoch": 0.02, + "learning_rate": 4.996295869955255e-05, + "loss": 1.1278, + "step": 17428 + }, + { + "epoch": 0.02, + "learning_rate": 4.996295442517373e-05, + "loss": 1.1855, + "step": 17429 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962950150548464e-05, + "loss": 0.951, + "step": 17430 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962945875676795e-05, + "loss": 1.014, + "step": 17431 + }, + { + "epoch": 0.02, + "learning_rate": 4.996294160055869e-05, + "loss": 1.0577, + "step": 17432 + }, + { + "epoch": 0.02, + "learning_rate": 4.996293732519416e-05, + "loss": 1.3129, + "step": 17433 + }, + { + "epoch": 0.02, + "learning_rate": 4.996293304958321e-05, + "loss": 1.0192, + "step": 17434 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962928773725836e-05, + "loss": 1.1545, + "step": 17435 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962924497622046e-05, + "loss": 0.7061, + "step": 17436 + }, + { + "epoch": 0.02, + "learning_rate": 4.996292022127182e-05, + "loss": 1.3276, + "step": 17437 + }, + { + "epoch": 0.02, + "learning_rate": 4.996291594467518e-05, + "loss": 1.2633, + "step": 17438 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962911667832115e-05, + "loss": 1.3611, + "step": 17439 + }, + { + "epoch": 0.02, + "learning_rate": 4.996290739074262e-05, + "loss": 1.0487, + "step": 17440 + }, + { + "epoch": 0.02, + "learning_rate": 4.996290311340671e-05, + "loss": 1.0328, + "step": 17441 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962898835824374e-05, + "loss": 0.5902, + "step": 17442 + }, + { + "epoch": 0.02, + "learning_rate": 4.996289455799561e-05, + "loss": 1.2433, + "step": 17443 + }, + { + "epoch": 0.02, + "learning_rate": 4.996289027992043e-05, + "loss": 1.1824, + "step": 17444 + }, + { + "epoch": 0.02, + "learning_rate": 4.996288600159882e-05, + "loss": 1.2213, + "step": 17445 + }, + { + "epoch": 0.02, + "learning_rate": 4.99628817230308e-05, + "loss": 1.281, + "step": 17446 + }, + { + "epoch": 0.02, + "learning_rate": 4.996287744421634e-05, + "loss": 0.8739, + "step": 17447 + }, + { + "epoch": 0.02, + "learning_rate": 4.996287316515547e-05, + "loss": 1.1386, + "step": 17448 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962868885848166e-05, + "loss": 1.3728, + "step": 17449 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962864606294444e-05, + "loss": 1.2323, + "step": 17450 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962860326494304e-05, + "loss": 1.1179, + "step": 17451 + }, + { + "epoch": 0.02, + "learning_rate": 4.996285604644774e-05, + "loss": 0.8826, + "step": 17452 + }, + { + "epoch": 0.02, + "learning_rate": 4.996285176615475e-05, + "loss": 1.0244, + "step": 17453 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962847485615336e-05, + "loss": 0.6633, + "step": 17454 + }, + { + "epoch": 0.02, + "learning_rate": 4.99628432048295e-05, + "loss": 0.6568, + "step": 17455 + }, + { + "epoch": 0.02, + "learning_rate": 4.996283892379724e-05, + "loss": 0.7697, + "step": 17456 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962834642518565e-05, + "loss": 1.0342, + "step": 17457 + }, + { + "epoch": 0.02, + "learning_rate": 4.996283036099346e-05, + "loss": 1.1591, + "step": 17458 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962826079221934e-05, + "loss": 1.1062, + "step": 17459 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962821797203984e-05, + "loss": 1.3233, + "step": 17460 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962817514939615e-05, + "loss": 1.4055, + "step": 17461 + }, + { + "epoch": 0.02, + "learning_rate": 4.996281323242882e-05, + "loss": 0.9421, + "step": 17462 + }, + { + "epoch": 0.02, + "learning_rate": 4.99628089496716e-05, + "loss": 0.5923, + "step": 17463 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962804666667974e-05, + "loss": 1.2461, + "step": 17464 + }, + { + "epoch": 0.02, + "learning_rate": 4.996280038341791e-05, + "loss": 0.9632, + "step": 17465 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962796099921425e-05, + "loss": 1.1671, + "step": 17466 + }, + { + "epoch": 0.02, + "learning_rate": 4.996279181617852e-05, + "loss": 1.3371, + "step": 17467 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962787532189195e-05, + "loss": 1.2114, + "step": 17468 + }, + { + "epoch": 0.02, + "learning_rate": 4.996278324795345e-05, + "loss": 1.2673, + "step": 17469 + }, + { + "epoch": 0.02, + "learning_rate": 4.996277896347128e-05, + "loss": 1.2273, + "step": 17470 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962774678742684e-05, + "loss": 1.0447, + "step": 17471 + }, + { + "epoch": 0.02, + "learning_rate": 4.996277039376767e-05, + "loss": 1.0823, + "step": 17472 + }, + { + "epoch": 0.02, + "learning_rate": 4.996276610854623e-05, + "loss": 1.1767, + "step": 17473 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962761823078377e-05, + "loss": 1.2755, + "step": 17474 + }, + { + "epoch": 0.02, + "learning_rate": 4.996275753736409e-05, + "loss": 0.9984, + "step": 17475 + }, + { + "epoch": 0.02, + "learning_rate": 4.996275325140339e-05, + "loss": 1.0276, + "step": 17476 + }, + { + "epoch": 0.02, + "learning_rate": 4.996274896519627e-05, + "loss": 1.3435, + "step": 17477 + }, + { + "epoch": 0.02, + "learning_rate": 4.996274467874272e-05, + "loss": 1.2194, + "step": 17478 + }, + { + "epoch": 0.02, + "learning_rate": 4.996274039204275e-05, + "loss": 1.3562, + "step": 17479 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962736105096365e-05, + "loss": 1.0642, + "step": 17480 + }, + { + "epoch": 0.02, + "learning_rate": 4.996273181790355e-05, + "loss": 1.0313, + "step": 17481 + }, + { + "epoch": 0.02, + "learning_rate": 4.996272753046432e-05, + "loss": 0.9993, + "step": 17482 + }, + { + "epoch": 0.02, + "learning_rate": 4.996272324277866e-05, + "loss": 1.1906, + "step": 17483 + }, + { + "epoch": 0.02, + "learning_rate": 4.996271895484659e-05, + "loss": 1.0643, + "step": 17484 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962714666668096e-05, + "loss": 1.1569, + "step": 17485 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962710378243175e-05, + "loss": 1.4357, + "step": 17486 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962706089571835e-05, + "loss": 1.1304, + "step": 17487 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962701800654076e-05, + "loss": 1.5991, + "step": 17488 + }, + { + "epoch": 0.02, + "learning_rate": 4.99626975114899e-05, + "loss": 1.5954, + "step": 17489 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962693222079295e-05, + "loss": 1.3534, + "step": 17490 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962688932422267e-05, + "loss": 0.2291, + "step": 17491 + }, + { + "epoch": 0.02, + "learning_rate": 4.996268464251882e-05, + "loss": 0.1923, + "step": 17492 + }, + { + "epoch": 0.02, + "learning_rate": 4.996268035236895e-05, + "loss": 0.1576, + "step": 17493 + }, + { + "epoch": 0.02, + "learning_rate": 4.996267606197267e-05, + "loss": 0.8841, + "step": 17494 + }, + { + "epoch": 0.02, + "learning_rate": 4.996267177132996e-05, + "loss": 1.2412, + "step": 17495 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962667480440826e-05, + "loss": 1.2512, + "step": 17496 + }, + { + "epoch": 0.02, + "learning_rate": 4.996266318930528e-05, + "loss": 1.2207, + "step": 17497 + }, + { + "epoch": 0.02, + "learning_rate": 4.996265889792331e-05, + "loss": 0.8542, + "step": 17498 + }, + { + "epoch": 0.02, + "learning_rate": 4.996265460629491e-05, + "loss": 0.9536, + "step": 17499 + }, + { + "epoch": 0.02, + "learning_rate": 4.99626503144201e-05, + "loss": 1.121, + "step": 17500 + }, + { + "epoch": 0.02, + "eval_loss": 1.0300171375274658, + "eval_runtime": 114.2587, + "eval_samples_per_second": 12.122, + "eval_steps_per_second": 3.037, + "step": 17500 + }, + { + "epoch": 0.02, + "learning_rate": 4.996264602229886e-05, + "loss": 1.0129, + "step": 17501 + }, + { + "epoch": 0.02, + "learning_rate": 4.996264172993121e-05, + "loss": 1.2211, + "step": 17502 + }, + { + "epoch": 0.02, + "learning_rate": 4.996263743731714e-05, + "loss": 1.1894, + "step": 17503 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962633144456636e-05, + "loss": 1.2215, + "step": 17504 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962628851349724e-05, + "loss": 1.1546, + "step": 17505 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962624557996385e-05, + "loss": 1.0137, + "step": 17506 + }, + { + "epoch": 0.02, + "learning_rate": 4.996262026439663e-05, + "loss": 0.747, + "step": 17507 + }, + { + "epoch": 0.02, + "learning_rate": 4.996261597055045e-05, + "loss": 1.1863, + "step": 17508 + }, + { + "epoch": 0.02, + "learning_rate": 4.996261167645785e-05, + "loss": 0.9583, + "step": 17509 + }, + { + "epoch": 0.02, + "learning_rate": 4.996260738211883e-05, + "loss": 1.0171, + "step": 17510 + }, + { + "epoch": 0.02, + "learning_rate": 4.996260308753339e-05, + "loss": 1.4778, + "step": 17511 + }, + { + "epoch": 0.02, + "learning_rate": 4.996259879270153e-05, + "loss": 1.4268, + "step": 17512 + }, + { + "epoch": 0.02, + "learning_rate": 4.996259449762325e-05, + "loss": 1.1606, + "step": 17513 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962590202298554e-05, + "loss": 1.011, + "step": 17514 + }, + { + "epoch": 0.02, + "learning_rate": 4.996258590672743e-05, + "loss": 0.9438, + "step": 17515 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962581610909885e-05, + "loss": 0.9229, + "step": 17516 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962577314845926e-05, + "loss": 1.1587, + "step": 17517 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962573018535555e-05, + "loss": 1.0432, + "step": 17518 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962568721978745e-05, + "loss": 0.9585, + "step": 17519 + }, + { + "epoch": 0.02, + "learning_rate": 4.996256442517553e-05, + "loss": 1.192, + "step": 17520 + }, + { + "epoch": 0.02, + "learning_rate": 4.996256012812589e-05, + "loss": 1.2137, + "step": 17521 + }, + { + "epoch": 0.02, + "learning_rate": 4.996255583082983e-05, + "loss": 0.953, + "step": 17522 + }, + { + "epoch": 0.02, + "learning_rate": 4.996255153328735e-05, + "loss": 0.4916, + "step": 17523 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962547235498446e-05, + "loss": 0.9689, + "step": 17524 + }, + { + "epoch": 0.02, + "learning_rate": 4.996254293746313e-05, + "loss": 1.0869, + "step": 17525 + }, + { + "epoch": 0.02, + "learning_rate": 4.996253863918139e-05, + "loss": 1.2203, + "step": 17526 + }, + { + "epoch": 0.02, + "learning_rate": 4.996253434065323e-05, + "loss": 1.1731, + "step": 17527 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962530041878654e-05, + "loss": 0.8759, + "step": 17528 + }, + { + "epoch": 0.02, + "learning_rate": 4.996252574285765e-05, + "loss": 0.9557, + "step": 17529 + }, + { + "epoch": 0.02, + "learning_rate": 4.996252144359024e-05, + "loss": 1.3017, + "step": 17530 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962517144076396e-05, + "loss": 1.0891, + "step": 17531 + }, + { + "epoch": 0.02, + "learning_rate": 4.996251284431614e-05, + "loss": 0.9834, + "step": 17532 + }, + { + "epoch": 0.02, + "learning_rate": 4.996250854430946e-05, + "loss": 1.1332, + "step": 17533 + }, + { + "epoch": 0.02, + "learning_rate": 4.996250424405637e-05, + "loss": 1.5566, + "step": 17534 + }, + { + "epoch": 0.02, + "learning_rate": 4.996249994355685e-05, + "loss": 1.0745, + "step": 17535 + }, + { + "epoch": 0.02, + "learning_rate": 4.996249564281092e-05, + "loss": 0.9592, + "step": 17536 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962491341818564e-05, + "loss": 0.7865, + "step": 17537 + }, + { + "epoch": 0.02, + "learning_rate": 4.996248704057979e-05, + "loss": 0.8038, + "step": 17538 + }, + { + "epoch": 0.02, + "learning_rate": 4.99624827390946e-05, + "loss": 1.1383, + "step": 17539 + }, + { + "epoch": 0.02, + "learning_rate": 4.996247843736299e-05, + "loss": 1.1819, + "step": 17540 + }, + { + "epoch": 0.02, + "learning_rate": 4.996247413538496e-05, + "loss": 1.2586, + "step": 17541 + }, + { + "epoch": 0.02, + "learning_rate": 4.996246983316051e-05, + "loss": 1.1353, + "step": 17542 + }, + { + "epoch": 0.02, + "learning_rate": 4.996246553068964e-05, + "loss": 1.0877, + "step": 17543 + }, + { + "epoch": 0.02, + "learning_rate": 4.996246122797236e-05, + "loss": 1.3518, + "step": 17544 + }, + { + "epoch": 0.02, + "learning_rate": 4.996245692500865e-05, + "loss": 0.7672, + "step": 17545 + }, + { + "epoch": 0.02, + "learning_rate": 4.996245262179853e-05, + "loss": 0.816, + "step": 17546 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962448318341984e-05, + "loss": 1.0396, + "step": 17547 + }, + { + "epoch": 0.02, + "learning_rate": 4.996244401463902e-05, + "loss": 0.8345, + "step": 17548 + }, + { + "epoch": 0.02, + "learning_rate": 4.996243971068964e-05, + "loss": 0.7996, + "step": 17549 + }, + { + "epoch": 0.02, + "learning_rate": 4.996243540649384e-05, + "loss": 1.3654, + "step": 17550 + }, + { + "epoch": 0.02, + "learning_rate": 4.996243110205162e-05, + "loss": 1.2618, + "step": 17551 + }, + { + "epoch": 0.02, + "learning_rate": 4.996242679736298e-05, + "loss": 1.0082, + "step": 17552 + }, + { + "epoch": 0.02, + "learning_rate": 4.996242249242793e-05, + "loss": 1.0619, + "step": 17553 + }, + { + "epoch": 0.02, + "learning_rate": 4.996241818724645e-05, + "loss": 1.2977, + "step": 17554 + }, + { + "epoch": 0.02, + "learning_rate": 4.996241388181856e-05, + "loss": 1.0483, + "step": 17555 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962409576144254e-05, + "loss": 0.9183, + "step": 17556 + }, + { + "epoch": 0.02, + "learning_rate": 4.996240527022352e-05, + "loss": 1.1413, + "step": 17557 + }, + { + "epoch": 0.02, + "learning_rate": 4.996240096405638e-05, + "loss": 0.8811, + "step": 17558 + }, + { + "epoch": 0.02, + "learning_rate": 4.996239665764281e-05, + "loss": 1.2113, + "step": 17559 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962392350982826e-05, + "loss": 1.7992, + "step": 17560 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962388044076425e-05, + "loss": 1.382, + "step": 17561 + }, + { + "epoch": 0.02, + "learning_rate": 4.99623837369236e-05, + "loss": 1.8228, + "step": 17562 + }, + { + "epoch": 0.02, + "learning_rate": 4.996237942952437e-05, + "loss": 1.6389, + "step": 17563 + }, + { + "epoch": 0.02, + "learning_rate": 4.996237512187871e-05, + "loss": 1.0656, + "step": 17564 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962370813986634e-05, + "loss": 0.7273, + "step": 17565 + }, + { + "epoch": 0.02, + "learning_rate": 4.996236650584813e-05, + "loss": 0.9601, + "step": 17566 + }, + { + "epoch": 0.02, + "learning_rate": 4.996236219746323e-05, + "loss": 0.9904, + "step": 17567 + }, + { + "epoch": 0.02, + "learning_rate": 4.99623578888319e-05, + "loss": 1.0461, + "step": 17568 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962353579954155e-05, + "loss": 1.2404, + "step": 17569 + }, + { + "epoch": 0.02, + "learning_rate": 4.996234927082999e-05, + "loss": 0.8456, + "step": 17570 + }, + { + "epoch": 0.02, + "learning_rate": 4.996234496145941e-05, + "loss": 1.2965, + "step": 17571 + }, + { + "epoch": 0.02, + "learning_rate": 4.996234065184241e-05, + "loss": 1.2, + "step": 17572 + }, + { + "epoch": 0.02, + "learning_rate": 4.996233634197899e-05, + "loss": 0.7102, + "step": 17573 + }, + { + "epoch": 0.02, + "learning_rate": 4.996233203186915e-05, + "loss": 1.0808, + "step": 17574 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962327721512905e-05, + "loss": 1.0467, + "step": 17575 + }, + { + "epoch": 0.02, + "learning_rate": 4.996232341091023e-05, + "loss": 1.1065, + "step": 17576 + }, + { + "epoch": 0.02, + "learning_rate": 4.996231910006114e-05, + "loss": 1.1417, + "step": 17577 + }, + { + "epoch": 0.02, + "learning_rate": 4.996231478896563e-05, + "loss": 1.1984, + "step": 17578 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962310477623714e-05, + "loss": 1.3023, + "step": 17579 + }, + { + "epoch": 0.02, + "learning_rate": 4.996230616603537e-05, + "loss": 1.1139, + "step": 17580 + }, + { + "epoch": 0.02, + "learning_rate": 4.996230185420061e-05, + "loss": 2.0879, + "step": 17581 + }, + { + "epoch": 0.02, + "learning_rate": 4.996229754211944e-05, + "loss": 1.7322, + "step": 17582 + }, + { + "epoch": 0.02, + "learning_rate": 4.996229322979185e-05, + "loss": 1.205, + "step": 17583 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962288917217835e-05, + "loss": 1.0267, + "step": 17584 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962284604397416e-05, + "loss": 1.4575, + "step": 17585 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962280291330565e-05, + "loss": 1.1628, + "step": 17586 + }, + { + "epoch": 0.02, + "learning_rate": 4.99622759780173e-05, + "loss": 1.0927, + "step": 17587 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962271664457626e-05, + "loss": 0.9515, + "step": 17588 + }, + { + "epoch": 0.02, + "learning_rate": 4.996226735065153e-05, + "loss": 0.8999, + "step": 17589 + }, + { + "epoch": 0.02, + "learning_rate": 4.996226303659902e-05, + "loss": 1.1061, + "step": 17590 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962258722300095e-05, + "loss": 0.9472, + "step": 17591 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962254407754744e-05, + "loss": 0.8737, + "step": 17592 + }, + { + "epoch": 0.02, + "learning_rate": 4.996225009296298e-05, + "loss": 1.2111, + "step": 17593 + }, + { + "epoch": 0.02, + "learning_rate": 4.99622457779248e-05, + "loss": 1.0294, + "step": 17594 + }, + { + "epoch": 0.02, + "learning_rate": 4.99622414626402e-05, + "loss": 0.6605, + "step": 17595 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962237147109195e-05, + "loss": 0.8284, + "step": 17596 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962232831331763e-05, + "loss": 0.9766, + "step": 17597 + }, + { + "epoch": 0.02, + "learning_rate": 4.996222851530792e-05, + "loss": 1.1289, + "step": 17598 + }, + { + "epoch": 0.02, + "learning_rate": 4.996222419903765e-05, + "loss": 1.2024, + "step": 17599 + }, + { + "epoch": 0.02, + "learning_rate": 4.996221988252097e-05, + "loss": 1.1505, + "step": 17600 + }, + { + "epoch": 0.02, + "learning_rate": 4.996221556575788e-05, + "loss": 0.985, + "step": 17601 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962211248748366e-05, + "loss": 1.0529, + "step": 17602 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962206931492436e-05, + "loss": 1.0801, + "step": 17603 + }, + { + "epoch": 0.02, + "learning_rate": 4.996220261399009e-05, + "loss": 0.9786, + "step": 17604 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962198296241325e-05, + "loss": 1.1808, + "step": 17605 + }, + { + "epoch": 0.02, + "learning_rate": 4.996219397824615e-05, + "loss": 0.9943, + "step": 17606 + }, + { + "epoch": 0.02, + "learning_rate": 4.996218966000455e-05, + "loss": 0.9549, + "step": 17607 + }, + { + "epoch": 0.02, + "learning_rate": 4.996218534151654e-05, + "loss": 1.1674, + "step": 17608 + }, + { + "epoch": 0.02, + "learning_rate": 4.996218102278212e-05, + "loss": 1.1715, + "step": 17609 + }, + { + "epoch": 0.02, + "learning_rate": 4.996217670380127e-05, + "loss": 0.9442, + "step": 17610 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962172384574016e-05, + "loss": 1.1659, + "step": 17611 + }, + { + "epoch": 0.02, + "learning_rate": 4.996216806510034e-05, + "loss": 0.6341, + "step": 17612 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962163745380245e-05, + "loss": 0.158, + "step": 17613 + }, + { + "epoch": 0.02, + "learning_rate": 4.996215942541374e-05, + "loss": 0.8477, + "step": 17614 + }, + { + "epoch": 0.02, + "learning_rate": 4.996215510520081e-05, + "loss": 0.924, + "step": 17615 + }, + { + "epoch": 0.02, + "learning_rate": 4.996215078474148e-05, + "loss": 0.8934, + "step": 17616 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962146464035724e-05, + "loss": 1.1694, + "step": 17617 + }, + { + "epoch": 0.02, + "learning_rate": 4.996214214308356e-05, + "loss": 1.1512, + "step": 17618 + }, + { + "epoch": 0.02, + "learning_rate": 4.996213782188497e-05, + "loss": 1.1294, + "step": 17619 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962133500439965e-05, + "loss": 0.9833, + "step": 17620 + }, + { + "epoch": 0.02, + "learning_rate": 4.996212917874855e-05, + "loss": 1.2548, + "step": 17621 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962124856810716e-05, + "loss": 0.9866, + "step": 17622 + }, + { + "epoch": 0.02, + "learning_rate": 4.996212053462647e-05, + "loss": 1.1241, + "step": 17623 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962116212195805e-05, + "loss": 1.5279, + "step": 17624 + }, + { + "epoch": 0.02, + "learning_rate": 4.996211188951873e-05, + "loss": 1.1799, + "step": 17625 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962107566595226e-05, + "loss": 1.17, + "step": 17626 + }, + { + "epoch": 0.02, + "learning_rate": 4.996210324342532e-05, + "loss": 0.7973, + "step": 17627 + }, + { + "epoch": 0.02, + "learning_rate": 4.996209892000899e-05, + "loss": 1.0882, + "step": 17628 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962094596346246e-05, + "loss": 1.2165, + "step": 17629 + }, + { + "epoch": 0.02, + "learning_rate": 4.996209027243709e-05, + "loss": 1.1187, + "step": 17630 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962085948281526e-05, + "loss": 1.0985, + "step": 17631 + }, + { + "epoch": 0.02, + "learning_rate": 4.996208162387953e-05, + "loss": 1.1231, + "step": 17632 + }, + { + "epoch": 0.02, + "learning_rate": 4.996207729923113e-05, + "loss": 1.06, + "step": 17633 + }, + { + "epoch": 0.02, + "learning_rate": 4.996207297433632e-05, + "loss": 1.1135, + "step": 17634 + }, + { + "epoch": 0.02, + "learning_rate": 4.996206864919508e-05, + "loss": 0.9715, + "step": 17635 + }, + { + "epoch": 0.02, + "learning_rate": 4.996206432380744e-05, + "loss": 0.8571, + "step": 17636 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962059998173375e-05, + "loss": 0.9907, + "step": 17637 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962055672292894e-05, + "loss": 0.9307, + "step": 17638 + }, + { + "epoch": 0.02, + "learning_rate": 4.996205134616601e-05, + "loss": 1.1845, + "step": 17639 + }, + { + "epoch": 0.02, + "learning_rate": 4.996204701979269e-05, + "loss": 1.1976, + "step": 17640 + }, + { + "epoch": 0.02, + "learning_rate": 4.996204269317297e-05, + "loss": 1.2271, + "step": 17641 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962038366306836e-05, + "loss": 1.0322, + "step": 17642 + }, + { + "epoch": 0.02, + "learning_rate": 4.996203403919428e-05, + "loss": 1.2612, + "step": 17643 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962029711835315e-05, + "loss": 1.1587, + "step": 17644 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962025384229936e-05, + "loss": 1.1265, + "step": 17645 + }, + { + "epoch": 0.02, + "learning_rate": 4.996202105637814e-05, + "loss": 0.9862, + "step": 17646 + }, + { + "epoch": 0.02, + "learning_rate": 4.996201672827993e-05, + "loss": 1.0855, + "step": 17647 + }, + { + "epoch": 0.02, + "learning_rate": 4.996201239993531e-05, + "loss": 1.1253, + "step": 17648 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962008071344266e-05, + "loss": 0.9555, + "step": 17649 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962003742506813e-05, + "loss": 1.054, + "step": 17650 + }, + { + "epoch": 0.02, + "learning_rate": 4.996199941342294e-05, + "loss": 1.0531, + "step": 17651 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961995084092665e-05, + "loss": 0.9774, + "step": 17652 + }, + { + "epoch": 0.02, + "learning_rate": 4.996199075451597e-05, + "loss": 1.2955, + "step": 17653 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961986424692855e-05, + "loss": 0.9387, + "step": 17654 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961982094623336e-05, + "loss": 1.0335, + "step": 17655 + }, + { + "epoch": 0.02, + "learning_rate": 4.99619777643074e-05, + "loss": 1.0396, + "step": 17656 + }, + { + "epoch": 0.02, + "learning_rate": 4.996197343374505e-05, + "loss": 1.0175, + "step": 17657 + }, + { + "epoch": 0.02, + "learning_rate": 4.996196910293628e-05, + "loss": 1.3186, + "step": 17658 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961964771881096e-05, + "loss": 1.2502, + "step": 17659 + }, + { + "epoch": 0.02, + "learning_rate": 4.99619604405795e-05, + "loss": 1.0594, + "step": 17660 + }, + { + "epoch": 0.02, + "learning_rate": 4.996195610903149e-05, + "loss": 1.0422, + "step": 17661 + }, + { + "epoch": 0.02, + "learning_rate": 4.996195177723707e-05, + "loss": 0.91, + "step": 17662 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961947445196237e-05, + "loss": 1.1158, + "step": 17663 + }, + { + "epoch": 0.02, + "learning_rate": 4.996194311290898e-05, + "loss": 0.8666, + "step": 17664 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961938780375314e-05, + "loss": 0.8887, + "step": 17665 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961934447595235e-05, + "loss": 1.287, + "step": 17666 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961930114568744e-05, + "loss": 1.2711, + "step": 17667 + }, + { + "epoch": 0.02, + "learning_rate": 4.996192578129584e-05, + "loss": 1.0686, + "step": 17668 + }, + { + "epoch": 0.02, + "learning_rate": 4.996192144777652e-05, + "loss": 0.8081, + "step": 17669 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961917114010784e-05, + "loss": 1.7464, + "step": 17670 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961912779998644e-05, + "loss": 0.95, + "step": 17671 + }, + { + "epoch": 0.02, + "learning_rate": 4.996190844574008e-05, + "loss": 1.005, + "step": 17672 + }, + { + "epoch": 0.02, + "learning_rate": 4.996190411123511e-05, + "loss": 1.1172, + "step": 17673 + }, + { + "epoch": 0.02, + "learning_rate": 4.996189977648372e-05, + "loss": 1.0489, + "step": 17674 + }, + { + "epoch": 0.02, + "learning_rate": 4.996189544148592e-05, + "loss": 0.9513, + "step": 17675 + }, + { + "epoch": 0.02, + "learning_rate": 4.996189110624171e-05, + "loss": 0.7141, + "step": 17676 + }, + { + "epoch": 0.02, + "learning_rate": 4.996188677075108e-05, + "loss": 0.8956, + "step": 17677 + }, + { + "epoch": 0.02, + "learning_rate": 4.996188243501404e-05, + "loss": 1.0659, + "step": 17678 + }, + { + "epoch": 0.02, + "learning_rate": 4.996187809903059e-05, + "loss": 1.1496, + "step": 17679 + }, + { + "epoch": 0.02, + "learning_rate": 4.996187376280072e-05, + "loss": 1.0677, + "step": 17680 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961869426324444e-05, + "loss": 0.8862, + "step": 17681 + }, + { + "epoch": 0.02, + "learning_rate": 4.996186508960175e-05, + "loss": 1.7406, + "step": 17682 + }, + { + "epoch": 0.02, + "learning_rate": 4.996186075263265e-05, + "loss": 1.4485, + "step": 17683 + }, + { + "epoch": 0.02, + "learning_rate": 4.996185641541713e-05, + "loss": 1.1186, + "step": 17684 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961852077955196e-05, + "loss": 1.2264, + "step": 17685 + }, + { + "epoch": 0.02, + "learning_rate": 4.996184774024685e-05, + "loss": 1.192, + "step": 17686 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961843402292096e-05, + "loss": 1.0894, + "step": 17687 + }, + { + "epoch": 0.02, + "learning_rate": 4.996183906409092e-05, + "loss": 1.0665, + "step": 17688 + }, + { + "epoch": 0.02, + "learning_rate": 4.996183472564334e-05, + "loss": 1.2266, + "step": 17689 + }, + { + "epoch": 0.02, + "learning_rate": 4.996183038694935e-05, + "loss": 1.2252, + "step": 17690 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961826048008944e-05, + "loss": 1.0922, + "step": 17691 + }, + { + "epoch": 0.02, + "learning_rate": 4.996182170882212e-05, + "loss": 0.8818, + "step": 17692 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961817369388886e-05, + "loss": 1.0697, + "step": 17693 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961813029709236e-05, + "loss": 1.0744, + "step": 17694 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961808689783187e-05, + "loss": 1.1978, + "step": 17695 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961804349610705e-05, + "loss": 1.1365, + "step": 17696 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961800009191825e-05, + "loss": 1.2275, + "step": 17697 + }, + { + "epoch": 0.02, + "learning_rate": 4.996179566852653e-05, + "loss": 1.0285, + "step": 17698 + }, + { + "epoch": 0.02, + "learning_rate": 4.996179132761483e-05, + "loss": 0.7626, + "step": 17699 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961786986456707e-05, + "loss": 0.7302, + "step": 17700 + }, + { + "epoch": 0.02, + "learning_rate": 4.996178264505217e-05, + "loss": 0.6511, + "step": 17701 + }, + { + "epoch": 0.02, + "learning_rate": 4.996177830340123e-05, + "loss": 1.058, + "step": 17702 + }, + { + "epoch": 0.02, + "learning_rate": 4.996177396150387e-05, + "loss": 1.0377, + "step": 17703 + }, + { + "epoch": 0.02, + "learning_rate": 4.99617696193601e-05, + "loss": 1.0946, + "step": 17704 + }, + { + "epoch": 0.02, + "learning_rate": 4.996176527696992e-05, + "loss": 1.3185, + "step": 17705 + }, + { + "epoch": 0.02, + "learning_rate": 4.996176093433333e-05, + "loss": 1.1071, + "step": 17706 + }, + { + "epoch": 0.02, + "learning_rate": 4.996175659145032e-05, + "loss": 1.4332, + "step": 17707 + }, + { + "epoch": 0.02, + "learning_rate": 4.996175224832091e-05, + "loss": 0.8426, + "step": 17708 + }, + { + "epoch": 0.02, + "learning_rate": 4.996174790494508e-05, + "loss": 0.9583, + "step": 17709 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961743561322836e-05, + "loss": 1.2795, + "step": 17710 + }, + { + "epoch": 0.02, + "learning_rate": 4.996173921745419e-05, + "loss": 0.8293, + "step": 17711 + }, + { + "epoch": 0.02, + "learning_rate": 4.996173487333912e-05, + "loss": 1.5821, + "step": 17712 + }, + { + "epoch": 0.02, + "learning_rate": 4.996173052897765e-05, + "loss": 1.1, + "step": 17713 + }, + { + "epoch": 0.02, + "learning_rate": 4.996172618436976e-05, + "loss": 1.1575, + "step": 17714 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961721839515466e-05, + "loss": 1.0456, + "step": 17715 + }, + { + "epoch": 0.02, + "learning_rate": 4.996171749441475e-05, + "loss": 1.176, + "step": 17716 + }, + { + "epoch": 0.02, + "learning_rate": 4.996171314906762e-05, + "loss": 1.1851, + "step": 17717 + }, + { + "epoch": 0.02, + "learning_rate": 4.99617088034741e-05, + "loss": 0.9399, + "step": 17718 + }, + { + "epoch": 0.02, + "learning_rate": 4.996170445763415e-05, + "loss": 0.9748, + "step": 17719 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961700111547795e-05, + "loss": 1.0271, + "step": 17720 + }, + { + "epoch": 0.02, + "learning_rate": 4.996169576521502e-05, + "loss": 1.2641, + "step": 17721 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961691418635844e-05, + "loss": 1.2566, + "step": 17722 + }, + { + "epoch": 0.02, + "learning_rate": 4.996168707181025e-05, + "loss": 1.2029, + "step": 17723 + }, + { + "epoch": 0.02, + "learning_rate": 4.996168272473825e-05, + "loss": 1.2951, + "step": 17724 + }, + { + "epoch": 0.02, + "learning_rate": 4.996167837741984e-05, + "loss": 1.1431, + "step": 17725 + }, + { + "epoch": 0.02, + "learning_rate": 4.996167402985501e-05, + "loss": 0.9106, + "step": 17726 + }, + { + "epoch": 0.02, + "learning_rate": 4.996166968204378e-05, + "loss": 1.1292, + "step": 17727 + }, + { + "epoch": 0.02, + "learning_rate": 4.996166533398613e-05, + "loss": 0.7149, + "step": 17728 + }, + { + "epoch": 0.02, + "learning_rate": 4.996166098568207e-05, + "loss": 1.0786, + "step": 17729 + }, + { + "epoch": 0.02, + "learning_rate": 4.99616566371316e-05, + "loss": 1.0467, + "step": 17730 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961652288334727e-05, + "loss": 0.9294, + "step": 17731 + }, + { + "epoch": 0.02, + "learning_rate": 4.996164793929143e-05, + "loss": 1.1534, + "step": 17732 + }, + { + "epoch": 0.02, + "learning_rate": 4.996164359000173e-05, + "loss": 1.0862, + "step": 17733 + }, + { + "epoch": 0.02, + "learning_rate": 4.996163924046562e-05, + "loss": 0.9491, + "step": 17734 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961634890683095e-05, + "loss": 1.2404, + "step": 17735 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961630540654166e-05, + "loss": 1.066, + "step": 17736 + }, + { + "epoch": 0.02, + "learning_rate": 4.996162619037882e-05, + "loss": 0.8946, + "step": 17737 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961621839857064e-05, + "loss": 0.2297, + "step": 17738 + }, + { + "epoch": 0.02, + "learning_rate": 4.99616174890889e-05, + "loss": 0.0471, + "step": 17739 + }, + { + "epoch": 0.02, + "learning_rate": 4.996161313807433e-05, + "loss": 0.232, + "step": 17740 + }, + { + "epoch": 0.02, + "learning_rate": 4.996160878681334e-05, + "loss": 1.3398, + "step": 17741 + }, + { + "epoch": 0.02, + "learning_rate": 4.996160443530594e-05, + "loss": 1.2047, + "step": 17742 + }, + { + "epoch": 0.02, + "learning_rate": 4.996160008355213e-05, + "loss": 1.3647, + "step": 17743 + }, + { + "epoch": 0.02, + "learning_rate": 4.996159573155192e-05, + "loss": 1.0151, + "step": 17744 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961591379305286e-05, + "loss": 1.1073, + "step": 17745 + }, + { + "epoch": 0.02, + "learning_rate": 4.996158702681224e-05, + "loss": 0.8109, + "step": 17746 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961582674072794e-05, + "loss": 1.158, + "step": 17747 + }, + { + "epoch": 0.02, + "learning_rate": 4.996157832108694e-05, + "loss": 1.232, + "step": 17748 + }, + { + "epoch": 0.02, + "learning_rate": 4.996157396785467e-05, + "loss": 1.256, + "step": 17749 + }, + { + "epoch": 0.02, + "learning_rate": 4.996156961437599e-05, + "loss": 1.1706, + "step": 17750 + }, + { + "epoch": 0.02, + "learning_rate": 4.99615652606509e-05, + "loss": 1.1536, + "step": 17751 + }, + { + "epoch": 0.02, + "learning_rate": 4.99615609066794e-05, + "loss": 1.0007, + "step": 17752 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961556552461484e-05, + "loss": 0.6541, + "step": 17753 + }, + { + "epoch": 0.02, + "learning_rate": 4.996155219799717e-05, + "loss": 1.1607, + "step": 17754 + }, + { + "epoch": 0.02, + "learning_rate": 4.996154784328644e-05, + "loss": 1.0444, + "step": 17755 + }, + { + "epoch": 0.02, + "learning_rate": 4.99615434883293e-05, + "loss": 0.8942, + "step": 17756 + }, + { + "epoch": 0.02, + "learning_rate": 4.996153913312575e-05, + "loss": 1.1181, + "step": 17757 + }, + { + "epoch": 0.02, + "learning_rate": 4.996153477767579e-05, + "loss": 1.1953, + "step": 17758 + }, + { + "epoch": 0.02, + "learning_rate": 4.996153042197942e-05, + "loss": 1.0063, + "step": 17759 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961526066036644e-05, + "loss": 0.9434, + "step": 17760 + }, + { + "epoch": 0.02, + "learning_rate": 4.996152170984746e-05, + "loss": 1.2352, + "step": 17761 + }, + { + "epoch": 0.02, + "learning_rate": 4.996151735341186e-05, + "loss": 0.9462, + "step": 17762 + }, + { + "epoch": 0.02, + "learning_rate": 4.996151299672985e-05, + "loss": 1.0325, + "step": 17763 + }, + { + "epoch": 0.02, + "learning_rate": 4.996150863980144e-05, + "loss": 0.7396, + "step": 17764 + }, + { + "epoch": 0.02, + "learning_rate": 4.99615042826266e-05, + "loss": 0.9112, + "step": 17765 + }, + { + "epoch": 0.02, + "learning_rate": 4.996149992520537e-05, + "loss": 1.1087, + "step": 17766 + }, + { + "epoch": 0.02, + "learning_rate": 4.996149556753772e-05, + "loss": 1.0122, + "step": 17767 + }, + { + "epoch": 0.02, + "learning_rate": 4.996149120962367e-05, + "loss": 1.1162, + "step": 17768 + }, + { + "epoch": 0.02, + "learning_rate": 4.996148685146321e-05, + "loss": 1.0184, + "step": 17769 + }, + { + "epoch": 0.02, + "learning_rate": 4.996148249305633e-05, + "loss": 1.0254, + "step": 17770 + }, + { + "epoch": 0.02, + "learning_rate": 4.996147813440305e-05, + "loss": 1.1897, + "step": 17771 + }, + { + "epoch": 0.02, + "learning_rate": 4.996147377550336e-05, + "loss": 1.1157, + "step": 17772 + }, + { + "epoch": 0.02, + "learning_rate": 4.996146941635725e-05, + "loss": 1.2131, + "step": 17773 + }, + { + "epoch": 0.02, + "learning_rate": 4.996146505696475e-05, + "loss": 1.1821, + "step": 17774 + }, + { + "epoch": 0.02, + "learning_rate": 4.996146069732583e-05, + "loss": 1.0645, + "step": 17775 + }, + { + "epoch": 0.02, + "learning_rate": 4.99614563374405e-05, + "loss": 1.0137, + "step": 17776 + }, + { + "epoch": 0.02, + "learning_rate": 4.996145197730876e-05, + "loss": 1.1401, + "step": 17777 + }, + { + "epoch": 0.02, + "learning_rate": 4.996144761693062e-05, + "loss": 1.0146, + "step": 17778 + }, + { + "epoch": 0.02, + "learning_rate": 4.996144325630606e-05, + "loss": 0.8929, + "step": 17779 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961438895435096e-05, + "loss": 1.1418, + "step": 17780 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961434534317726e-05, + "loss": 1.2176, + "step": 17781 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961430172953944e-05, + "loss": 1.0159, + "step": 17782 + }, + { + "epoch": 0.02, + "learning_rate": 4.996142581134376e-05, + "loss": 1.3819, + "step": 17783 + }, + { + "epoch": 0.02, + "learning_rate": 4.996142144948716e-05, + "loss": 1.0658, + "step": 17784 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961417087384146e-05, + "loss": 1.0802, + "step": 17785 + }, + { + "epoch": 0.02, + "learning_rate": 4.996141272503473e-05, + "loss": 0.9955, + "step": 17786 + }, + { + "epoch": 0.02, + "learning_rate": 4.996140836243891e-05, + "loss": 1.0343, + "step": 17787 + }, + { + "epoch": 0.02, + "learning_rate": 4.996140399959668e-05, + "loss": 0.9223, + "step": 17788 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961399636508034e-05, + "loss": 0.8904, + "step": 17789 + }, + { + "epoch": 0.02, + "learning_rate": 4.996139527317298e-05, + "loss": 0.9913, + "step": 17790 + }, + { + "epoch": 0.02, + "learning_rate": 4.996139090959152e-05, + "loss": 1.0482, + "step": 17791 + }, + { + "epoch": 0.02, + "learning_rate": 4.996138654576366e-05, + "loss": 0.9714, + "step": 17792 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961382181689384e-05, + "loss": 1.0372, + "step": 17793 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961377817368704e-05, + "loss": 0.998, + "step": 17794 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961373452801606e-05, + "loss": 0.4208, + "step": 17795 + }, + { + "epoch": 0.02, + "learning_rate": 4.996136908798811e-05, + "loss": 0.752, + "step": 17796 + }, + { + "epoch": 0.02, + "learning_rate": 4.996136472292821e-05, + "loss": 1.138, + "step": 17797 + }, + { + "epoch": 0.02, + "learning_rate": 4.996136035762189e-05, + "loss": 1.0157, + "step": 17798 + }, + { + "epoch": 0.02, + "learning_rate": 4.996135599206916e-05, + "loss": 1.2037, + "step": 17799 + }, + { + "epoch": 0.02, + "learning_rate": 4.996135162627003e-05, + "loss": 1.1487, + "step": 17800 + }, + { + "epoch": 0.02, + "learning_rate": 4.996134726022449e-05, + "loss": 0.7653, + "step": 17801 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961342893932545e-05, + "loss": 0.8066, + "step": 17802 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961338527394185e-05, + "loss": 1.0121, + "step": 17803 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961334160609426e-05, + "loss": 1.0457, + "step": 17804 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961329793578255e-05, + "loss": 1.2664, + "step": 17805 + }, + { + "epoch": 0.02, + "learning_rate": 4.996132542630068e-05, + "loss": 1.0643, + "step": 17806 + }, + { + "epoch": 0.02, + "learning_rate": 4.996132105877669e-05, + "loss": 0.9038, + "step": 17807 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961316691006296e-05, + "loss": 0.7782, + "step": 17808 + }, + { + "epoch": 0.02, + "learning_rate": 4.99613123229895e-05, + "loss": 1.0781, + "step": 17809 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961307954726286e-05, + "loss": 0.7864, + "step": 17810 + }, + { + "epoch": 0.02, + "learning_rate": 4.996130358621667e-05, + "loss": 1.3976, + "step": 17811 + }, + { + "epoch": 0.02, + "learning_rate": 4.996129921746064e-05, + "loss": 1.5575, + "step": 17812 + }, + { + "epoch": 0.02, + "learning_rate": 4.996129484845821e-05, + "loss": 1.39, + "step": 17813 + }, + { + "epoch": 0.02, + "learning_rate": 4.996129047920937e-05, + "loss": 1.1701, + "step": 17814 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961286109714125e-05, + "loss": 0.9059, + "step": 17815 + }, + { + "epoch": 0.02, + "learning_rate": 4.996128173997247e-05, + "loss": 1.0271, + "step": 17816 + }, + { + "epoch": 0.02, + "learning_rate": 4.996127736998441e-05, + "loss": 0.9995, + "step": 17817 + }, + { + "epoch": 0.02, + "learning_rate": 4.996127299974994e-05, + "loss": 1.1153, + "step": 17818 + }, + { + "epoch": 0.02, + "learning_rate": 4.996126862926906e-05, + "loss": 1.0012, + "step": 17819 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961264258541785e-05, + "loss": 1.0479, + "step": 17820 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961259887568095e-05, + "loss": 1.0638, + "step": 17821 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961255516348e-05, + "loss": 1.2964, + "step": 17822 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961251144881494e-05, + "loss": 0.9223, + "step": 17823 + }, + { + "epoch": 0.02, + "learning_rate": 4.996124677316858e-05, + "loss": 1.0178, + "step": 17824 + }, + { + "epoch": 0.02, + "learning_rate": 4.996124240120927e-05, + "loss": 1.1145, + "step": 17825 + }, + { + "epoch": 0.02, + "learning_rate": 4.996123802900354e-05, + "loss": 1.0434, + "step": 17826 + }, + { + "epoch": 0.02, + "learning_rate": 4.996123365655141e-05, + "loss": 0.3403, + "step": 17827 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961229283852875e-05, + "loss": 0.2058, + "step": 17828 + }, + { + "epoch": 0.02, + "learning_rate": 4.996122491090793e-05, + "loss": 0.7896, + "step": 17829 + }, + { + "epoch": 0.02, + "learning_rate": 4.996122053771658e-05, + "loss": 0.5549, + "step": 17830 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961216164278824e-05, + "loss": 0.8353, + "step": 17831 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961211790594656e-05, + "loss": 0.5369, + "step": 17832 + }, + { + "epoch": 0.02, + "learning_rate": 4.996120741666408e-05, + "loss": 0.9177, + "step": 17833 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961203042487105e-05, + "loss": 1.225, + "step": 17834 + }, + { + "epoch": 0.02, + "learning_rate": 4.996119866806372e-05, + "loss": 1.0038, + "step": 17835 + }, + { + "epoch": 0.02, + "learning_rate": 4.996119429339393e-05, + "loss": 0.9841, + "step": 17836 + }, + { + "epoch": 0.02, + "learning_rate": 4.996118991847774e-05, + "loss": 1.1686, + "step": 17837 + }, + { + "epoch": 0.02, + "learning_rate": 4.996118554331513e-05, + "loss": 1.0309, + "step": 17838 + }, + { + "epoch": 0.02, + "learning_rate": 4.996118116790612e-05, + "loss": 1.4359, + "step": 17839 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961176792250705e-05, + "loss": 1.8824, + "step": 17840 + }, + { + "epoch": 0.02, + "learning_rate": 4.996117241634888e-05, + "loss": 1.6606, + "step": 17841 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961168040200656e-05, + "loss": 1.631, + "step": 17842 + }, + { + "epoch": 0.02, + "learning_rate": 4.996116366380602e-05, + "loss": 1.4825, + "step": 17843 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961159287164986e-05, + "loss": 1.5155, + "step": 17844 + }, + { + "epoch": 0.02, + "learning_rate": 4.996115491027753e-05, + "loss": 1.5507, + "step": 17845 + }, + { + "epoch": 0.02, + "learning_rate": 4.996115053314368e-05, + "loss": 1.4561, + "step": 17846 + }, + { + "epoch": 0.02, + "learning_rate": 4.996114615576342e-05, + "loss": 1.4281, + "step": 17847 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961141778136755e-05, + "loss": 1.4336, + "step": 17848 + }, + { + "epoch": 0.02, + "learning_rate": 4.996113740026369e-05, + "loss": 1.4138, + "step": 17849 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961133022144215e-05, + "loss": 1.1119, + "step": 17850 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961128643778333e-05, + "loss": 0.9559, + "step": 17851 + }, + { + "epoch": 0.02, + "learning_rate": 4.996112426516604e-05, + "loss": 1.2736, + "step": 17852 + }, + { + "epoch": 0.02, + "learning_rate": 4.996111988630735e-05, + "loss": 1.2252, + "step": 17853 + }, + { + "epoch": 0.02, + "learning_rate": 4.996111550720225e-05, + "loss": 1.1023, + "step": 17854 + }, + { + "epoch": 0.02, + "learning_rate": 4.996111112785075e-05, + "loss": 1.2164, + "step": 17855 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961106748252834e-05, + "loss": 0.8608, + "step": 17856 + }, + { + "epoch": 0.02, + "learning_rate": 4.996110236840852e-05, + "loss": 0.5434, + "step": 17857 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961097988317796e-05, + "loss": 1.5819, + "step": 17858 + }, + { + "epoch": 0.02, + "learning_rate": 4.996109360798067e-05, + "loss": 1.0247, + "step": 17859 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961089227397137e-05, + "loss": 0.9757, + "step": 17860 + }, + { + "epoch": 0.02, + "learning_rate": 4.99610848465672e-05, + "loss": 1.371, + "step": 17861 + }, + { + "epoch": 0.02, + "learning_rate": 4.996108046549086e-05, + "loss": 1.2974, + "step": 17862 + }, + { + "epoch": 0.02, + "learning_rate": 4.996107608416811e-05, + "loss": 0.3844, + "step": 17863 + }, + { + "epoch": 0.02, + "learning_rate": 4.996107170259896e-05, + "loss": 0.1993, + "step": 17864 + }, + { + "epoch": 0.02, + "learning_rate": 4.99610673207834e-05, + "loss": 0.1216, + "step": 17865 + }, + { + "epoch": 0.02, + "learning_rate": 4.996106293872144e-05, + "loss": 0.1464, + "step": 17866 + }, + { + "epoch": 0.02, + "learning_rate": 4.996105855641307e-05, + "loss": 0.9081, + "step": 17867 + }, + { + "epoch": 0.02, + "learning_rate": 4.996105417385829e-05, + "loss": 1.0491, + "step": 17868 + }, + { + "epoch": 0.02, + "learning_rate": 4.996104979105711e-05, + "loss": 1.1273, + "step": 17869 + }, + { + "epoch": 0.02, + "learning_rate": 4.996104540800953e-05, + "loss": 0.9371, + "step": 17870 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961041024715544e-05, + "loss": 0.9948, + "step": 17871 + }, + { + "epoch": 0.02, + "learning_rate": 4.996103664117515e-05, + "loss": 0.7867, + "step": 17872 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961032257388355e-05, + "loss": 1.2941, + "step": 17873 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961027873355145e-05, + "loss": 1.2699, + "step": 17874 + }, + { + "epoch": 0.02, + "learning_rate": 4.9961023489075544e-05, + "loss": 1.3135, + "step": 17875 + }, + { + "epoch": 0.02, + "learning_rate": 4.996101910454953e-05, + "loss": 1.1298, + "step": 17876 + }, + { + "epoch": 0.02, + "learning_rate": 4.996101471977711e-05, + "loss": 1.4404, + "step": 17877 + }, + { + "epoch": 0.02, + "learning_rate": 4.996101033475828e-05, + "loss": 1.2673, + "step": 17878 + }, + { + "epoch": 0.02, + "learning_rate": 4.996100594949306e-05, + "loss": 0.9231, + "step": 17879 + }, + { + "epoch": 0.02, + "learning_rate": 4.996100156398143e-05, + "loss": 1.3769, + "step": 17880 + }, + { + "epoch": 0.02, + "learning_rate": 4.996099717822339e-05, + "loss": 2.7435, + "step": 17881 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960992792218955e-05, + "loss": 1.3202, + "step": 17882 + }, + { + "epoch": 0.02, + "learning_rate": 4.996098840596811e-05, + "loss": 0.9185, + "step": 17883 + }, + { + "epoch": 0.02, + "learning_rate": 4.996098401947086e-05, + "loss": 0.8772, + "step": 17884 + }, + { + "epoch": 0.02, + "learning_rate": 4.996097963272721e-05, + "loss": 1.2196, + "step": 17885 + }, + { + "epoch": 0.02, + "learning_rate": 4.996097524573715e-05, + "loss": 0.8258, + "step": 17886 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960970858500685e-05, + "loss": 0.8416, + "step": 17887 + }, + { + "epoch": 0.02, + "learning_rate": 4.996096647101782e-05, + "loss": 0.919, + "step": 17888 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960962083288554e-05, + "loss": 1.1087, + "step": 17889 + }, + { + "epoch": 0.02, + "learning_rate": 4.996095769531288e-05, + "loss": 1.0873, + "step": 17890 + }, + { + "epoch": 0.02, + "learning_rate": 4.996095330709081e-05, + "loss": 1.1233, + "step": 17891 + }, + { + "epoch": 0.02, + "learning_rate": 4.996094891862232e-05, + "loss": 0.9123, + "step": 17892 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960944529907435e-05, + "loss": 1.1303, + "step": 17893 + }, + { + "epoch": 0.02, + "learning_rate": 4.996094014094614e-05, + "loss": 1.0402, + "step": 17894 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960935751738455e-05, + "loss": 1.1151, + "step": 17895 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960931362284356e-05, + "loss": 0.8309, + "step": 17896 + }, + { + "epoch": 0.02, + "learning_rate": 4.996092697258385e-05, + "loss": 0.9464, + "step": 17897 + }, + { + "epoch": 0.02, + "learning_rate": 4.996092258263695e-05, + "loss": 1.2149, + "step": 17898 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960918192443636e-05, + "loss": 1.0995, + "step": 17899 + }, + { + "epoch": 0.02, + "learning_rate": 4.996091380200393e-05, + "loss": 1.1807, + "step": 17900 + }, + { + "epoch": 0.02, + "learning_rate": 4.996090941131781e-05, + "loss": 1.1365, + "step": 17901 + }, + { + "epoch": 0.02, + "learning_rate": 4.996090502038529e-05, + "loss": 1.0854, + "step": 17902 + }, + { + "epoch": 0.02, + "learning_rate": 4.996090062920636e-05, + "loss": 0.7762, + "step": 17903 + }, + { + "epoch": 0.02, + "learning_rate": 4.996089623778104e-05, + "loss": 1.0915, + "step": 17904 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960891846109315e-05, + "loss": 1.6023, + "step": 17905 + }, + { + "epoch": 0.02, + "learning_rate": 4.996088745419117e-05, + "loss": 1.9308, + "step": 17906 + }, + { + "epoch": 0.02, + "learning_rate": 4.996088306202664e-05, + "loss": 2.0805, + "step": 17907 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960878669615694e-05, + "loss": 1.5449, + "step": 17908 + }, + { + "epoch": 0.02, + "learning_rate": 4.996087427695836e-05, + "loss": 1.3992, + "step": 17909 + }, + { + "epoch": 0.02, + "learning_rate": 4.996086988405461e-05, + "loss": 1.1294, + "step": 17910 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960865490904466e-05, + "loss": 0.9645, + "step": 17911 + }, + { + "epoch": 0.02, + "learning_rate": 4.99608610975079e-05, + "loss": 1.0826, + "step": 17912 + }, + { + "epoch": 0.02, + "learning_rate": 4.996085670386495e-05, + "loss": 1.1494, + "step": 17913 + }, + { + "epoch": 0.02, + "learning_rate": 4.996085230997559e-05, + "loss": 1.1544, + "step": 17914 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960847915839825e-05, + "loss": 1.3492, + "step": 17915 + }, + { + "epoch": 0.02, + "learning_rate": 4.996084352145767e-05, + "loss": 1.1013, + "step": 17916 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960839126829094e-05, + "loss": 1.0455, + "step": 17917 + }, + { + "epoch": 0.02, + "learning_rate": 4.996083473195412e-05, + "loss": 1.0529, + "step": 17918 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960830336832755e-05, + "loss": 1.4955, + "step": 17919 + }, + { + "epoch": 0.02, + "learning_rate": 4.996082594146497e-05, + "loss": 1.1802, + "step": 17920 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960821545850795e-05, + "loss": 1.0798, + "step": 17921 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960817149990213e-05, + "loss": 0.9555, + "step": 17922 + }, + { + "epoch": 0.02, + "learning_rate": 4.996081275388323e-05, + "loss": 0.9913, + "step": 17923 + }, + { + "epoch": 0.02, + "learning_rate": 4.996080835752984e-05, + "loss": 1.2333, + "step": 17924 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960803960930045e-05, + "loss": 1.1927, + "step": 17925 + }, + { + "epoch": 0.02, + "learning_rate": 4.996079956408386e-05, + "loss": 1.3165, + "step": 17926 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960795166991255e-05, + "loss": 1.1601, + "step": 17927 + }, + { + "epoch": 0.02, + "learning_rate": 4.996079076965226e-05, + "loss": 1.1796, + "step": 17928 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960786372066865e-05, + "loss": 1.1237, + "step": 17929 + }, + { + "epoch": 0.02, + "learning_rate": 4.996078197423506e-05, + "loss": 1.1265, + "step": 17930 + }, + { + "epoch": 0.02, + "learning_rate": 4.996077757615686e-05, + "loss": 1.3149, + "step": 17931 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960773177832254e-05, + "loss": 0.7454, + "step": 17932 + }, + { + "epoch": 0.02, + "learning_rate": 4.996076877926124e-05, + "loss": 0.5543, + "step": 17933 + }, + { + "epoch": 0.02, + "learning_rate": 4.996076438044383e-05, + "loss": 1.411, + "step": 17934 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960759981380015e-05, + "loss": 1.4529, + "step": 17935 + }, + { + "epoch": 0.02, + "learning_rate": 4.99607555820698e-05, + "loss": 1.3448, + "step": 17936 + }, + { + "epoch": 0.02, + "learning_rate": 4.996075118251318e-05, + "loss": 1.0683, + "step": 17937 + }, + { + "epoch": 0.02, + "learning_rate": 4.996074678271016e-05, + "loss": 1.2759, + "step": 17938 + }, + { + "epoch": 0.02, + "learning_rate": 4.996074238266074e-05, + "loss": 1.044, + "step": 17939 + }, + { + "epoch": 0.02, + "learning_rate": 4.996073798236492e-05, + "loss": 1.0703, + "step": 17940 + }, + { + "epoch": 0.02, + "learning_rate": 4.996073358182269e-05, + "loss": 1.0292, + "step": 17941 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960729181034065e-05, + "loss": 1.2431, + "step": 17942 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960724779999034e-05, + "loss": 0.6406, + "step": 17943 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960720378717605e-05, + "loss": 0.2434, + "step": 17944 + }, + { + "epoch": 0.02, + "learning_rate": 4.996071597718978e-05, + "loss": 0.3419, + "step": 17945 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960711575415544e-05, + "loss": 0.2989, + "step": 17946 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960707173394905e-05, + "loss": 0.7781, + "step": 17947 + }, + { + "epoch": 0.02, + "learning_rate": 4.996070277112787e-05, + "loss": 1.0198, + "step": 17948 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960698368614426e-05, + "loss": 1.0838, + "step": 17949 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960693965854585e-05, + "loss": 1.1136, + "step": 17950 + }, + { + "epoch": 0.02, + "learning_rate": 4.996068956284834e-05, + "loss": 1.0603, + "step": 17951 + }, + { + "epoch": 0.02, + "learning_rate": 4.99606851595957e-05, + "loss": 0.9599, + "step": 17952 + }, + { + "epoch": 0.02, + "learning_rate": 4.996068075609666e-05, + "loss": 1.1496, + "step": 17953 + }, + { + "epoch": 0.02, + "learning_rate": 4.996067635235121e-05, + "loss": 1.1231, + "step": 17954 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960671948359363e-05, + "loss": 1.2266, + "step": 17955 + }, + { + "epoch": 0.02, + "learning_rate": 4.996066754412111e-05, + "loss": 1.3298, + "step": 17956 + }, + { + "epoch": 0.02, + "learning_rate": 4.996066313963647e-05, + "loss": 1.2169, + "step": 17957 + }, + { + "epoch": 0.02, + "learning_rate": 4.996065873490541e-05, + "loss": 1.2234, + "step": 17958 + }, + { + "epoch": 0.02, + "learning_rate": 4.996065432992796e-05, + "loss": 1.3403, + "step": 17959 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960649924704105e-05, + "loss": 1.1476, + "step": 17960 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960645519233853e-05, + "loss": 0.8511, + "step": 17961 + }, + { + "epoch": 0.02, + "learning_rate": 4.99606411135172e-05, + "loss": 0.8109, + "step": 17962 + }, + { + "epoch": 0.02, + "learning_rate": 4.996063670755414e-05, + "loss": 0.8822, + "step": 17963 + }, + { + "epoch": 0.02, + "learning_rate": 4.996063230134469e-05, + "loss": 1.1449, + "step": 17964 + }, + { + "epoch": 0.02, + "learning_rate": 4.996062789488883e-05, + "loss": 1.0863, + "step": 17965 + }, + { + "epoch": 0.02, + "learning_rate": 4.996062348818657e-05, + "loss": 1.1397, + "step": 17966 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960619081237916e-05, + "loss": 1.0783, + "step": 17967 + }, + { + "epoch": 0.02, + "learning_rate": 4.996061467404285e-05, + "loss": 1.1457, + "step": 17968 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960610266601396e-05, + "loss": 1.4081, + "step": 17969 + }, + { + "epoch": 0.02, + "learning_rate": 4.996060585891353e-05, + "loss": 1.2906, + "step": 17970 + }, + { + "epoch": 0.02, + "learning_rate": 4.996060145097927e-05, + "loss": 1.1079, + "step": 17971 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960597042798605e-05, + "loss": 1.2476, + "step": 17972 + }, + { + "epoch": 0.02, + "learning_rate": 4.996059263437154e-05, + "loss": 0.9978, + "step": 17973 + }, + { + "epoch": 0.02, + "learning_rate": 4.996058822569808e-05, + "loss": 0.7534, + "step": 17974 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960583816778215e-05, + "loss": 0.8802, + "step": 17975 + }, + { + "epoch": 0.02, + "learning_rate": 4.996057940761195e-05, + "loss": 1.303, + "step": 17976 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960574998199285e-05, + "loss": 1.0353, + "step": 17977 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960570588540225e-05, + "loss": 1.2494, + "step": 17978 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960566178634754e-05, + "loss": 1.0256, + "step": 17979 + }, + { + "epoch": 0.02, + "learning_rate": 4.99605617684829e-05, + "loss": 1.1767, + "step": 17980 + }, + { + "epoch": 0.02, + "learning_rate": 4.996055735808463e-05, + "loss": 1.1519, + "step": 17981 + }, + { + "epoch": 0.02, + "learning_rate": 4.996055294743996e-05, + "loss": 1.0401, + "step": 17982 + }, + { + "epoch": 0.02, + "learning_rate": 4.996054853654889e-05, + "loss": 0.8165, + "step": 17983 + }, + { + "epoch": 0.02, + "learning_rate": 4.996054412541143e-05, + "loss": 1.5519, + "step": 17984 + }, + { + "epoch": 0.02, + "learning_rate": 4.996053971402757e-05, + "loss": 2.0726, + "step": 17985 + }, + { + "epoch": 0.02, + "learning_rate": 4.99605353023973e-05, + "loss": 1.8672, + "step": 17986 + }, + { + "epoch": 0.02, + "learning_rate": 4.996053089052063e-05, + "loss": 0.6705, + "step": 17987 + }, + { + "epoch": 0.02, + "learning_rate": 4.996052647839757e-05, + "loss": 1.5385, + "step": 17988 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960522066028106e-05, + "loss": 0.9687, + "step": 17989 + }, + { + "epoch": 0.02, + "learning_rate": 4.996051765341224e-05, + "loss": 1.1204, + "step": 17990 + }, + { + "epoch": 0.02, + "learning_rate": 4.996051324054997e-05, + "loss": 1.0293, + "step": 17991 + }, + { + "epoch": 0.02, + "learning_rate": 4.996050882744131e-05, + "loss": 0.7258, + "step": 17992 + }, + { + "epoch": 0.02, + "learning_rate": 4.996050441408625e-05, + "loss": 0.6446, + "step": 17993 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960500000484785e-05, + "loss": 0.5994, + "step": 17994 + }, + { + "epoch": 0.02, + "learning_rate": 4.996049558663692e-05, + "loss": 0.6836, + "step": 17995 + }, + { + "epoch": 0.02, + "learning_rate": 4.996049117254266e-05, + "loss": 0.574, + "step": 17996 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960486758202e-05, + "loss": 1.1941, + "step": 17997 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960482343614935e-05, + "loss": 0.984, + "step": 17998 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960477928781473e-05, + "loss": 0.6684, + "step": 17999 + }, + { + "epoch": 0.02, + "learning_rate": 4.996047351370161e-05, + "loss": 0.7643, + "step": 18000 + }, + { + "epoch": 0.02, + "eval_loss": 1.050679326057434, + "eval_runtime": 85.592, + "eval_samples_per_second": 16.181, + "eval_steps_per_second": 4.054, + "step": 18000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960469098375354e-05, + "loss": 0.9056, + "step": 18001 + }, + { + "epoch": 0.02, + "learning_rate": 4.99604646828027e-05, + "loss": 1.0514, + "step": 18002 + }, + { + "epoch": 0.02, + "learning_rate": 4.996046026698364e-05, + "loss": 1.2171, + "step": 18003 + }, + { + "epoch": 0.02, + "learning_rate": 4.996045585091818e-05, + "loss": 1.337, + "step": 18004 + }, + { + "epoch": 0.02, + "learning_rate": 4.996045143460633e-05, + "loss": 0.8909, + "step": 18005 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960447018048075e-05, + "loss": 1.1762, + "step": 18006 + }, + { + "epoch": 0.02, + "learning_rate": 4.996044260124342e-05, + "loss": 1.0932, + "step": 18007 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960438184192364e-05, + "loss": 0.9789, + "step": 18008 + }, + { + "epoch": 0.02, + "learning_rate": 4.996043376689491e-05, + "loss": 1.0348, + "step": 18009 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960429349351065e-05, + "loss": 0.9626, + "step": 18010 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960424931560814e-05, + "loss": 1.0603, + "step": 18011 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960420513524165e-05, + "loss": 1.0675, + "step": 18012 + }, + { + "epoch": 0.02, + "learning_rate": 4.996041609524112e-05, + "loss": 1.2027, + "step": 18013 + }, + { + "epoch": 0.02, + "learning_rate": 4.996041167671167e-05, + "loss": 0.9694, + "step": 18014 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960407257935826e-05, + "loss": 1.0229, + "step": 18015 + }, + { + "epoch": 0.02, + "learning_rate": 4.996040283891358e-05, + "loss": 0.6471, + "step": 18016 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960398419644935e-05, + "loss": 0.8391, + "step": 18017 + }, + { + "epoch": 0.02, + "learning_rate": 4.99603940001299e-05, + "loss": 1.4049, + "step": 18018 + }, + { + "epoch": 0.02, + "learning_rate": 4.996038958036846e-05, + "loss": 0.9414, + "step": 18019 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960385160360626e-05, + "loss": 0.6825, + "step": 18020 + }, + { + "epoch": 0.02, + "learning_rate": 4.99603807401064e-05, + "loss": 1.7898, + "step": 18021 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960376319605756e-05, + "loss": 1.1204, + "step": 18022 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960371898858723e-05, + "loss": 1.0597, + "step": 18023 + }, + { + "epoch": 0.02, + "learning_rate": 4.996036747786529e-05, + "loss": 1.0251, + "step": 18024 + }, + { + "epoch": 0.02, + "learning_rate": 4.996036305662546e-05, + "loss": 1.3625, + "step": 18025 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960358635139235e-05, + "loss": 1.1888, + "step": 18026 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960354213406615e-05, + "loss": 1.148, + "step": 18027 + }, + { + "epoch": 0.02, + "learning_rate": 4.996034979142759e-05, + "loss": 0.4998, + "step": 18028 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960345369202166e-05, + "loss": 0.6164, + "step": 18029 + }, + { + "epoch": 0.02, + "learning_rate": 4.996034094673035e-05, + "loss": 0.6197, + "step": 18030 + }, + { + "epoch": 0.02, + "learning_rate": 4.996033652401213e-05, + "loss": 1.141, + "step": 18031 + }, + { + "epoch": 0.02, + "learning_rate": 4.996033210104752e-05, + "loss": 1.2347, + "step": 18032 + }, + { + "epoch": 0.02, + "learning_rate": 4.996032767783651e-05, + "loss": 0.958, + "step": 18033 + }, + { + "epoch": 0.02, + "learning_rate": 4.996032325437909e-05, + "loss": 0.6896, + "step": 18034 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960318830675283e-05, + "loss": 1.3073, + "step": 18035 + }, + { + "epoch": 0.02, + "learning_rate": 4.996031440672508e-05, + "loss": 1.0048, + "step": 18036 + }, + { + "epoch": 0.02, + "learning_rate": 4.996030998252847e-05, + "loss": 1.022, + "step": 18037 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960305558085476e-05, + "loss": 1.2817, + "step": 18038 + }, + { + "epoch": 0.02, + "learning_rate": 4.996030113339607e-05, + "loss": 1.0414, + "step": 18039 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960296708460274e-05, + "loss": 1.2339, + "step": 18040 + }, + { + "epoch": 0.02, + "learning_rate": 4.996029228327808e-05, + "loss": 1.0857, + "step": 18041 + }, + { + "epoch": 0.02, + "learning_rate": 4.996028785784949e-05, + "loss": 1.0162, + "step": 18042 + }, + { + "epoch": 0.02, + "learning_rate": 4.99602834321745e-05, + "loss": 1.015, + "step": 18043 + }, + { + "epoch": 0.02, + "learning_rate": 4.996027900625312e-05, + "loss": 0.863, + "step": 18044 + }, + { + "epoch": 0.02, + "learning_rate": 4.996027458008533e-05, + "loss": 1.035, + "step": 18045 + }, + { + "epoch": 0.02, + "learning_rate": 4.996027015367115e-05, + "loss": 0.9516, + "step": 18046 + }, + { + "epoch": 0.02, + "learning_rate": 4.996026572701057e-05, + "loss": 1.1389, + "step": 18047 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960261300103594e-05, + "loss": 1.1366, + "step": 18048 + }, + { + "epoch": 0.02, + "learning_rate": 4.996025687295022e-05, + "loss": 0.784, + "step": 18049 + }, + { + "epoch": 0.02, + "learning_rate": 4.996025244555045e-05, + "loss": 1.0802, + "step": 18050 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960248017904285e-05, + "loss": 0.9317, + "step": 18051 + }, + { + "epoch": 0.02, + "learning_rate": 4.996024359001172e-05, + "loss": 1.2861, + "step": 18052 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960239161872756e-05, + "loss": 1.1905, + "step": 18053 + }, + { + "epoch": 0.02, + "learning_rate": 4.99602347334874e-05, + "loss": 1.2289, + "step": 18054 + }, + { + "epoch": 0.02, + "learning_rate": 4.996023030485565e-05, + "loss": 1.1468, + "step": 18055 + }, + { + "epoch": 0.02, + "learning_rate": 4.99602258759775e-05, + "loss": 1.1557, + "step": 18056 + }, + { + "epoch": 0.02, + "learning_rate": 4.996022144685294e-05, + "loss": 1.2146, + "step": 18057 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960217017482e-05, + "loss": 1.0674, + "step": 18058 + }, + { + "epoch": 0.02, + "learning_rate": 4.996021258786465e-05, + "loss": 1.1627, + "step": 18059 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960208158000916e-05, + "loss": 0.9766, + "step": 18060 + }, + { + "epoch": 0.02, + "learning_rate": 4.996020372789078e-05, + "loss": 0.9162, + "step": 18061 + }, + { + "epoch": 0.02, + "learning_rate": 4.996019929753425e-05, + "loss": 0.7696, + "step": 18062 + }, + { + "epoch": 0.02, + "learning_rate": 4.996019486693132e-05, + "loss": 1.1892, + "step": 18063 + }, + { + "epoch": 0.02, + "learning_rate": 4.996019043608199e-05, + "loss": 1.4385, + "step": 18064 + }, + { + "epoch": 0.02, + "learning_rate": 4.996018600498627e-05, + "loss": 1.0275, + "step": 18065 + }, + { + "epoch": 0.02, + "learning_rate": 4.996018157364415e-05, + "loss": 0.5482, + "step": 18066 + }, + { + "epoch": 0.02, + "learning_rate": 4.996017714205564e-05, + "loss": 0.4382, + "step": 18067 + }, + { + "epoch": 0.02, + "learning_rate": 4.996017271022073e-05, + "loss": 0.6151, + "step": 18068 + }, + { + "epoch": 0.02, + "learning_rate": 4.996016827813942e-05, + "loss": 0.4725, + "step": 18069 + }, + { + "epoch": 0.02, + "learning_rate": 4.996016384581171e-05, + "loss": 0.4936, + "step": 18070 + }, + { + "epoch": 0.02, + "learning_rate": 4.996015941323762e-05, + "loss": 0.3741, + "step": 18071 + }, + { + "epoch": 0.02, + "learning_rate": 4.996015498041712e-05, + "loss": 0.2599, + "step": 18072 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960150547350224e-05, + "loss": 0.2654, + "step": 18073 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960146114036936e-05, + "loss": 1.0072, + "step": 18074 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960141680477256e-05, + "loss": 1.0779, + "step": 18075 + }, + { + "epoch": 0.02, + "learning_rate": 4.996013724667117e-05, + "loss": 1.2817, + "step": 18076 + }, + { + "epoch": 0.02, + "learning_rate": 4.99601328126187e-05, + "loss": 1.3007, + "step": 18077 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960128378319825e-05, + "loss": 1.0265, + "step": 18078 + }, + { + "epoch": 0.02, + "learning_rate": 4.996012394377456e-05, + "loss": 1.1672, + "step": 18079 + }, + { + "epoch": 0.02, + "learning_rate": 4.996011950898289e-05, + "loss": 0.8514, + "step": 18080 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960115073944835e-05, + "loss": 1.0394, + "step": 18081 + }, + { + "epoch": 0.02, + "learning_rate": 4.996011063866037e-05, + "loss": 1.0375, + "step": 18082 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960106203129525e-05, + "loss": 1.1931, + "step": 18083 + }, + { + "epoch": 0.02, + "learning_rate": 4.996010176735227e-05, + "loss": 1.1129, + "step": 18084 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960097331328635e-05, + "loss": 1.2561, + "step": 18085 + }, + { + "epoch": 0.02, + "learning_rate": 4.99600928950586e-05, + "loss": 1.076, + "step": 18086 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960088458542164e-05, + "loss": 1.116, + "step": 18087 + }, + { + "epoch": 0.02, + "learning_rate": 4.996008402177934e-05, + "loss": 1.1238, + "step": 18088 + }, + { + "epoch": 0.02, + "learning_rate": 4.996007958477011e-05, + "loss": 1.007, + "step": 18089 + }, + { + "epoch": 0.02, + "learning_rate": 4.996007514751449e-05, + "loss": 1.0495, + "step": 18090 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960070710012474e-05, + "loss": 1.1175, + "step": 18091 + }, + { + "epoch": 0.02, + "learning_rate": 4.996006627226406e-05, + "loss": 1.0128, + "step": 18092 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960061834269255e-05, + "loss": 1.5412, + "step": 18093 + }, + { + "epoch": 0.02, + "learning_rate": 4.996005739602806e-05, + "loss": 1.4803, + "step": 18094 + }, + { + "epoch": 0.02, + "learning_rate": 4.996005295754046e-05, + "loss": 0.8497, + "step": 18095 + }, + { + "epoch": 0.02, + "learning_rate": 4.996004851880647e-05, + "loss": 1.0703, + "step": 18096 + }, + { + "epoch": 0.02, + "learning_rate": 4.996004407982608e-05, + "loss": 1.214, + "step": 18097 + }, + { + "epoch": 0.02, + "learning_rate": 4.99600396405993e-05, + "loss": 1.089, + "step": 18098 + }, + { + "epoch": 0.02, + "learning_rate": 4.996003520112613e-05, + "loss": 1.1132, + "step": 18099 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960030761406554e-05, + "loss": 1.1375, + "step": 18100 + }, + { + "epoch": 0.02, + "learning_rate": 4.996002632144059e-05, + "loss": 1.0878, + "step": 18101 + }, + { + "epoch": 0.02, + "learning_rate": 4.996002188122822e-05, + "loss": 0.8723, + "step": 18102 + }, + { + "epoch": 0.02, + "learning_rate": 4.996001744076947e-05, + "loss": 0.9465, + "step": 18103 + }, + { + "epoch": 0.02, + "learning_rate": 4.996001300006432e-05, + "loss": 1.0728, + "step": 18104 + }, + { + "epoch": 0.02, + "learning_rate": 4.996000855911278e-05, + "loss": 1.191, + "step": 18105 + }, + { + "epoch": 0.02, + "learning_rate": 4.996000411791484e-05, + "loss": 1.0594, + "step": 18106 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959999676470503e-05, + "loss": 1.0284, + "step": 18107 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959995234779775e-05, + "loss": 0.7396, + "step": 18108 + }, + { + "epoch": 0.02, + "learning_rate": 4.995999079284265e-05, + "loss": 0.8808, + "step": 18109 + }, + { + "epoch": 0.02, + "learning_rate": 4.995998635065913e-05, + "loss": 0.8918, + "step": 18110 + }, + { + "epoch": 0.02, + "learning_rate": 4.995998190822921e-05, + "loss": 1.0108, + "step": 18111 + }, + { + "epoch": 0.02, + "learning_rate": 4.995997746555291e-05, + "loss": 0.8671, + "step": 18112 + }, + { + "epoch": 0.02, + "learning_rate": 4.99599730226302e-05, + "loss": 0.7689, + "step": 18113 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959968579461116e-05, + "loss": 0.8836, + "step": 18114 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959964136045625e-05, + "loss": 0.9026, + "step": 18115 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959959692383736e-05, + "loss": 1.1428, + "step": 18116 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959955248475455e-05, + "loss": 1.1471, + "step": 18117 + }, + { + "epoch": 0.02, + "learning_rate": 4.995995080432078e-05, + "loss": 1.1529, + "step": 18118 + }, + { + "epoch": 0.02, + "learning_rate": 4.995994635991972e-05, + "loss": 1.0676, + "step": 18119 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959941915272254e-05, + "loss": 1.1613, + "step": 18120 + }, + { + "epoch": 0.02, + "learning_rate": 4.99599374703784e-05, + "loss": 1.4425, + "step": 18121 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959933025238146e-05, + "loss": 1.2366, + "step": 18122 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959928579851514e-05, + "loss": 0.8954, + "step": 18123 + }, + { + "epoch": 0.02, + "learning_rate": 4.995992413421847e-05, + "loss": 0.9595, + "step": 18124 + }, + { + "epoch": 0.02, + "learning_rate": 4.995991968833904e-05, + "loss": 1.1951, + "step": 18125 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959915242213215e-05, + "loss": 0.9885, + "step": 18126 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959910795840996e-05, + "loss": 1.1215, + "step": 18127 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959906349222386e-05, + "loss": 1.0994, + "step": 18128 + }, + { + "epoch": 0.02, + "learning_rate": 4.995990190235738e-05, + "loss": 1.4625, + "step": 18129 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959897455245976e-05, + "loss": 1.1744, + "step": 18130 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959893007888184e-05, + "loss": 1.0358, + "step": 18131 + }, + { + "epoch": 0.02, + "learning_rate": 4.995988856028399e-05, + "loss": 0.9786, + "step": 18132 + }, + { + "epoch": 0.02, + "learning_rate": 4.995988411243342e-05, + "loss": 1.3601, + "step": 18133 + }, + { + "epoch": 0.02, + "learning_rate": 4.995987966433644e-05, + "loss": 1.1878, + "step": 18134 + }, + { + "epoch": 0.02, + "learning_rate": 4.995987521599308e-05, + "loss": 1.212, + "step": 18135 + }, + { + "epoch": 0.02, + "learning_rate": 4.995987076740332e-05, + "loss": 1.003, + "step": 18136 + }, + { + "epoch": 0.02, + "learning_rate": 4.995986631856716e-05, + "loss": 1.0741, + "step": 18137 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959861869484615e-05, + "loss": 1.0701, + "step": 18138 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959857420155675e-05, + "loss": 1.0748, + "step": 18139 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959852970580344e-05, + "loss": 1.1109, + "step": 18140 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959848520758614e-05, + "loss": 0.9822, + "step": 18141 + }, + { + "epoch": 0.02, + "learning_rate": 4.99598440706905e-05, + "loss": 1.013, + "step": 18142 + }, + { + "epoch": 0.02, + "learning_rate": 4.995983962037598e-05, + "loss": 1.1869, + "step": 18143 + }, + { + "epoch": 0.02, + "learning_rate": 4.995983516981508e-05, + "loss": 1.1398, + "step": 18144 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959830719007784e-05, + "loss": 1.3199, + "step": 18145 + }, + { + "epoch": 0.02, + "learning_rate": 4.995982626795409e-05, + "loss": 1.2738, + "step": 18146 + }, + { + "epoch": 0.02, + "learning_rate": 4.995982181665401e-05, + "loss": 1.2449, + "step": 18147 + }, + { + "epoch": 0.02, + "learning_rate": 4.995981736510753e-05, + "loss": 0.9902, + "step": 18148 + }, + { + "epoch": 0.02, + "learning_rate": 4.995981291331466e-05, + "loss": 1.2013, + "step": 18149 + }, + { + "epoch": 0.02, + "learning_rate": 4.995980846127539e-05, + "loss": 1.0507, + "step": 18150 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959804008989744e-05, + "loss": 1.3328, + "step": 18151 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959799556457684e-05, + "loss": 1.3154, + "step": 18152 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959795103679254e-05, + "loss": 1.0196, + "step": 18153 + }, + { + "epoch": 0.02, + "learning_rate": 4.995979065065441e-05, + "loss": 1.1392, + "step": 18154 + }, + { + "epoch": 0.02, + "learning_rate": 4.995978619738319e-05, + "loss": 1.0309, + "step": 18155 + }, + { + "epoch": 0.02, + "learning_rate": 4.995978174386557e-05, + "loss": 1.3264, + "step": 18156 + }, + { + "epoch": 0.02, + "learning_rate": 4.995977729010156e-05, + "loss": 1.0503, + "step": 18157 + }, + { + "epoch": 0.02, + "learning_rate": 4.995977283609116e-05, + "loss": 0.8997, + "step": 18158 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959768381834356e-05, + "loss": 1.0872, + "step": 18159 + }, + { + "epoch": 0.02, + "learning_rate": 4.995976392733117e-05, + "loss": 1.1339, + "step": 18160 + }, + { + "epoch": 0.02, + "learning_rate": 4.995975947258159e-05, + "loss": 1.069, + "step": 18161 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959755017585616e-05, + "loss": 1.1728, + "step": 18162 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959750562343255e-05, + "loss": 1.607, + "step": 18163 + }, + { + "epoch": 0.02, + "learning_rate": 4.99597461068545e-05, + "loss": 0.8533, + "step": 18164 + }, + { + "epoch": 0.02, + "learning_rate": 4.995974165111935e-05, + "loss": 1.2901, + "step": 18165 + }, + { + "epoch": 0.02, + "learning_rate": 4.995973719513781e-05, + "loss": 1.2426, + "step": 18166 + }, + { + "epoch": 0.02, + "learning_rate": 4.995973273890988e-05, + "loss": 1.3435, + "step": 18167 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959728282435546e-05, + "loss": 1.2354, + "step": 18168 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959723825714834e-05, + "loss": 0.6667, + "step": 18169 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959719368747724e-05, + "loss": 1.2161, + "step": 18170 + }, + { + "epoch": 0.02, + "learning_rate": 4.995971491153422e-05, + "loss": 1.2082, + "step": 18171 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959710454074335e-05, + "loss": 1.1846, + "step": 18172 + }, + { + "epoch": 0.02, + "learning_rate": 4.995970599636804e-05, + "loss": 1.3826, + "step": 18173 + }, + { + "epoch": 0.02, + "learning_rate": 4.995970153841537e-05, + "loss": 1.3235, + "step": 18174 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959697080216304e-05, + "loss": 0.7302, + "step": 18175 + }, + { + "epoch": 0.02, + "learning_rate": 4.995969262177085e-05, + "loss": 0.9685, + "step": 18176 + }, + { + "epoch": 0.02, + "learning_rate": 4.995968816307899e-05, + "loss": 1.1002, + "step": 18177 + }, + { + "epoch": 0.02, + "learning_rate": 4.995968370414075e-05, + "loss": 0.9935, + "step": 18178 + }, + { + "epoch": 0.02, + "learning_rate": 4.995967924495611e-05, + "loss": 1.0348, + "step": 18179 + }, + { + "epoch": 0.02, + "learning_rate": 4.995967478552509e-05, + "loss": 1.0308, + "step": 18180 + }, + { + "epoch": 0.02, + "learning_rate": 4.995967032584767e-05, + "loss": 1.0874, + "step": 18181 + }, + { + "epoch": 0.02, + "learning_rate": 4.995966586592387e-05, + "loss": 0.8182, + "step": 18182 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959661405753666e-05, + "loss": 0.6662, + "step": 18183 + }, + { + "epoch": 0.02, + "learning_rate": 4.995965694533707e-05, + "loss": 1.0945, + "step": 18184 + }, + { + "epoch": 0.02, + "learning_rate": 4.99596524846741e-05, + "loss": 1.2955, + "step": 18185 + }, + { + "epoch": 0.02, + "learning_rate": 4.995964802376473e-05, + "loss": 1.2857, + "step": 18186 + }, + { + "epoch": 0.02, + "learning_rate": 4.995964356260896e-05, + "loss": 1.087, + "step": 18187 + }, + { + "epoch": 0.02, + "learning_rate": 4.99596391012068e-05, + "loss": 1.0841, + "step": 18188 + }, + { + "epoch": 0.02, + "learning_rate": 4.995963463955826e-05, + "loss": 1.1765, + "step": 18189 + }, + { + "epoch": 0.02, + "learning_rate": 4.995963017766332e-05, + "loss": 1.2735, + "step": 18190 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959625715522e-05, + "loss": 1.0869, + "step": 18191 + }, + { + "epoch": 0.02, + "learning_rate": 4.995962125313428e-05, + "loss": 1.2522, + "step": 18192 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959616790500165e-05, + "loss": 1.2131, + "step": 18193 + }, + { + "epoch": 0.02, + "learning_rate": 4.995961232761967e-05, + "loss": 1.1952, + "step": 18194 + }, + { + "epoch": 0.02, + "learning_rate": 4.995960786449278e-05, + "loss": 0.8877, + "step": 18195 + }, + { + "epoch": 0.02, + "learning_rate": 4.995960340111949e-05, + "loss": 1.2954, + "step": 18196 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959598937499816e-05, + "loss": 1.1065, + "step": 18197 + }, + { + "epoch": 0.02, + "learning_rate": 4.995959447363375e-05, + "loss": 1.0998, + "step": 18198 + }, + { + "epoch": 0.02, + "learning_rate": 4.99595900095213e-05, + "loss": 0.5373, + "step": 18199 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959585545162454e-05, + "loss": 1.0371, + "step": 18200 + }, + { + "epoch": 0.02, + "learning_rate": 4.995958108055722e-05, + "loss": 0.8803, + "step": 18201 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959576615705596e-05, + "loss": 1.131, + "step": 18202 + }, + { + "epoch": 0.02, + "learning_rate": 4.995957215060758e-05, + "loss": 0.8677, + "step": 18203 + }, + { + "epoch": 0.02, + "learning_rate": 4.995956768526318e-05, + "loss": 1.2961, + "step": 18204 + }, + { + "epoch": 0.02, + "learning_rate": 4.995956321967238e-05, + "loss": 1.0677, + "step": 18205 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959558753835186e-05, + "loss": 1.2602, + "step": 18206 + }, + { + "epoch": 0.02, + "learning_rate": 4.995955428775161e-05, + "loss": 1.3877, + "step": 18207 + }, + { + "epoch": 0.02, + "learning_rate": 4.995954982142165e-05, + "loss": 1.1169, + "step": 18208 + }, + { + "epoch": 0.02, + "learning_rate": 4.995954535484528e-05, + "loss": 0.8725, + "step": 18209 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959540888022536e-05, + "loss": 1.2329, + "step": 18210 + }, + { + "epoch": 0.02, + "learning_rate": 4.99595364209534e-05, + "loss": 1.1446, + "step": 18211 + }, + { + "epoch": 0.02, + "learning_rate": 4.995953195363787e-05, + "loss": 0.7766, + "step": 18212 + }, + { + "epoch": 0.02, + "learning_rate": 4.995952748607595e-05, + "loss": 0.5956, + "step": 18213 + }, + { + "epoch": 0.02, + "learning_rate": 4.995952301826765e-05, + "loss": 0.3396, + "step": 18214 + }, + { + "epoch": 0.02, + "learning_rate": 4.995951855021295e-05, + "loss": 0.5136, + "step": 18215 + }, + { + "epoch": 0.02, + "learning_rate": 4.995951408191186e-05, + "loss": 1.4237, + "step": 18216 + }, + { + "epoch": 0.02, + "learning_rate": 4.995950961336439e-05, + "loss": 1.7097, + "step": 18217 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959505144570515e-05, + "loss": 1.4086, + "step": 18218 + }, + { + "epoch": 0.02, + "learning_rate": 4.995950067553026e-05, + "loss": 1.1448, + "step": 18219 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959496206243616e-05, + "loss": 0.9727, + "step": 18220 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959491736710576e-05, + "loss": 1.3568, + "step": 18221 + }, + { + "epoch": 0.02, + "learning_rate": 4.995948726693115e-05, + "loss": 1.5732, + "step": 18222 + }, + { + "epoch": 0.02, + "learning_rate": 4.995948279690534e-05, + "loss": 1.6105, + "step": 18223 + }, + { + "epoch": 0.02, + "learning_rate": 4.995947832663314e-05, + "loss": 1.3131, + "step": 18224 + }, + { + "epoch": 0.02, + "learning_rate": 4.995947385611454e-05, + "loss": 0.9889, + "step": 18225 + }, + { + "epoch": 0.02, + "learning_rate": 4.995946938534955e-05, + "loss": 0.9401, + "step": 18226 + }, + { + "epoch": 0.02, + "learning_rate": 4.995946491433818e-05, + "loss": 1.1841, + "step": 18227 + }, + { + "epoch": 0.02, + "learning_rate": 4.995946044308042e-05, + "loss": 1.1538, + "step": 18228 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959455971576266e-05, + "loss": 0.924, + "step": 18229 + }, + { + "epoch": 0.02, + "learning_rate": 4.995945149982573e-05, + "loss": 0.9382, + "step": 18230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959447027828797e-05, + "loss": 1.1172, + "step": 18231 + }, + { + "epoch": 0.02, + "learning_rate": 4.995944255558548e-05, + "loss": 1.0739, + "step": 18232 + }, + { + "epoch": 0.02, + "learning_rate": 4.995943808309577e-05, + "loss": 1.1412, + "step": 18233 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959433610359675e-05, + "loss": 1.149, + "step": 18234 + }, + { + "epoch": 0.02, + "learning_rate": 4.995942913737719e-05, + "loss": 0.8393, + "step": 18235 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959424664148316e-05, + "loss": 1.2145, + "step": 18236 + }, + { + "epoch": 0.02, + "learning_rate": 4.995942019067305e-05, + "loss": 1.0294, + "step": 18237 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959415716951396e-05, + "loss": 1.3151, + "step": 18238 + }, + { + "epoch": 0.02, + "learning_rate": 4.995941124298336e-05, + "loss": 1.1017, + "step": 18239 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959406768768924e-05, + "loss": 1.1733, + "step": 18240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99594022943081e-05, + "loss": 1.1012, + "step": 18241 + }, + { + "epoch": 0.02, + "learning_rate": 4.99593978196009e-05, + "loss": 1.0657, + "step": 18242 + }, + { + "epoch": 0.02, + "learning_rate": 4.99593933446473e-05, + "loss": 0.996, + "step": 18243 + }, + { + "epoch": 0.02, + "learning_rate": 4.995938886944731e-05, + "loss": 1.5859, + "step": 18244 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959384394000944e-05, + "loss": 2.5835, + "step": 18245 + }, + { + "epoch": 0.02, + "learning_rate": 4.995937991830817e-05, + "loss": 1.9406, + "step": 18246 + }, + { + "epoch": 0.02, + "learning_rate": 4.995937544236903e-05, + "loss": 1.8655, + "step": 18247 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959370966183486e-05, + "loss": 0.9834, + "step": 18248 + }, + { + "epoch": 0.02, + "learning_rate": 4.995936648975156e-05, + "loss": 1.2702, + "step": 18249 + }, + { + "epoch": 0.02, + "learning_rate": 4.995936201307324e-05, + "loss": 1.4034, + "step": 18250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959357536148536e-05, + "loss": 1.0864, + "step": 18251 + }, + { + "epoch": 0.02, + "learning_rate": 4.995935305897744e-05, + "loss": 0.9772, + "step": 18252 + }, + { + "epoch": 0.02, + "learning_rate": 4.995934858155996e-05, + "loss": 0.6886, + "step": 18253 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959344103896094e-05, + "loss": 1.026, + "step": 18254 + }, + { + "epoch": 0.02, + "learning_rate": 4.995933962598584e-05, + "loss": 1.0624, + "step": 18255 + }, + { + "epoch": 0.02, + "learning_rate": 4.995933514782919e-05, + "loss": 1.1887, + "step": 18256 + }, + { + "epoch": 0.02, + "learning_rate": 4.995933066942616e-05, + "loss": 1.3504, + "step": 18257 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959326190776735e-05, + "loss": 0.7912, + "step": 18258 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959321711880924e-05, + "loss": 1.21, + "step": 18259 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959317232738736e-05, + "loss": 1.2832, + "step": 18260 + }, + { + "epoch": 0.02, + "learning_rate": 4.995931275335015e-05, + "loss": 0.7327, + "step": 18261 + }, + { + "epoch": 0.02, + "learning_rate": 4.995930827371517e-05, + "loss": 0.9154, + "step": 18262 + }, + { + "epoch": 0.02, + "learning_rate": 4.995930379383381e-05, + "loss": 1.2694, + "step": 18263 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959299313706064e-05, + "loss": 0.9726, + "step": 18264 + }, + { + "epoch": 0.02, + "learning_rate": 4.995929483333193e-05, + "loss": 1.0436, + "step": 18265 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959290352711404e-05, + "loss": 0.9869, + "step": 18266 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959285871844494e-05, + "loss": 1.1556, + "step": 18267 + }, + { + "epoch": 0.02, + "learning_rate": 4.995928139073119e-05, + "loss": 1.0325, + "step": 18268 + }, + { + "epoch": 0.02, + "learning_rate": 4.995927690937151e-05, + "loss": 1.0499, + "step": 18269 + }, + { + "epoch": 0.02, + "learning_rate": 4.995927242776543e-05, + "loss": 1.1624, + "step": 18270 + }, + { + "epoch": 0.02, + "learning_rate": 4.995926794591297e-05, + "loss": 1.084, + "step": 18271 + }, + { + "epoch": 0.02, + "learning_rate": 4.995926346381412e-05, + "loss": 1.0882, + "step": 18272 + }, + { + "epoch": 0.02, + "learning_rate": 4.995925898146889e-05, + "loss": 1.183, + "step": 18273 + }, + { + "epoch": 0.02, + "learning_rate": 4.995925449887726e-05, + "loss": 1.0154, + "step": 18274 + }, + { + "epoch": 0.02, + "learning_rate": 4.995925001603926e-05, + "loss": 1.0948, + "step": 18275 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959245532954856e-05, + "loss": 1.0534, + "step": 18276 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959241049624076e-05, + "loss": 0.9524, + "step": 18277 + }, + { + "epoch": 0.02, + "learning_rate": 4.99592365660469e-05, + "loss": 0.6684, + "step": 18278 + }, + { + "epoch": 0.02, + "learning_rate": 4.995923208222334e-05, + "loss": 1.1587, + "step": 18279 + }, + { + "epoch": 0.02, + "learning_rate": 4.995922759815339e-05, + "loss": 0.8138, + "step": 18280 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959223113837065e-05, + "loss": 0.951, + "step": 18281 + }, + { + "epoch": 0.02, + "learning_rate": 4.995921862927434e-05, + "loss": 1.586, + "step": 18282 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959214144465236e-05, + "loss": 0.6946, + "step": 18283 + }, + { + "epoch": 0.02, + "learning_rate": 4.995920965940975e-05, + "loss": 0.8482, + "step": 18284 + }, + { + "epoch": 0.02, + "learning_rate": 4.995920517410786e-05, + "loss": 0.8691, + "step": 18285 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959200688559596e-05, + "loss": 0.9515, + "step": 18286 + }, + { + "epoch": 0.02, + "learning_rate": 4.995919620276493e-05, + "loss": 1.0254, + "step": 18287 + }, + { + "epoch": 0.02, + "learning_rate": 4.99591917167239e-05, + "loss": 1.084, + "step": 18288 + }, + { + "epoch": 0.02, + "learning_rate": 4.995918723043647e-05, + "loss": 1.1622, + "step": 18289 + }, + { + "epoch": 0.02, + "learning_rate": 4.995918274390265e-05, + "loss": 1.2231, + "step": 18290 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959178257122456e-05, + "loss": 1.2836, + "step": 18291 + }, + { + "epoch": 0.02, + "learning_rate": 4.995917377009587e-05, + "loss": 1.1115, + "step": 18292 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959169282822895e-05, + "loss": 1.2112, + "step": 18293 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959164795303536e-05, + "loss": 1.2341, + "step": 18294 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959160307537787e-05, + "loss": 1.1189, + "step": 18295 + }, + { + "epoch": 0.02, + "learning_rate": 4.995915581952566e-05, + "loss": 1.2342, + "step": 18296 + }, + { + "epoch": 0.02, + "learning_rate": 4.995915133126714e-05, + "loss": 1.0963, + "step": 18297 + }, + { + "epoch": 0.02, + "learning_rate": 4.995914684276224e-05, + "loss": 1.3317, + "step": 18298 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959142354010945e-05, + "loss": 1.0509, + "step": 18299 + }, + { + "epoch": 0.02, + "learning_rate": 4.995913786501327e-05, + "loss": 1.0397, + "step": 18300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959133375769204e-05, + "loss": 1.0481, + "step": 18301 + }, + { + "epoch": 0.02, + "learning_rate": 4.995912888627875e-05, + "loss": 0.4182, + "step": 18302 + }, + { + "epoch": 0.02, + "learning_rate": 4.995912439654192e-05, + "loss": 0.3065, + "step": 18303 + }, + { + "epoch": 0.02, + "learning_rate": 4.99591199065587e-05, + "loss": 0.2723, + "step": 18304 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959115416329096e-05, + "loss": 0.237, + "step": 18305 + }, + { + "epoch": 0.02, + "learning_rate": 4.99591109258531e-05, + "loss": 0.1672, + "step": 18306 + }, + { + "epoch": 0.02, + "learning_rate": 4.995910643513072e-05, + "loss": 0.1905, + "step": 18307 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959101944161955e-05, + "loss": 0.183, + "step": 18308 + }, + { + "epoch": 0.02, + "learning_rate": 4.99590974529468e-05, + "loss": 0.1746, + "step": 18309 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959092961485264e-05, + "loss": 0.1514, + "step": 18310 + }, + { + "epoch": 0.02, + "learning_rate": 4.995908846977735e-05, + "loss": 0.1341, + "step": 18311 + }, + { + "epoch": 0.02, + "learning_rate": 4.995908397782304e-05, + "loss": 0.1512, + "step": 18312 + }, + { + "epoch": 0.02, + "learning_rate": 4.995907948562234e-05, + "loss": 0.1605, + "step": 18313 + }, + { + "epoch": 0.02, + "learning_rate": 4.995907499317527e-05, + "loss": 0.1645, + "step": 18314 + }, + { + "epoch": 0.02, + "learning_rate": 4.99590705004818e-05, + "loss": 0.1523, + "step": 18315 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959066007541955e-05, + "loss": 0.2021, + "step": 18316 + }, + { + "epoch": 0.02, + "learning_rate": 4.995906151435572e-05, + "loss": 0.6824, + "step": 18317 + }, + { + "epoch": 0.02, + "learning_rate": 4.99590570209231e-05, + "loss": 0.9813, + "step": 18318 + }, + { + "epoch": 0.02, + "learning_rate": 4.995905252724409e-05, + "loss": 1.1523, + "step": 18319 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959048033318704e-05, + "loss": 1.2604, + "step": 18320 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959043539146925e-05, + "loss": 1.0231, + "step": 18321 + }, + { + "epoch": 0.02, + "learning_rate": 4.995903904472877e-05, + "loss": 0.8977, + "step": 18322 + }, + { + "epoch": 0.02, + "learning_rate": 4.995903455006422e-05, + "loss": 1.2779, + "step": 18323 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959030055153286e-05, + "loss": 0.9602, + "step": 18324 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959025559995974e-05, + "loss": 1.1869, + "step": 18325 + }, + { + "epoch": 0.02, + "learning_rate": 4.995902106459227e-05, + "loss": 1.342, + "step": 18326 + }, + { + "epoch": 0.02, + "learning_rate": 4.995901656894218e-05, + "loss": 0.9983, + "step": 18327 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959012073045716e-05, + "loss": 1.0568, + "step": 18328 + }, + { + "epoch": 0.02, + "learning_rate": 4.995900757690286e-05, + "loss": 1.0632, + "step": 18329 + }, + { + "epoch": 0.02, + "learning_rate": 4.995900308051362e-05, + "loss": 1.1853, + "step": 18330 + }, + { + "epoch": 0.02, + "learning_rate": 4.995899858387799e-05, + "loss": 1.0868, + "step": 18331 + }, + { + "epoch": 0.02, + "learning_rate": 4.995899408699598e-05, + "loss": 0.9986, + "step": 18332 + }, + { + "epoch": 0.02, + "learning_rate": 4.995898958986759e-05, + "loss": 1.3138, + "step": 18333 + }, + { + "epoch": 0.02, + "learning_rate": 4.995898509249281e-05, + "loss": 0.8306, + "step": 18334 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958980594871645e-05, + "loss": 1.1192, + "step": 18335 + }, + { + "epoch": 0.02, + "learning_rate": 4.99589760970041e-05, + "loss": 0.8943, + "step": 18336 + }, + { + "epoch": 0.02, + "learning_rate": 4.995897159889016e-05, + "loss": 1.035, + "step": 18337 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958967100529845e-05, + "loss": 0.6405, + "step": 18338 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958962601923144e-05, + "loss": 1.4876, + "step": 18339 + }, + { + "epoch": 0.02, + "learning_rate": 4.995895810307006e-05, + "loss": 1.0796, + "step": 18340 + }, + { + "epoch": 0.02, + "learning_rate": 4.995895360397059e-05, + "loss": 0.8686, + "step": 18341 + }, + { + "epoch": 0.02, + "learning_rate": 4.995894910462473e-05, + "loss": 0.8334, + "step": 18342 + }, + { + "epoch": 0.02, + "learning_rate": 4.995894460503249e-05, + "loss": 1.1744, + "step": 18343 + }, + { + "epoch": 0.02, + "learning_rate": 4.995894010519387e-05, + "loss": 0.961, + "step": 18344 + }, + { + "epoch": 0.02, + "learning_rate": 4.995893560510886e-05, + "loss": 0.956, + "step": 18345 + }, + { + "epoch": 0.02, + "learning_rate": 4.995893110477747e-05, + "loss": 1.0698, + "step": 18346 + }, + { + "epoch": 0.02, + "learning_rate": 4.99589266041997e-05, + "loss": 1.0129, + "step": 18347 + }, + { + "epoch": 0.02, + "learning_rate": 4.995892210337554e-05, + "loss": 1.1725, + "step": 18348 + }, + { + "epoch": 0.02, + "learning_rate": 4.995891760230499e-05, + "loss": 1.346, + "step": 18349 + }, + { + "epoch": 0.02, + "learning_rate": 4.995891310098807e-05, + "loss": 1.2322, + "step": 18350 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958908599424755e-05, + "loss": 1.1883, + "step": 18351 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958904097615056e-05, + "loss": 1.0649, + "step": 18352 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958899595558987e-05, + "loss": 1.2905, + "step": 18353 + }, + { + "epoch": 0.02, + "learning_rate": 4.995889509325652e-05, + "loss": 1.1603, + "step": 18354 + }, + { + "epoch": 0.02, + "learning_rate": 4.995889059070767e-05, + "loss": 1.1534, + "step": 18355 + }, + { + "epoch": 0.02, + "learning_rate": 4.995888608791244e-05, + "loss": 1.0142, + "step": 18356 + }, + { + "epoch": 0.02, + "learning_rate": 4.995888158487083e-05, + "loss": 1.147, + "step": 18357 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958877081582836e-05, + "loss": 1.2824, + "step": 18358 + }, + { + "epoch": 0.02, + "learning_rate": 4.995887257804845e-05, + "loss": 1.151, + "step": 18359 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958868074267686e-05, + "loss": 0.9651, + "step": 18360 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958863570240536e-05, + "loss": 0.9157, + "step": 18361 + }, + { + "epoch": 0.02, + "learning_rate": 4.995885906596701e-05, + "loss": 1.0149, + "step": 18362 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958854561447096e-05, + "loss": 1.0436, + "step": 18363 + }, + { + "epoch": 0.02, + "learning_rate": 4.995885005668079e-05, + "loss": 1.2484, + "step": 18364 + }, + { + "epoch": 0.02, + "learning_rate": 4.995884555166812e-05, + "loss": 1.1382, + "step": 18365 + }, + { + "epoch": 0.02, + "learning_rate": 4.995884104640905e-05, + "loss": 1.1968, + "step": 18366 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958836540903604e-05, + "loss": 1.0643, + "step": 18367 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958832035151773e-05, + "loss": 1.0081, + "step": 18368 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958827529153565e-05, + "loss": 0.8702, + "step": 18369 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958823022908965e-05, + "loss": 1.1349, + "step": 18370 + }, + { + "epoch": 0.02, + "learning_rate": 4.995881851641798e-05, + "loss": 1.021, + "step": 18371 + }, + { + "epoch": 0.02, + "learning_rate": 4.995881400968062e-05, + "loss": 1.4183, + "step": 18372 + }, + { + "epoch": 0.02, + "learning_rate": 4.995880950269688e-05, + "loss": 1.3256, + "step": 18373 + }, + { + "epoch": 0.02, + "learning_rate": 4.995880499546675e-05, + "loss": 1.1291, + "step": 18374 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958800487990236e-05, + "loss": 1.2474, + "step": 18375 + }, + { + "epoch": 0.02, + "learning_rate": 4.995879598026734e-05, + "loss": 1.0276, + "step": 18376 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958791472298066e-05, + "loss": 0.9539, + "step": 18377 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958786964082406e-05, + "loss": 1.1405, + "step": 18378 + }, + { + "epoch": 0.02, + "learning_rate": 4.995878245562037e-05, + "loss": 1.0747, + "step": 18379 + }, + { + "epoch": 0.02, + "learning_rate": 4.995877794691194e-05, + "loss": 1.002, + "step": 18380 + }, + { + "epoch": 0.02, + "learning_rate": 4.995877343795714e-05, + "loss": 0.9923, + "step": 18381 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958768928755946e-05, + "loss": 0.2467, + "step": 18382 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958764419308375e-05, + "loss": 1.0074, + "step": 18383 + }, + { + "epoch": 0.02, + "learning_rate": 4.995875990961442e-05, + "loss": 1.134, + "step": 18384 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958755399674086e-05, + "loss": 1.0755, + "step": 18385 + }, + { + "epoch": 0.02, + "learning_rate": 4.995875088948737e-05, + "loss": 1.3666, + "step": 18386 + }, + { + "epoch": 0.02, + "learning_rate": 4.995874637905426e-05, + "loss": 0.9621, + "step": 18387 + }, + { + "epoch": 0.02, + "learning_rate": 4.995874186837478e-05, + "loss": 1.1432, + "step": 18388 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958737357448915e-05, + "loss": 1.055, + "step": 18389 + }, + { + "epoch": 0.02, + "learning_rate": 4.995873284627667e-05, + "loss": 1.0031, + "step": 18390 + }, + { + "epoch": 0.02, + "learning_rate": 4.995872833485804e-05, + "loss": 1.2682, + "step": 18391 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958723823193025e-05, + "loss": 1.0962, + "step": 18392 + }, + { + "epoch": 0.02, + "learning_rate": 4.995871931128163e-05, + "loss": 1.1938, + "step": 18393 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958714799123854e-05, + "loss": 1.3822, + "step": 18394 + }, + { + "epoch": 0.02, + "learning_rate": 4.99587102867197e-05, + "loss": 1.0939, + "step": 18395 + }, + { + "epoch": 0.02, + "learning_rate": 4.995870577406916e-05, + "loss": 1.2194, + "step": 18396 + }, + { + "epoch": 0.02, + "learning_rate": 4.995870126117224e-05, + "loss": 1.2812, + "step": 18397 + }, + { + "epoch": 0.02, + "learning_rate": 4.995869674802893e-05, + "loss": 1.0675, + "step": 18398 + }, + { + "epoch": 0.02, + "learning_rate": 4.995869223463925e-05, + "loss": 0.4616, + "step": 18399 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958687721003184e-05, + "loss": 1.031, + "step": 18400 + }, + { + "epoch": 0.02, + "learning_rate": 4.995868320712074e-05, + "loss": 1.1872, + "step": 18401 + }, + { + "epoch": 0.02, + "learning_rate": 4.99586786929919e-05, + "loss": 1.2939, + "step": 18402 + }, + { + "epoch": 0.02, + "learning_rate": 4.995867417861669e-05, + "loss": 1.1857, + "step": 18403 + }, + { + "epoch": 0.02, + "learning_rate": 4.99586696639951e-05, + "loss": 1.2095, + "step": 18404 + }, + { + "epoch": 0.02, + "learning_rate": 4.995866514912713e-05, + "loss": 1.1288, + "step": 18405 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958660634012764e-05, + "loss": 1.3285, + "step": 18406 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958656118652036e-05, + "loss": 1.2138, + "step": 18407 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958651603044916e-05, + "loss": 1.2384, + "step": 18408 + }, + { + "epoch": 0.02, + "learning_rate": 4.995864708719141e-05, + "loss": 0.8081, + "step": 18409 + }, + { + "epoch": 0.02, + "learning_rate": 4.995864257109153e-05, + "loss": 1.1014, + "step": 18410 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958638054745275e-05, + "loss": 1.2486, + "step": 18411 + }, + { + "epoch": 0.02, + "learning_rate": 4.995863353815263e-05, + "loss": 1.0324, + "step": 18412 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958629021313604e-05, + "loss": 1.1035, + "step": 18413 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958624504228195e-05, + "loss": 1.2168, + "step": 18414 + }, + { + "epoch": 0.02, + "learning_rate": 4.995861998689641e-05, + "loss": 1.0017, + "step": 18415 + }, + { + "epoch": 0.02, + "learning_rate": 4.995861546931824e-05, + "loss": 1.166, + "step": 18416 + }, + { + "epoch": 0.02, + "learning_rate": 4.995861095149369e-05, + "loss": 1.0983, + "step": 18417 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958606433422764e-05, + "loss": 1.0443, + "step": 18418 + }, + { + "epoch": 0.02, + "learning_rate": 4.995860191510545e-05, + "loss": 1.058, + "step": 18419 + }, + { + "epoch": 0.02, + "learning_rate": 4.995859739654176e-05, + "loss": 0.9717, + "step": 18420 + }, + { + "epoch": 0.02, + "learning_rate": 4.995859287773169e-05, + "loss": 1.1819, + "step": 18421 + }, + { + "epoch": 0.02, + "learning_rate": 4.995858835867524e-05, + "loss": 1.4359, + "step": 18422 + }, + { + "epoch": 0.02, + "learning_rate": 4.995858383937241e-05, + "loss": 1.3861, + "step": 18423 + }, + { + "epoch": 0.02, + "learning_rate": 4.99585793198232e-05, + "loss": 1.0926, + "step": 18424 + }, + { + "epoch": 0.02, + "learning_rate": 4.99585748000276e-05, + "loss": 0.8776, + "step": 18425 + }, + { + "epoch": 0.02, + "learning_rate": 4.995857027998562e-05, + "loss": 2.9384, + "step": 18426 + }, + { + "epoch": 0.02, + "learning_rate": 4.995856575969727e-05, + "loss": 5.9024, + "step": 18427 + }, + { + "epoch": 0.02, + "learning_rate": 4.995856123916253e-05, + "loss": 2.3389, + "step": 18428 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958556718381415e-05, + "loss": 0.8945, + "step": 18429 + }, + { + "epoch": 0.02, + "learning_rate": 4.995855219735392e-05, + "loss": 1.2027, + "step": 18430 + }, + { + "epoch": 0.02, + "learning_rate": 4.995854767608004e-05, + "loss": 0.9737, + "step": 18431 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958543154559786e-05, + "loss": 1.1853, + "step": 18432 + }, + { + "epoch": 0.02, + "learning_rate": 4.995853863279315e-05, + "loss": 1.2295, + "step": 18433 + }, + { + "epoch": 0.02, + "learning_rate": 4.995853411078013e-05, + "loss": 0.9874, + "step": 18434 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958529588520734e-05, + "loss": 1.3611, + "step": 18435 + }, + { + "epoch": 0.02, + "learning_rate": 4.995852506601496e-05, + "loss": 1.277, + "step": 18436 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958520543262796e-05, + "loss": 0.8831, + "step": 18437 + }, + { + "epoch": 0.02, + "learning_rate": 4.995851602026426e-05, + "loss": 1.3356, + "step": 18438 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958511497019346e-05, + "loss": 1.6394, + "step": 18439 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958506973528044e-05, + "loss": 1.4243, + "step": 18440 + }, + { + "epoch": 0.02, + "learning_rate": 4.995850244979037e-05, + "loss": 1.596, + "step": 18441 + }, + { + "epoch": 0.02, + "learning_rate": 4.995849792580631e-05, + "loss": 1.2079, + "step": 18442 + }, + { + "epoch": 0.02, + "learning_rate": 4.995849340157587e-05, + "loss": 1.1491, + "step": 18443 + }, + { + "epoch": 0.02, + "learning_rate": 4.995848887709905e-05, + "loss": 0.8432, + "step": 18444 + }, + { + "epoch": 0.02, + "learning_rate": 4.995848435237586e-05, + "loss": 1.4494, + "step": 18445 + }, + { + "epoch": 0.02, + "learning_rate": 4.995847982740628e-05, + "loss": 1.207, + "step": 18446 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958475302190325e-05, + "loss": 1.0892, + "step": 18447 + }, + { + "epoch": 0.02, + "learning_rate": 4.995847077672799e-05, + "loss": 1.853, + "step": 18448 + }, + { + "epoch": 0.02, + "learning_rate": 4.995846625101928e-05, + "loss": 0.9565, + "step": 18449 + }, + { + "epoch": 0.02, + "learning_rate": 4.995846172506418e-05, + "loss": 0.8053, + "step": 18450 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958457198862704e-05, + "loss": 0.6888, + "step": 18451 + }, + { + "epoch": 0.02, + "learning_rate": 4.995845267241486e-05, + "loss": 0.6319, + "step": 18452 + }, + { + "epoch": 0.02, + "learning_rate": 4.995844814572062e-05, + "loss": 1.2801, + "step": 18453 + }, + { + "epoch": 0.02, + "learning_rate": 4.995844361878001e-05, + "loss": 1.1611, + "step": 18454 + }, + { + "epoch": 0.02, + "learning_rate": 4.995843909159302e-05, + "loss": 1.163, + "step": 18455 + }, + { + "epoch": 0.02, + "learning_rate": 4.995843456415965e-05, + "loss": 1.3396, + "step": 18456 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958430036479894e-05, + "loss": 0.9252, + "step": 18457 + }, + { + "epoch": 0.02, + "learning_rate": 4.995842550855377e-05, + "loss": 1.6883, + "step": 18458 + }, + { + "epoch": 0.02, + "learning_rate": 4.995842098038126e-05, + "loss": 0.9914, + "step": 18459 + }, + { + "epoch": 0.02, + "learning_rate": 4.995841645196238e-05, + "loss": 0.8411, + "step": 18460 + }, + { + "epoch": 0.02, + "learning_rate": 4.995841192329711e-05, + "loss": 0.975, + "step": 18461 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958407394385464e-05, + "loss": 1.2624, + "step": 18462 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958402865227443e-05, + "loss": 1.0267, + "step": 18463 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958398335823045e-05, + "loss": 0.9703, + "step": 18464 + }, + { + "epoch": 0.02, + "learning_rate": 4.995839380617226e-05, + "loss": 1.0218, + "step": 18465 + }, + { + "epoch": 0.02, + "learning_rate": 4.99583892762751e-05, + "loss": 1.4141, + "step": 18466 + }, + { + "epoch": 0.02, + "learning_rate": 4.995838474613156e-05, + "loss": 0.9899, + "step": 18467 + }, + { + "epoch": 0.02, + "learning_rate": 4.995838021574165e-05, + "loss": 1.2778, + "step": 18468 + }, + { + "epoch": 0.02, + "learning_rate": 4.995837568510535e-05, + "loss": 1.2015, + "step": 18469 + }, + { + "epoch": 0.02, + "learning_rate": 4.995837115422267e-05, + "loss": 0.8757, + "step": 18470 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958366623093624e-05, + "loss": 0.9858, + "step": 18471 + }, + { + "epoch": 0.02, + "learning_rate": 4.995836209171819e-05, + "loss": 1.0113, + "step": 18472 + }, + { + "epoch": 0.02, + "learning_rate": 4.995835756009638e-05, + "loss": 1.0319, + "step": 18473 + }, + { + "epoch": 0.02, + "learning_rate": 4.99583530282282e-05, + "loss": 1.15, + "step": 18474 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958348496113625e-05, + "loss": 1.1155, + "step": 18475 + }, + { + "epoch": 0.02, + "learning_rate": 4.995834396375269e-05, + "loss": 1.098, + "step": 18476 + }, + { + "epoch": 0.02, + "learning_rate": 4.995833943114536e-05, + "loss": 1.2586, + "step": 18477 + }, + { + "epoch": 0.02, + "learning_rate": 4.995833489829166e-05, + "loss": 0.9165, + "step": 18478 + }, + { + "epoch": 0.02, + "learning_rate": 4.995833036519158e-05, + "loss": 1.2261, + "step": 18479 + }, + { + "epoch": 0.02, + "learning_rate": 4.995832583184512e-05, + "loss": 1.1286, + "step": 18480 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958321298252284e-05, + "loss": 1.201, + "step": 18481 + }, + { + "epoch": 0.02, + "learning_rate": 4.995831676441307e-05, + "loss": 1.0331, + "step": 18482 + }, + { + "epoch": 0.02, + "learning_rate": 4.995831223032748e-05, + "loss": 1.3341, + "step": 18483 + }, + { + "epoch": 0.02, + "learning_rate": 4.995830769599551e-05, + "loss": 1.3514, + "step": 18484 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958303161417165e-05, + "loss": 1.1229, + "step": 18485 + }, + { + "epoch": 0.02, + "learning_rate": 4.995829862659244e-05, + "loss": 0.8268, + "step": 18486 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958294091521343e-05, + "loss": 0.9977, + "step": 18487 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958289556203855e-05, + "loss": 1.1368, + "step": 18488 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958285020639995e-05, + "loss": 1.09, + "step": 18489 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958280484829764e-05, + "loss": 1.0654, + "step": 18490 + }, + { + "epoch": 0.02, + "learning_rate": 4.995827594877315e-05, + "loss": 0.9309, + "step": 18491 + }, + { + "epoch": 0.02, + "learning_rate": 4.995827141247016e-05, + "loss": 1.1166, + "step": 18492 + }, + { + "epoch": 0.02, + "learning_rate": 4.995826687592079e-05, + "loss": 0.9025, + "step": 18493 + }, + { + "epoch": 0.02, + "learning_rate": 4.995826233912504e-05, + "loss": 1.0814, + "step": 18494 + }, + { + "epoch": 0.02, + "learning_rate": 4.995825780208292e-05, + "loss": 1.0962, + "step": 18495 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958253264794416e-05, + "loss": 1.0873, + "step": 18496 + }, + { + "epoch": 0.02, + "learning_rate": 4.995824872725954e-05, + "loss": 0.961, + "step": 18497 + }, + { + "epoch": 0.02, + "learning_rate": 4.995824418947828e-05, + "loss": 1.0476, + "step": 18498 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958239651450647e-05, + "loss": 0.979, + "step": 18499 + }, + { + "epoch": 0.02, + "learning_rate": 4.995823511317664e-05, + "loss": 1.1472, + "step": 18500 + }, + { + "epoch": 0.02, + "eval_loss": 1.0176509618759155, + "eval_runtime": 83.3027, + "eval_samples_per_second": 16.626, + "eval_steps_per_second": 4.166, + "step": 18500 + }, + { + "epoch": 0.02, + "learning_rate": 4.995823057465625e-05, + "loss": 1.0562, + "step": 18501 + }, + { + "epoch": 0.02, + "learning_rate": 4.995822603588949e-05, + "loss": 1.1454, + "step": 18502 + }, + { + "epoch": 0.02, + "learning_rate": 4.995822149687635e-05, + "loss": 0.8597, + "step": 18503 + }, + { + "epoch": 0.02, + "learning_rate": 4.995821695761682e-05, + "loss": 0.9547, + "step": 18504 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958212418110925e-05, + "loss": 1.1587, + "step": 18505 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958207878358656e-05, + "loss": 1.0525, + "step": 18506 + }, + { + "epoch": 0.02, + "learning_rate": 4.995820333836001e-05, + "loss": 1.1954, + "step": 18507 + }, + { + "epoch": 0.02, + "learning_rate": 4.995819879811498e-05, + "loss": 1.0153, + "step": 18508 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958194257623576e-05, + "loss": 1.2558, + "step": 18509 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958189716885795e-05, + "loss": 0.6076, + "step": 18510 + }, + { + "epoch": 0.02, + "learning_rate": 4.995818517590164e-05, + "loss": 1.058, + "step": 18511 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958180634671105e-05, + "loss": 1.1497, + "step": 18512 + }, + { + "epoch": 0.02, + "learning_rate": 4.99581760931942e-05, + "loss": 1.3763, + "step": 18513 + }, + { + "epoch": 0.02, + "learning_rate": 4.995817155147091e-05, + "loss": 1.1538, + "step": 18514 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958167009501245e-05, + "loss": 1.1228, + "step": 18515 + }, + { + "epoch": 0.02, + "learning_rate": 4.995816246728521e-05, + "loss": 1.0067, + "step": 18516 + }, + { + "epoch": 0.02, + "learning_rate": 4.995815792482279e-05, + "loss": 0.9833, + "step": 18517 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958153382113995e-05, + "loss": 0.8629, + "step": 18518 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958148839158824e-05, + "loss": 1.029, + "step": 18519 + }, + { + "epoch": 0.02, + "learning_rate": 4.995814429595728e-05, + "loss": 1.0649, + "step": 18520 + }, + { + "epoch": 0.02, + "learning_rate": 4.995813975250936e-05, + "loss": 0.7171, + "step": 18521 + }, + { + "epoch": 0.02, + "learning_rate": 4.995813520881506e-05, + "loss": 0.9066, + "step": 18522 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958130664874386e-05, + "loss": 0.897, + "step": 18523 + }, + { + "epoch": 0.02, + "learning_rate": 4.995812612068734e-05, + "loss": 1.1758, + "step": 18524 + }, + { + "epoch": 0.02, + "learning_rate": 4.995812157625391e-05, + "loss": 1.0875, + "step": 18525 + }, + { + "epoch": 0.02, + "learning_rate": 4.995811703157411e-05, + "loss": 0.9316, + "step": 18526 + }, + { + "epoch": 0.02, + "learning_rate": 4.995811248664793e-05, + "loss": 1.3038, + "step": 18527 + }, + { + "epoch": 0.02, + "learning_rate": 4.995810794147538e-05, + "loss": 1.2434, + "step": 18528 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958103396056445e-05, + "loss": 1.3386, + "step": 18529 + }, + { + "epoch": 0.02, + "learning_rate": 4.995809885039114e-05, + "loss": 1.028, + "step": 18530 + }, + { + "epoch": 0.02, + "learning_rate": 4.995809430447945e-05, + "loss": 0.5326, + "step": 18531 + }, + { + "epoch": 0.02, + "learning_rate": 4.99580897583214e-05, + "loss": 0.5613, + "step": 18532 + }, + { + "epoch": 0.02, + "learning_rate": 4.995808521191696e-05, + "loss": 0.8667, + "step": 18533 + }, + { + "epoch": 0.02, + "learning_rate": 4.995808066526615e-05, + "loss": 0.9885, + "step": 18534 + }, + { + "epoch": 0.02, + "learning_rate": 4.995807611836897e-05, + "loss": 1.0232, + "step": 18535 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958071571225405e-05, + "loss": 1.4324, + "step": 18536 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958067023835475e-05, + "loss": 1.2016, + "step": 18537 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958062476199166e-05, + "loss": 1.1257, + "step": 18538 + }, + { + "epoch": 0.02, + "learning_rate": 4.995805792831647e-05, + "loss": 0.7357, + "step": 18539 + }, + { + "epoch": 0.02, + "learning_rate": 4.995805338018741e-05, + "loss": 0.7083, + "step": 18540 + }, + { + "epoch": 0.02, + "learning_rate": 4.995804883181198e-05, + "loss": 0.8057, + "step": 18541 + }, + { + "epoch": 0.02, + "learning_rate": 4.995804428319016e-05, + "loss": 1.1432, + "step": 18542 + }, + { + "epoch": 0.02, + "learning_rate": 4.995803973432197e-05, + "loss": 1.3129, + "step": 18543 + }, + { + "epoch": 0.02, + "learning_rate": 4.995803518520741e-05, + "loss": 1.2044, + "step": 18544 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958030635846465e-05, + "loss": 1.7206, + "step": 18545 + }, + { + "epoch": 0.02, + "learning_rate": 4.995802608623915e-05, + "loss": 1.2542, + "step": 18546 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958021536385466e-05, + "loss": 1.0481, + "step": 18547 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958016986285396e-05, + "loss": 1.0254, + "step": 18548 + }, + { + "epoch": 0.02, + "learning_rate": 4.995801243593896e-05, + "loss": 0.5418, + "step": 18549 + }, + { + "epoch": 0.02, + "learning_rate": 4.995800788534615e-05, + "loss": 0.2343, + "step": 18550 + }, + { + "epoch": 0.02, + "learning_rate": 4.995800333450695e-05, + "loss": 0.2062, + "step": 18551 + }, + { + "epoch": 0.02, + "learning_rate": 4.995799878342139e-05, + "loss": 1.1097, + "step": 18552 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957994232089455e-05, + "loss": 1.0292, + "step": 18553 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957989680511144e-05, + "loss": 1.0258, + "step": 18554 + }, + { + "epoch": 0.02, + "learning_rate": 4.995798512868645e-05, + "loss": 1.3199, + "step": 18555 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957980576615385e-05, + "loss": 1.0545, + "step": 18556 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957976024297946e-05, + "loss": 1.1292, + "step": 18557 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957971471734135e-05, + "loss": 1.138, + "step": 18558 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957966918923946e-05, + "loss": 1.2978, + "step": 18559 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957962365867385e-05, + "loss": 1.2418, + "step": 18560 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957957812564447e-05, + "loss": 1.5199, + "step": 18561 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957953259015137e-05, + "loss": 1.5399, + "step": 18562 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957948705219455e-05, + "loss": 1.0994, + "step": 18563 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957944151177395e-05, + "loss": 0.994, + "step": 18564 + }, + { + "epoch": 0.02, + "learning_rate": 4.995793959688896e-05, + "loss": 1.4342, + "step": 18565 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957935042354155e-05, + "loss": 1.403, + "step": 18566 + }, + { + "epoch": 0.02, + "learning_rate": 4.995793048757297e-05, + "loss": 1.1099, + "step": 18567 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957925932545416e-05, + "loss": 1.0234, + "step": 18568 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957921377271486e-05, + "loss": 0.8214, + "step": 18569 + }, + { + "epoch": 0.02, + "learning_rate": 4.995791682175118e-05, + "loss": 1.1172, + "step": 18570 + }, + { + "epoch": 0.02, + "learning_rate": 4.99579122659845e-05, + "loss": 1.1485, + "step": 18571 + }, + { + "epoch": 0.02, + "learning_rate": 4.995790770997145e-05, + "loss": 1.0386, + "step": 18572 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957903153712024e-05, + "loss": 1.023, + "step": 18573 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957898597206223e-05, + "loss": 1.1225, + "step": 18574 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957894040454045e-05, + "loss": 1.1434, + "step": 18575 + }, + { + "epoch": 0.02, + "learning_rate": 4.99578894834555e-05, + "loss": 1.091, + "step": 18576 + }, + { + "epoch": 0.02, + "learning_rate": 4.995788492621057e-05, + "loss": 1.3018, + "step": 18577 + }, + { + "epoch": 0.02, + "learning_rate": 4.995788036871928e-05, + "loss": 1.4687, + "step": 18578 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957875810981615e-05, + "loss": 1.2286, + "step": 18579 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957871252997566e-05, + "loss": 0.8739, + "step": 18580 + }, + { + "epoch": 0.02, + "learning_rate": 4.995786669476715e-05, + "loss": 1.2501, + "step": 18581 + }, + { + "epoch": 0.02, + "learning_rate": 4.995786213629035e-05, + "loss": 0.9123, + "step": 18582 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957857577567197e-05, + "loss": 1.2209, + "step": 18583 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957853018597655e-05, + "loss": 1.1597, + "step": 18584 + }, + { + "epoch": 0.02, + "learning_rate": 4.995784845938175e-05, + "loss": 0.8396, + "step": 18585 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957843899919464e-05, + "loss": 1.0583, + "step": 18586 + }, + { + "epoch": 0.02, + "learning_rate": 4.995783934021081e-05, + "loss": 1.734, + "step": 18587 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957834780255774e-05, + "loss": 1.4075, + "step": 18588 + }, + { + "epoch": 0.02, + "learning_rate": 4.995783022005437e-05, + "loss": 1.0102, + "step": 18589 + }, + { + "epoch": 0.02, + "learning_rate": 4.995782565960659e-05, + "loss": 0.9147, + "step": 18590 + }, + { + "epoch": 0.02, + "learning_rate": 4.995782109891245e-05, + "loss": 1.0111, + "step": 18591 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957816537971924e-05, + "loss": 1.3474, + "step": 18592 + }, + { + "epoch": 0.02, + "learning_rate": 4.995781197678503e-05, + "loss": 1.5134, + "step": 18593 + }, + { + "epoch": 0.02, + "learning_rate": 4.995780741535175e-05, + "loss": 1.173, + "step": 18594 + }, + { + "epoch": 0.02, + "learning_rate": 4.995780285367212e-05, + "loss": 1.3334, + "step": 18595 + }, + { + "epoch": 0.02, + "learning_rate": 4.99577982917461e-05, + "loss": 1.3363, + "step": 18596 + }, + { + "epoch": 0.02, + "learning_rate": 4.995779372957371e-05, + "loss": 1.0683, + "step": 18597 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957789167154954e-05, + "loss": 0.9859, + "step": 18598 + }, + { + "epoch": 0.02, + "learning_rate": 4.995778460448982e-05, + "loss": 1.0153, + "step": 18599 + }, + { + "epoch": 0.02, + "learning_rate": 4.995778004157831e-05, + "loss": 0.8705, + "step": 18600 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957775478420434e-05, + "loss": 0.7207, + "step": 18601 + }, + { + "epoch": 0.02, + "learning_rate": 4.995777091501618e-05, + "loss": 1.0778, + "step": 18602 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957766351365555e-05, + "loss": 0.9905, + "step": 18603 + }, + { + "epoch": 0.02, + "learning_rate": 4.995776178746856e-05, + "loss": 1.3214, + "step": 18604 + }, + { + "epoch": 0.02, + "learning_rate": 4.995775722332519e-05, + "loss": 1.0826, + "step": 18605 + }, + { + "epoch": 0.02, + "learning_rate": 4.995775265893545e-05, + "loss": 1.5456, + "step": 18606 + }, + { + "epoch": 0.02, + "learning_rate": 4.995774809429934e-05, + "loss": 1.081, + "step": 18607 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957743529416844e-05, + "loss": 1.1703, + "step": 18608 + }, + { + "epoch": 0.02, + "learning_rate": 4.995773896428799e-05, + "loss": 1.1928, + "step": 18609 + }, + { + "epoch": 0.02, + "learning_rate": 4.995773439891276e-05, + "loss": 1.2327, + "step": 18610 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957729833291156e-05, + "loss": 1.18, + "step": 18611 + }, + { + "epoch": 0.02, + "learning_rate": 4.995772526742318e-05, + "loss": 1.1311, + "step": 18612 + }, + { + "epoch": 0.02, + "learning_rate": 4.995772070130883e-05, + "loss": 1.0516, + "step": 18613 + }, + { + "epoch": 0.02, + "learning_rate": 4.995771613494811e-05, + "loss": 0.9645, + "step": 18614 + }, + { + "epoch": 0.02, + "learning_rate": 4.995771156834102e-05, + "loss": 0.7369, + "step": 18615 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957707001487556e-05, + "loss": 1.0913, + "step": 18616 + }, + { + "epoch": 0.02, + "learning_rate": 4.995770243438772e-05, + "loss": 1.0984, + "step": 18617 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957697867041515e-05, + "loss": 1.3612, + "step": 18618 + }, + { + "epoch": 0.02, + "learning_rate": 4.995769329944894e-05, + "loss": 1.2238, + "step": 18619 + }, + { + "epoch": 0.02, + "learning_rate": 4.995768873160999e-05, + "loss": 0.8196, + "step": 18620 + }, + { + "epoch": 0.02, + "learning_rate": 4.995768416352466e-05, + "loss": 1.0637, + "step": 18621 + }, + { + "epoch": 0.02, + "learning_rate": 4.995767959519296e-05, + "loss": 1.062, + "step": 18622 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957675026614896e-05, + "loss": 0.9911, + "step": 18623 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957670457790465e-05, + "loss": 1.0274, + "step": 18624 + }, + { + "epoch": 0.02, + "learning_rate": 4.995766588871965e-05, + "loss": 1.2646, + "step": 18625 + }, + { + "epoch": 0.02, + "learning_rate": 4.995766131940247e-05, + "loss": 1.1133, + "step": 18626 + }, + { + "epoch": 0.02, + "learning_rate": 4.995765674983891e-05, + "loss": 1.2661, + "step": 18627 + }, + { + "epoch": 0.02, + "learning_rate": 4.995765218002899e-05, + "loss": 1.2347, + "step": 18628 + }, + { + "epoch": 0.02, + "learning_rate": 4.995764760997269e-05, + "loss": 1.2202, + "step": 18629 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957643039670024e-05, + "loss": 1.3274, + "step": 18630 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957638469120987e-05, + "loss": 1.0043, + "step": 18631 + }, + { + "epoch": 0.02, + "learning_rate": 4.995763389832558e-05, + "loss": 1.1143, + "step": 18632 + }, + { + "epoch": 0.02, + "learning_rate": 4.99576293272838e-05, + "loss": 0.8379, + "step": 18633 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957624755995645e-05, + "loss": 1.0156, + "step": 18634 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957620184461115e-05, + "loss": 0.9409, + "step": 18635 + }, + { + "epoch": 0.02, + "learning_rate": 4.995761561268023e-05, + "loss": 1.569, + "step": 18636 + }, + { + "epoch": 0.02, + "learning_rate": 4.995761104065296e-05, + "loss": 1.2287, + "step": 18637 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957606468379324e-05, + "loss": 1.2217, + "step": 18638 + }, + { + "epoch": 0.02, + "learning_rate": 4.995760189585932e-05, + "loss": 1.1593, + "step": 18639 + }, + { + "epoch": 0.02, + "learning_rate": 4.995759732309294e-05, + "loss": 2.1623, + "step": 18640 + }, + { + "epoch": 0.02, + "learning_rate": 4.995759275008018e-05, + "loss": 1.002, + "step": 18641 + }, + { + "epoch": 0.02, + "learning_rate": 4.995758817682107e-05, + "loss": 1.09, + "step": 18642 + }, + { + "epoch": 0.02, + "learning_rate": 4.995758360331558e-05, + "loss": 0.9607, + "step": 18643 + }, + { + "epoch": 0.02, + "learning_rate": 4.995757902956372e-05, + "loss": 0.9771, + "step": 18644 + }, + { + "epoch": 0.02, + "learning_rate": 4.995757445556548e-05, + "loss": 1.1793, + "step": 18645 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957569881320874e-05, + "loss": 1.0006, + "step": 18646 + }, + { + "epoch": 0.02, + "learning_rate": 4.995756530682991e-05, + "loss": 1.1336, + "step": 18647 + }, + { + "epoch": 0.02, + "learning_rate": 4.995756073209256e-05, + "loss": 1.0241, + "step": 18648 + }, + { + "epoch": 0.02, + "learning_rate": 4.995755615710884e-05, + "loss": 1.098, + "step": 18649 + }, + { + "epoch": 0.02, + "learning_rate": 4.995755158187876e-05, + "loss": 1.1592, + "step": 18650 + }, + { + "epoch": 0.02, + "learning_rate": 4.995754700640231e-05, + "loss": 0.6572, + "step": 18651 + }, + { + "epoch": 0.02, + "learning_rate": 4.995754243067948e-05, + "loss": 0.724, + "step": 18652 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957537854710276e-05, + "loss": 0.9599, + "step": 18653 + }, + { + "epoch": 0.02, + "learning_rate": 4.995753327849472e-05, + "loss": 0.9119, + "step": 18654 + }, + { + "epoch": 0.02, + "learning_rate": 4.995752870203277e-05, + "loss": 1.1966, + "step": 18655 + }, + { + "epoch": 0.02, + "learning_rate": 4.995752412532447e-05, + "loss": 1.1889, + "step": 18656 + }, + { + "epoch": 0.02, + "learning_rate": 4.995751954836979e-05, + "loss": 1.1185, + "step": 18657 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957514971168745e-05, + "loss": 0.7278, + "step": 18658 + }, + { + "epoch": 0.02, + "learning_rate": 4.995751039372133e-05, + "loss": 1.0603, + "step": 18659 + }, + { + "epoch": 0.02, + "learning_rate": 4.995750581602754e-05, + "loss": 0.7097, + "step": 18660 + }, + { + "epoch": 0.02, + "learning_rate": 4.995750123808738e-05, + "loss": 0.9824, + "step": 18661 + }, + { + "epoch": 0.02, + "learning_rate": 4.995749665990085e-05, + "loss": 1.2096, + "step": 18662 + }, + { + "epoch": 0.02, + "learning_rate": 4.995749208146796e-05, + "loss": 0.8129, + "step": 18663 + }, + { + "epoch": 0.02, + "learning_rate": 4.995748750278869e-05, + "loss": 1.0414, + "step": 18664 + }, + { + "epoch": 0.02, + "learning_rate": 4.995748292386305e-05, + "loss": 0.9903, + "step": 18665 + }, + { + "epoch": 0.02, + "learning_rate": 4.995747834469104e-05, + "loss": 1.1082, + "step": 18666 + }, + { + "epoch": 0.02, + "learning_rate": 4.995747376527267e-05, + "loss": 1.2643, + "step": 18667 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957469185607917e-05, + "loss": 0.7967, + "step": 18668 + }, + { + "epoch": 0.02, + "learning_rate": 4.995746460569681e-05, + "loss": 1.1204, + "step": 18669 + }, + { + "epoch": 0.02, + "learning_rate": 4.995746002553932e-05, + "loss": 1.2089, + "step": 18670 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957455445135466e-05, + "loss": 0.9445, + "step": 18671 + }, + { + "epoch": 0.02, + "learning_rate": 4.995745086448524e-05, + "loss": 1.356, + "step": 18672 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957446283588646e-05, + "loss": 1.238, + "step": 18673 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957441702445686e-05, + "loss": 1.1101, + "step": 18674 + }, + { + "epoch": 0.02, + "learning_rate": 4.995743712105635e-05, + "loss": 1.0953, + "step": 18675 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957432539420645e-05, + "loss": 1.0968, + "step": 18676 + }, + { + "epoch": 0.02, + "learning_rate": 4.995742795753858e-05, + "loss": 0.8034, + "step": 18677 + }, + { + "epoch": 0.02, + "learning_rate": 4.995742337541014e-05, + "loss": 1.1695, + "step": 18678 + }, + { + "epoch": 0.02, + "learning_rate": 4.995741879303533e-05, + "loss": 0.8475, + "step": 18679 + }, + { + "epoch": 0.02, + "learning_rate": 4.995741421041415e-05, + "loss": 1.0989, + "step": 18680 + }, + { + "epoch": 0.02, + "learning_rate": 4.99574096275466e-05, + "loss": 1.2023, + "step": 18681 + }, + { + "epoch": 0.02, + "learning_rate": 4.995740504443268e-05, + "loss": 1.2671, + "step": 18682 + }, + { + "epoch": 0.02, + "learning_rate": 4.99574004610724e-05, + "loss": 1.2851, + "step": 18683 + }, + { + "epoch": 0.02, + "learning_rate": 4.995739587746574e-05, + "loss": 1.1667, + "step": 18684 + }, + { + "epoch": 0.02, + "learning_rate": 4.995739129361272e-05, + "loss": 1.0814, + "step": 18685 + }, + { + "epoch": 0.02, + "learning_rate": 4.995738670951333e-05, + "loss": 0.9102, + "step": 18686 + }, + { + "epoch": 0.02, + "learning_rate": 4.995738212516757e-05, + "loss": 1.0891, + "step": 18687 + }, + { + "epoch": 0.02, + "learning_rate": 4.995737754057543e-05, + "loss": 1.1878, + "step": 18688 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957372955736935e-05, + "loss": 1.2836, + "step": 18689 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957368370652066e-05, + "loss": 1.1388, + "step": 18690 + }, + { + "epoch": 0.02, + "learning_rate": 4.995736378532083e-05, + "loss": 1.4151, + "step": 18691 + }, + { + "epoch": 0.02, + "learning_rate": 4.995735919974323e-05, + "loss": 1.3109, + "step": 18692 + }, + { + "epoch": 0.02, + "learning_rate": 4.995735461391925e-05, + "loss": 0.5712, + "step": 18693 + }, + { + "epoch": 0.02, + "learning_rate": 4.995735002784891e-05, + "loss": 1.1513, + "step": 18694 + }, + { + "epoch": 0.02, + "learning_rate": 4.995734544153221e-05, + "loss": 1.0729, + "step": 18695 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957340854969124e-05, + "loss": 1.3447, + "step": 18696 + }, + { + "epoch": 0.02, + "learning_rate": 4.995733626815968e-05, + "loss": 0.9536, + "step": 18697 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957331681103863e-05, + "loss": 1.1581, + "step": 18698 + }, + { + "epoch": 0.02, + "learning_rate": 4.995732709380168e-05, + "loss": 1.3243, + "step": 18699 + }, + { + "epoch": 0.02, + "learning_rate": 4.995732250625313e-05, + "loss": 0.7225, + "step": 18700 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957317918458204e-05, + "loss": 1.2341, + "step": 18701 + }, + { + "epoch": 0.02, + "learning_rate": 4.995731333041692e-05, + "loss": 1.076, + "step": 18702 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957308742129264e-05, + "loss": 1.2282, + "step": 18703 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957304153595243e-05, + "loss": 0.7369, + "step": 18704 + }, + { + "epoch": 0.02, + "learning_rate": 4.995729956481485e-05, + "loss": 1.141, + "step": 18705 + }, + { + "epoch": 0.02, + "learning_rate": 4.995729497578809e-05, + "loss": 0.9246, + "step": 18706 + }, + { + "epoch": 0.02, + "learning_rate": 4.995729038651496e-05, + "loss": 1.1081, + "step": 18707 + }, + { + "epoch": 0.02, + "learning_rate": 4.995728579699547e-05, + "loss": 0.9558, + "step": 18708 + }, + { + "epoch": 0.02, + "learning_rate": 4.99572812072296e-05, + "loss": 1.0073, + "step": 18709 + }, + { + "epoch": 0.02, + "learning_rate": 4.995727661721737e-05, + "loss": 1.0935, + "step": 18710 + }, + { + "epoch": 0.02, + "learning_rate": 4.995727202695877e-05, + "loss": 1.0558, + "step": 18711 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957267436453804e-05, + "loss": 1.2808, + "step": 18712 + }, + { + "epoch": 0.02, + "learning_rate": 4.995726284570247e-05, + "loss": 1.0017, + "step": 18713 + }, + { + "epoch": 0.02, + "learning_rate": 4.995725825470476e-05, + "loss": 1.0826, + "step": 18714 + }, + { + "epoch": 0.02, + "learning_rate": 4.99572536634607e-05, + "loss": 0.9219, + "step": 18715 + }, + { + "epoch": 0.02, + "learning_rate": 4.995724907197026e-05, + "loss": 1.0565, + "step": 18716 + }, + { + "epoch": 0.02, + "learning_rate": 4.995724448023346e-05, + "loss": 1.0814, + "step": 18717 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957239888250285e-05, + "loss": 1.1296, + "step": 18718 + }, + { + "epoch": 0.02, + "learning_rate": 4.995723529602074e-05, + "loss": 1.8605, + "step": 18719 + }, + { + "epoch": 0.02, + "learning_rate": 4.995723070354484e-05, + "loss": 4.8837, + "step": 18720 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957226110822566e-05, + "loss": 2.0658, + "step": 18721 + }, + { + "epoch": 0.02, + "learning_rate": 4.995722151785392e-05, + "loss": 0.9957, + "step": 18722 + }, + { + "epoch": 0.02, + "learning_rate": 4.995721692463892e-05, + "loss": 0.8657, + "step": 18723 + }, + { + "epoch": 0.02, + "learning_rate": 4.995721233117754e-05, + "loss": 1.1575, + "step": 18724 + }, + { + "epoch": 0.02, + "learning_rate": 4.99572077374698e-05, + "loss": 1.1779, + "step": 18725 + }, + { + "epoch": 0.02, + "learning_rate": 4.995720314351569e-05, + "loss": 1.089, + "step": 18726 + }, + { + "epoch": 0.02, + "learning_rate": 4.995719854931521e-05, + "loss": 1.3419, + "step": 18727 + }, + { + "epoch": 0.02, + "learning_rate": 4.995719395486837e-05, + "loss": 1.1165, + "step": 18728 + }, + { + "epoch": 0.02, + "learning_rate": 4.995718936017516e-05, + "loss": 1.1278, + "step": 18729 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957184765235585e-05, + "loss": 1.2034, + "step": 18730 + }, + { + "epoch": 0.02, + "learning_rate": 4.995718017004964e-05, + "loss": 0.5961, + "step": 18731 + }, + { + "epoch": 0.02, + "learning_rate": 4.995717557461733e-05, + "loss": 0.6231, + "step": 18732 + }, + { + "epoch": 0.02, + "learning_rate": 4.995717097893865e-05, + "loss": 1.055, + "step": 18733 + }, + { + "epoch": 0.02, + "learning_rate": 4.995716638301361e-05, + "loss": 1.345, + "step": 18734 + }, + { + "epoch": 0.02, + "learning_rate": 4.99571617868422e-05, + "loss": 0.96, + "step": 18735 + }, + { + "epoch": 0.02, + "learning_rate": 4.995715719042442e-05, + "loss": 1.181, + "step": 18736 + }, + { + "epoch": 0.02, + "learning_rate": 4.995715259376027e-05, + "loss": 1.0727, + "step": 18737 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957147996849764e-05, + "loss": 1.2496, + "step": 18738 + }, + { + "epoch": 0.02, + "learning_rate": 4.995714339969289e-05, + "loss": 1.163, + "step": 18739 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957138802289645e-05, + "loss": 1.255, + "step": 18740 + }, + { + "epoch": 0.02, + "learning_rate": 4.995713420464003e-05, + "loss": 1.0586, + "step": 18741 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957129606744055e-05, + "loss": 1.0457, + "step": 18742 + }, + { + "epoch": 0.02, + "learning_rate": 4.995712500860171e-05, + "loss": 0.9793, + "step": 18743 + }, + { + "epoch": 0.02, + "learning_rate": 4.995712041021301e-05, + "loss": 0.9416, + "step": 18744 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957115811577934e-05, + "loss": 1.0545, + "step": 18745 + }, + { + "epoch": 0.02, + "learning_rate": 4.995711121269649e-05, + "loss": 1.1682, + "step": 18746 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957106613568684e-05, + "loss": 1.0444, + "step": 18747 + }, + { + "epoch": 0.02, + "learning_rate": 4.995710201419451e-05, + "loss": 1.0657, + "step": 18748 + }, + { + "epoch": 0.02, + "learning_rate": 4.995709741457397e-05, + "loss": 0.7604, + "step": 18749 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957092814707075e-05, + "loss": 0.9503, + "step": 18750 + }, + { + "epoch": 0.02, + "learning_rate": 4.99570882145938e-05, + "loss": 1.0412, + "step": 18751 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957083614234165e-05, + "loss": 1.2006, + "step": 18752 + }, + { + "epoch": 0.02, + "learning_rate": 4.995707901362816e-05, + "loss": 1.1235, + "step": 18753 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957074412775796e-05, + "loss": 1.0585, + "step": 18754 + }, + { + "epoch": 0.02, + "learning_rate": 4.995706981167706e-05, + "loss": 1.3471, + "step": 18755 + }, + { + "epoch": 0.02, + "learning_rate": 4.995706521033196e-05, + "loss": 1.1442, + "step": 18756 + }, + { + "epoch": 0.02, + "learning_rate": 4.995706060874049e-05, + "loss": 1.2157, + "step": 18757 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957056006902664e-05, + "loss": 1.1927, + "step": 18758 + }, + { + "epoch": 0.02, + "learning_rate": 4.995705140481847e-05, + "loss": 1.063, + "step": 18759 + }, + { + "epoch": 0.02, + "learning_rate": 4.995704680248791e-05, + "loss": 0.7825, + "step": 18760 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957042199910974e-05, + "loss": 1.1334, + "step": 18761 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957037597087684e-05, + "loss": 1.0263, + "step": 18762 + }, + { + "epoch": 0.02, + "learning_rate": 4.995703299401803e-05, + "loss": 1.2272, + "step": 18763 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957028390702003e-05, + "loss": 1.1425, + "step": 18764 + }, + { + "epoch": 0.02, + "learning_rate": 4.995702378713962e-05, + "loss": 1.0721, + "step": 18765 + }, + { + "epoch": 0.02, + "learning_rate": 4.9957019183330864e-05, + "loss": 0.9337, + "step": 18766 + }, + { + "epoch": 0.02, + "learning_rate": 4.995701457927574e-05, + "loss": 0.9464, + "step": 18767 + }, + { + "epoch": 0.02, + "learning_rate": 4.995700997497425e-05, + "loss": 1.1565, + "step": 18768 + }, + { + "epoch": 0.02, + "learning_rate": 4.995700537042641e-05, + "loss": 1.2243, + "step": 18769 + }, + { + "epoch": 0.02, + "learning_rate": 4.99570007656322e-05, + "loss": 1.6535, + "step": 18770 + }, + { + "epoch": 0.02, + "learning_rate": 4.995699616059162e-05, + "loss": 0.9981, + "step": 18771 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956991555304675e-05, + "loss": 1.1624, + "step": 18772 + }, + { + "epoch": 0.02, + "learning_rate": 4.995698694977137e-05, + "loss": 0.9038, + "step": 18773 + }, + { + "epoch": 0.02, + "learning_rate": 4.995698234399169e-05, + "loss": 0.9798, + "step": 18774 + }, + { + "epoch": 0.02, + "learning_rate": 4.995697773796565e-05, + "loss": 1.079, + "step": 18775 + }, + { + "epoch": 0.02, + "learning_rate": 4.995697313169325e-05, + "loss": 0.8937, + "step": 18776 + }, + { + "epoch": 0.02, + "learning_rate": 4.995696852517448e-05, + "loss": 1.2507, + "step": 18777 + }, + { + "epoch": 0.02, + "learning_rate": 4.995696391840935e-05, + "loss": 0.8063, + "step": 18778 + }, + { + "epoch": 0.02, + "learning_rate": 4.995695931139785e-05, + "loss": 0.7078, + "step": 18779 + }, + { + "epoch": 0.02, + "learning_rate": 4.995695470413999e-05, + "loss": 1.1335, + "step": 18780 + }, + { + "epoch": 0.02, + "learning_rate": 4.995695009663576e-05, + "loss": 1.1945, + "step": 18781 + }, + { + "epoch": 0.02, + "learning_rate": 4.995694548888517e-05, + "loss": 1.024, + "step": 18782 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956940880888216e-05, + "loss": 1.0968, + "step": 18783 + }, + { + "epoch": 0.02, + "learning_rate": 4.99569362726449e-05, + "loss": 0.6154, + "step": 18784 + }, + { + "epoch": 0.02, + "learning_rate": 4.995693166415521e-05, + "loss": 1.3035, + "step": 18785 + }, + { + "epoch": 0.02, + "learning_rate": 4.995692705541916e-05, + "loss": 1.192, + "step": 18786 + }, + { + "epoch": 0.02, + "learning_rate": 4.995692244643675e-05, + "loss": 1.5293, + "step": 18787 + }, + { + "epoch": 0.02, + "learning_rate": 4.995691783720797e-05, + "loss": 1.1368, + "step": 18788 + }, + { + "epoch": 0.02, + "learning_rate": 4.995691322773283e-05, + "loss": 1.1486, + "step": 18789 + }, + { + "epoch": 0.02, + "learning_rate": 4.995690861801132e-05, + "loss": 1.0575, + "step": 18790 + }, + { + "epoch": 0.02, + "learning_rate": 4.995690400804346e-05, + "loss": 0.8594, + "step": 18791 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956899397829224e-05, + "loss": 1.0828, + "step": 18792 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956894787368624e-05, + "loss": 1.1661, + "step": 18793 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956890176661666e-05, + "loss": 1.3301, + "step": 18794 + }, + { + "epoch": 0.02, + "learning_rate": 4.995688556570834e-05, + "loss": 1.3199, + "step": 18795 + }, + { + "epoch": 0.02, + "learning_rate": 4.995688095450864e-05, + "loss": 1.1489, + "step": 18796 + }, + { + "epoch": 0.02, + "learning_rate": 4.99568763430626e-05, + "loss": 1.1774, + "step": 18797 + }, + { + "epoch": 0.02, + "learning_rate": 4.995687173137018e-05, + "loss": 0.8964, + "step": 18798 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956867119431394e-05, + "loss": 1.2073, + "step": 18799 + }, + { + "epoch": 0.02, + "learning_rate": 4.995686250724625e-05, + "loss": 0.6838, + "step": 18800 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956857894814745e-05, + "loss": 1.2512, + "step": 18801 + }, + { + "epoch": 0.02, + "learning_rate": 4.995685328213687e-05, + "loss": 1.143, + "step": 18802 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956848669212644e-05, + "loss": 1.6867, + "step": 18803 + }, + { + "epoch": 0.02, + "learning_rate": 4.995684405604204e-05, + "loss": 0.8509, + "step": 18804 + }, + { + "epoch": 0.02, + "learning_rate": 4.995683944262508e-05, + "loss": 1.221, + "step": 18805 + }, + { + "epoch": 0.02, + "learning_rate": 4.995683482896175e-05, + "loss": 1.0642, + "step": 18806 + }, + { + "epoch": 0.02, + "learning_rate": 4.995683021505206e-05, + "loss": 1.1661, + "step": 18807 + }, + { + "epoch": 0.02, + "learning_rate": 4.995682560089601e-05, + "loss": 0.9478, + "step": 18808 + }, + { + "epoch": 0.02, + "learning_rate": 4.99568209864936e-05, + "loss": 1.0575, + "step": 18809 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956816371844814e-05, + "loss": 1.1764, + "step": 18810 + }, + { + "epoch": 0.02, + "learning_rate": 4.995681175694967e-05, + "loss": 1.1719, + "step": 18811 + }, + { + "epoch": 0.02, + "learning_rate": 4.995680714180817e-05, + "loss": 1.2393, + "step": 18812 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956802526420305e-05, + "loss": 0.8329, + "step": 18813 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956797910786076e-05, + "loss": 0.511, + "step": 18814 + }, + { + "epoch": 0.02, + "learning_rate": 4.995679329490548e-05, + "loss": 1.016, + "step": 18815 + }, + { + "epoch": 0.02, + "learning_rate": 4.995678867877852e-05, + "loss": 0.5035, + "step": 18816 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956784062405205e-05, + "loss": 0.7687, + "step": 18817 + }, + { + "epoch": 0.02, + "learning_rate": 4.995677944578552e-05, + "loss": 1.0423, + "step": 18818 + }, + { + "epoch": 0.02, + "learning_rate": 4.995677482891947e-05, + "loss": 1.1893, + "step": 18819 + }, + { + "epoch": 0.02, + "learning_rate": 4.995677021180707e-05, + "loss": 1.1816, + "step": 18820 + }, + { + "epoch": 0.02, + "learning_rate": 4.99567655944483e-05, + "loss": 1.1766, + "step": 18821 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956760976843165e-05, + "loss": 1.2197, + "step": 18822 + }, + { + "epoch": 0.02, + "learning_rate": 4.995675635899166e-05, + "loss": 1.0619, + "step": 18823 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956751740893805e-05, + "loss": 0.9724, + "step": 18824 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956747122549585e-05, + "loss": 1.0751, + "step": 18825 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956742503959e-05, + "loss": 1.2292, + "step": 18826 + }, + { + "epoch": 0.02, + "learning_rate": 4.995673788512206e-05, + "loss": 1.1899, + "step": 18827 + }, + { + "epoch": 0.02, + "learning_rate": 4.995673326603874e-05, + "loss": 1.1267, + "step": 18828 + }, + { + "epoch": 0.02, + "learning_rate": 4.995672864670907e-05, + "loss": 0.8714, + "step": 18829 + }, + { + "epoch": 0.02, + "learning_rate": 4.995672402713304e-05, + "loss": 0.8407, + "step": 18830 + }, + { + "epoch": 0.02, + "learning_rate": 4.995671940731065e-05, + "loss": 0.9007, + "step": 18831 + }, + { + "epoch": 0.02, + "learning_rate": 4.995671478724189e-05, + "loss": 0.8815, + "step": 18832 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956710166926765e-05, + "loss": 1.1909, + "step": 18833 + }, + { + "epoch": 0.02, + "learning_rate": 4.995670554636529e-05, + "loss": 1.2753, + "step": 18834 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956700925557444e-05, + "loss": 0.5933, + "step": 18835 + }, + { + "epoch": 0.02, + "learning_rate": 4.995669630450323e-05, + "loss": 1.0495, + "step": 18836 + }, + { + "epoch": 0.02, + "learning_rate": 4.995669168320267e-05, + "loss": 0.7873, + "step": 18837 + }, + { + "epoch": 0.02, + "learning_rate": 4.995668706165574e-05, + "loss": 1.1398, + "step": 18838 + }, + { + "epoch": 0.02, + "learning_rate": 4.995668243986245e-05, + "loss": 0.7889, + "step": 18839 + }, + { + "epoch": 0.02, + "learning_rate": 4.995667781782279e-05, + "loss": 1.1508, + "step": 18840 + }, + { + "epoch": 0.02, + "learning_rate": 4.995667319553677e-05, + "loss": 1.126, + "step": 18841 + }, + { + "epoch": 0.02, + "learning_rate": 4.99566685730044e-05, + "loss": 1.3882, + "step": 18842 + }, + { + "epoch": 0.02, + "learning_rate": 4.995666395022566e-05, + "loss": 0.9422, + "step": 18843 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956659327200564e-05, + "loss": 1.1636, + "step": 18844 + }, + { + "epoch": 0.02, + "learning_rate": 4.99566547039291e-05, + "loss": 1.0074, + "step": 18845 + }, + { + "epoch": 0.02, + "learning_rate": 4.995665008041127e-05, + "loss": 1.1587, + "step": 18846 + }, + { + "epoch": 0.02, + "learning_rate": 4.995664545664709e-05, + "loss": 1.1707, + "step": 18847 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956640832636536e-05, + "loss": 0.9952, + "step": 18848 + }, + { + "epoch": 0.02, + "learning_rate": 4.995663620837963e-05, + "loss": 1.1831, + "step": 18849 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956631583876365e-05, + "loss": 1.0425, + "step": 18850 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956626959126726e-05, + "loss": 1.3147, + "step": 18851 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956622334130736e-05, + "loss": 1.029, + "step": 18852 + }, + { + "epoch": 0.02, + "learning_rate": 4.995661770888839e-05, + "loss": 1.019, + "step": 18853 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956613083399675e-05, + "loss": 1.1613, + "step": 18854 + }, + { + "epoch": 0.02, + "learning_rate": 4.99566084576646e-05, + "loss": 1.1628, + "step": 18855 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956603831683155e-05, + "loss": 1.1337, + "step": 18856 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956599205455356e-05, + "loss": 1.1482, + "step": 18857 + }, + { + "epoch": 0.02, + "learning_rate": 4.99565945789812e-05, + "loss": 1.0812, + "step": 18858 + }, + { + "epoch": 0.02, + "learning_rate": 4.995658995226068e-05, + "loss": 1.1089, + "step": 18859 + }, + { + "epoch": 0.02, + "learning_rate": 4.995658532529379e-05, + "loss": 0.8698, + "step": 18860 + }, + { + "epoch": 0.02, + "learning_rate": 4.995658069808056e-05, + "loss": 1.0523, + "step": 18861 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956576070620955e-05, + "loss": 0.9829, + "step": 18862 + }, + { + "epoch": 0.02, + "learning_rate": 4.995657144291499e-05, + "loss": 0.9738, + "step": 18863 + }, + { + "epoch": 0.02, + "learning_rate": 4.995656681496266e-05, + "loss": 1.401, + "step": 18864 + }, + { + "epoch": 0.02, + "learning_rate": 4.995656218676398e-05, + "loss": 1.3726, + "step": 18865 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956557558318926e-05, + "loss": 1.3136, + "step": 18866 + }, + { + "epoch": 0.02, + "learning_rate": 4.995655292962752e-05, + "loss": 1.1849, + "step": 18867 + }, + { + "epoch": 0.02, + "learning_rate": 4.995654830068976e-05, + "loss": 1.0544, + "step": 18868 + }, + { + "epoch": 0.02, + "learning_rate": 4.995654367150563e-05, + "loss": 1.0512, + "step": 18869 + }, + { + "epoch": 0.02, + "learning_rate": 4.995653904207514e-05, + "loss": 0.4722, + "step": 18870 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956534412398294e-05, + "loss": 0.3293, + "step": 18871 + }, + { + "epoch": 0.02, + "learning_rate": 4.995652978247508e-05, + "loss": 0.2504, + "step": 18872 + }, + { + "epoch": 0.02, + "learning_rate": 4.995652515230551e-05, + "loss": 0.9653, + "step": 18873 + }, + { + "epoch": 0.02, + "learning_rate": 4.995652052188958e-05, + "loss": 0.6443, + "step": 18874 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956515891227294e-05, + "loss": 0.4432, + "step": 18875 + }, + { + "epoch": 0.02, + "learning_rate": 4.995651126031865e-05, + "loss": 0.4998, + "step": 18876 + }, + { + "epoch": 0.02, + "learning_rate": 4.995650662916364e-05, + "loss": 0.6492, + "step": 18877 + }, + { + "epoch": 0.02, + "learning_rate": 4.995650199776226e-05, + "loss": 0.4128, + "step": 18878 + }, + { + "epoch": 0.02, + "learning_rate": 4.995649736611453e-05, + "loss": 0.3397, + "step": 18879 + }, + { + "epoch": 0.02, + "learning_rate": 4.995649273422044e-05, + "loss": 0.3419, + "step": 18880 + }, + { + "epoch": 0.02, + "learning_rate": 4.995648810207999e-05, + "loss": 0.215, + "step": 18881 + }, + { + "epoch": 0.02, + "learning_rate": 4.995648346969318e-05, + "loss": 0.897, + "step": 18882 + }, + { + "epoch": 0.02, + "learning_rate": 4.995647883706001e-05, + "loss": 1.0589, + "step": 18883 + }, + { + "epoch": 0.02, + "learning_rate": 4.995647420418048e-05, + "loss": 1.0587, + "step": 18884 + }, + { + "epoch": 0.02, + "learning_rate": 4.995646957105459e-05, + "loss": 1.3063, + "step": 18885 + }, + { + "epoch": 0.02, + "learning_rate": 4.995646493768234e-05, + "loss": 1.1808, + "step": 18886 + }, + { + "epoch": 0.02, + "learning_rate": 4.995646030406373e-05, + "loss": 1.1897, + "step": 18887 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956455670198765e-05, + "loss": 1.0648, + "step": 18888 + }, + { + "epoch": 0.02, + "learning_rate": 4.995645103608743e-05, + "loss": 1.3186, + "step": 18889 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956446401729743e-05, + "loss": 1.2679, + "step": 18890 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956441767125696e-05, + "loss": 1.3377, + "step": 18891 + }, + { + "epoch": 0.02, + "learning_rate": 4.995643713227529e-05, + "loss": 0.9258, + "step": 18892 + }, + { + "epoch": 0.02, + "learning_rate": 4.995643249717852e-05, + "loss": 1.0726, + "step": 18893 + }, + { + "epoch": 0.02, + "learning_rate": 4.99564278618354e-05, + "loss": 1.1436, + "step": 18894 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956423226245907e-05, + "loss": 1.1302, + "step": 18895 + }, + { + "epoch": 0.02, + "learning_rate": 4.995641859041007e-05, + "loss": 1.3627, + "step": 18896 + }, + { + "epoch": 0.02, + "learning_rate": 4.995641395432786e-05, + "loss": 1.3557, + "step": 18897 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956409317999295e-05, + "loss": 1.014, + "step": 18898 + }, + { + "epoch": 0.02, + "learning_rate": 4.995640468142437e-05, + "loss": 1.0417, + "step": 18899 + }, + { + "epoch": 0.02, + "learning_rate": 4.995640004460309e-05, + "loss": 1.0157, + "step": 18900 + }, + { + "epoch": 0.02, + "learning_rate": 4.995639540753546e-05, + "loss": 1.1135, + "step": 18901 + }, + { + "epoch": 0.02, + "learning_rate": 4.995639077022146e-05, + "loss": 1.1388, + "step": 18902 + }, + { + "epoch": 0.02, + "learning_rate": 4.995638613266109e-05, + "loss": 0.9336, + "step": 18903 + }, + { + "epoch": 0.02, + "learning_rate": 4.995638149485438e-05, + "loss": 1.1962, + "step": 18904 + }, + { + "epoch": 0.02, + "learning_rate": 4.99563768568013e-05, + "loss": 1.0552, + "step": 18905 + }, + { + "epoch": 0.02, + "learning_rate": 4.995637221850187e-05, + "loss": 1.3947, + "step": 18906 + }, + { + "epoch": 0.02, + "learning_rate": 4.995636757995608e-05, + "loss": 1.2745, + "step": 18907 + }, + { + "epoch": 0.02, + "learning_rate": 4.995636294116392e-05, + "loss": 1.0674, + "step": 18908 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956358302125404e-05, + "loss": 1.2956, + "step": 18909 + }, + { + "epoch": 0.02, + "learning_rate": 4.995635366284054e-05, + "loss": 1.193, + "step": 18910 + }, + { + "epoch": 0.02, + "learning_rate": 4.995634902330931e-05, + "loss": 1.1428, + "step": 18911 + }, + { + "epoch": 0.02, + "learning_rate": 4.995634438353172e-05, + "loss": 1.0531, + "step": 18912 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956339743507776e-05, + "loss": 0.9837, + "step": 18913 + }, + { + "epoch": 0.02, + "learning_rate": 4.995633510323747e-05, + "loss": 0.9115, + "step": 18914 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956330462720814e-05, + "loss": 1.073, + "step": 18915 + }, + { + "epoch": 0.02, + "learning_rate": 4.995632582195779e-05, + "loss": 1.2471, + "step": 18916 + }, + { + "epoch": 0.02, + "learning_rate": 4.995632118094841e-05, + "loss": 1.2539, + "step": 18917 + }, + { + "epoch": 0.02, + "learning_rate": 4.995631653969267e-05, + "loss": 0.8266, + "step": 18918 + }, + { + "epoch": 0.02, + "learning_rate": 4.995631189819058e-05, + "loss": 0.5051, + "step": 18919 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956307256442126e-05, + "loss": 0.6859, + "step": 18920 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956302614447316e-05, + "loss": 0.7548, + "step": 18921 + }, + { + "epoch": 0.02, + "learning_rate": 4.995629797220614e-05, + "loss": 0.5911, + "step": 18922 + }, + { + "epoch": 0.02, + "learning_rate": 4.995629332971861e-05, + "loss": 1.3208, + "step": 18923 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956288686984725e-05, + "loss": 1.0405, + "step": 18924 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956284044004484e-05, + "loss": 0.9442, + "step": 18925 + }, + { + "epoch": 0.02, + "learning_rate": 4.995627940077788e-05, + "loss": 1.1572, + "step": 18926 + }, + { + "epoch": 0.02, + "learning_rate": 4.995627475730492e-05, + "loss": 1.1518, + "step": 18927 + }, + { + "epoch": 0.02, + "learning_rate": 4.99562701135856e-05, + "loss": 1.0818, + "step": 18928 + }, + { + "epoch": 0.02, + "learning_rate": 4.995626546961992e-05, + "loss": 1.1069, + "step": 18929 + }, + { + "epoch": 0.02, + "learning_rate": 4.995626082540789e-05, + "loss": 1.0277, + "step": 18930 + }, + { + "epoch": 0.02, + "learning_rate": 4.99562561809495e-05, + "loss": 1.0615, + "step": 18931 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956251536244755e-05, + "loss": 0.8931, + "step": 18932 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956246891293645e-05, + "loss": 1.1395, + "step": 18933 + }, + { + "epoch": 0.02, + "learning_rate": 4.995624224609618e-05, + "loss": 0.9476, + "step": 18934 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956237600652356e-05, + "loss": 1.0827, + "step": 18935 + }, + { + "epoch": 0.02, + "learning_rate": 4.995623295496218e-05, + "loss": 0.9857, + "step": 18936 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956228309025644e-05, + "loss": 1.2044, + "step": 18937 + }, + { + "epoch": 0.02, + "learning_rate": 4.995622366284275e-05, + "loss": 0.8928, + "step": 18938 + }, + { + "epoch": 0.02, + "learning_rate": 4.99562190164135e-05, + "loss": 0.9947, + "step": 18939 + }, + { + "epoch": 0.02, + "learning_rate": 4.995621436973789e-05, + "loss": 1.0686, + "step": 18940 + }, + { + "epoch": 0.02, + "learning_rate": 4.995620972281593e-05, + "loss": 1.2732, + "step": 18941 + }, + { + "epoch": 0.02, + "learning_rate": 4.99562050756476e-05, + "loss": 1.1725, + "step": 18942 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956200428232924e-05, + "loss": 1.5026, + "step": 18943 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956195780571884e-05, + "loss": 0.9898, + "step": 18944 + }, + { + "epoch": 0.02, + "learning_rate": 4.995619113266449e-05, + "loss": 1.0833, + "step": 18945 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956186484510736e-05, + "loss": 1.0769, + "step": 18946 + }, + { + "epoch": 0.02, + "learning_rate": 4.995618183611063e-05, + "loss": 0.9637, + "step": 18947 + }, + { + "epoch": 0.02, + "learning_rate": 4.995617718746417e-05, + "loss": 0.9886, + "step": 18948 + }, + { + "epoch": 0.02, + "learning_rate": 4.995617253857134e-05, + "loss": 0.9436, + "step": 18949 + }, + { + "epoch": 0.02, + "learning_rate": 4.995616788943217e-05, + "loss": 1.1846, + "step": 18950 + }, + { + "epoch": 0.02, + "learning_rate": 4.995616324004663e-05, + "loss": 0.9475, + "step": 18951 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956158590414734e-05, + "loss": 1.2991, + "step": 18952 + }, + { + "epoch": 0.02, + "learning_rate": 4.995615394053649e-05, + "loss": 1.104, + "step": 18953 + }, + { + "epoch": 0.02, + "learning_rate": 4.995614929041189e-05, + "loss": 0.9206, + "step": 18954 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956144640040926e-05, + "loss": 1.102, + "step": 18955 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956139989423605e-05, + "loss": 1.18, + "step": 18956 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956135338559926e-05, + "loss": 1.0659, + "step": 18957 + }, + { + "epoch": 0.02, + "learning_rate": 4.99561306874499e-05, + "loss": 1.0928, + "step": 18958 + }, + { + "epoch": 0.02, + "learning_rate": 4.995612603609351e-05, + "loss": 1.2373, + "step": 18959 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956121384490764e-05, + "loss": 1.1252, + "step": 18960 + }, + { + "epoch": 0.02, + "learning_rate": 4.995611673264167e-05, + "loss": 1.0529, + "step": 18961 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956112080546207e-05, + "loss": 1.0768, + "step": 18962 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956107428204395e-05, + "loss": 1.1304, + "step": 18963 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956102775616225e-05, + "loss": 0.384, + "step": 18964 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956098122781704e-05, + "loss": 0.2102, + "step": 18965 + }, + { + "epoch": 0.02, + "learning_rate": 4.995609346970082e-05, + "loss": 0.1633, + "step": 18966 + }, + { + "epoch": 0.02, + "learning_rate": 4.995608881637358e-05, + "loss": 0.2497, + "step": 18967 + }, + { + "epoch": 0.02, + "learning_rate": 4.995608416279999e-05, + "loss": 0.9523, + "step": 18968 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956079508980035e-05, + "loss": 1.0711, + "step": 18969 + }, + { + "epoch": 0.02, + "learning_rate": 4.995607485491374e-05, + "loss": 1.0848, + "step": 18970 + }, + { + "epoch": 0.02, + "learning_rate": 4.995607020060107e-05, + "loss": 0.9964, + "step": 18971 + }, + { + "epoch": 0.02, + "learning_rate": 4.995606554604205e-05, + "loss": 1.3506, + "step": 18972 + }, + { + "epoch": 0.02, + "learning_rate": 4.995606089123668e-05, + "loss": 1.05, + "step": 18973 + }, + { + "epoch": 0.02, + "learning_rate": 4.995605623618495e-05, + "loss": 0.9567, + "step": 18974 + }, + { + "epoch": 0.02, + "learning_rate": 4.995605158088687e-05, + "loss": 1.0788, + "step": 18975 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956046925342424e-05, + "loss": 0.9734, + "step": 18976 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956042269551636e-05, + "loss": 1.1051, + "step": 18977 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956037613514483e-05, + "loss": 1.3113, + "step": 18978 + }, + { + "epoch": 0.02, + "learning_rate": 4.995603295723098e-05, + "loss": 1.2998, + "step": 18979 + }, + { + "epoch": 0.02, + "learning_rate": 4.995602830070112e-05, + "loss": 1.332, + "step": 18980 + }, + { + "epoch": 0.02, + "learning_rate": 4.99560236439249e-05, + "loss": 0.9759, + "step": 18981 + }, + { + "epoch": 0.02, + "learning_rate": 4.995601898690232e-05, + "loss": 0.9934, + "step": 18982 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956014329633394e-05, + "loss": 0.9981, + "step": 18983 + }, + { + "epoch": 0.02, + "learning_rate": 4.9956009672118115e-05, + "loss": 0.9694, + "step": 18984 + }, + { + "epoch": 0.02, + "learning_rate": 4.995600501435648e-05, + "loss": 1.2893, + "step": 18985 + }, + { + "epoch": 0.02, + "learning_rate": 4.995600035634848e-05, + "loss": 1.2033, + "step": 18986 + }, + { + "epoch": 0.02, + "learning_rate": 4.995599569809414e-05, + "loss": 1.2731, + "step": 18987 + }, + { + "epoch": 0.02, + "learning_rate": 4.995599103959343e-05, + "loss": 1.0699, + "step": 18988 + }, + { + "epoch": 0.02, + "learning_rate": 4.995598638084637e-05, + "loss": 1.1132, + "step": 18989 + }, + { + "epoch": 0.02, + "learning_rate": 4.995598172185296e-05, + "loss": 1.1168, + "step": 18990 + }, + { + "epoch": 0.02, + "learning_rate": 4.995597706261319e-05, + "loss": 1.3113, + "step": 18991 + }, + { + "epoch": 0.02, + "learning_rate": 4.995597240312707e-05, + "loss": 1.247, + "step": 18992 + }, + { + "epoch": 0.02, + "learning_rate": 4.995596774339459e-05, + "loss": 1.1948, + "step": 18993 + }, + { + "epoch": 0.02, + "learning_rate": 4.995596308341576e-05, + "loss": 1.2353, + "step": 18994 + }, + { + "epoch": 0.02, + "learning_rate": 4.995595842319056e-05, + "loss": 1.1199, + "step": 18995 + }, + { + "epoch": 0.02, + "learning_rate": 4.995595376271902e-05, + "loss": 1.2796, + "step": 18996 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955949102001125e-05, + "loss": 1.0496, + "step": 18997 + }, + { + "epoch": 0.02, + "learning_rate": 4.995594444103687e-05, + "loss": 0.8329, + "step": 18998 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955939779826264e-05, + "loss": 0.2451, + "step": 18999 + }, + { + "epoch": 0.02, + "learning_rate": 4.995593511836931e-05, + "loss": 0.152, + "step": 19000 + }, + { + "epoch": 0.02, + "eval_loss": 1.0204665660858154, + "eval_runtime": 85.1485, + "eval_samples_per_second": 16.266, + "eval_steps_per_second": 4.075, + "step": 19000 + }, + { + "epoch": 0.02, + "learning_rate": 4.995593045666599e-05, + "loss": 0.1561, + "step": 19001 + }, + { + "epoch": 0.02, + "learning_rate": 4.995592579471632e-05, + "loss": 0.1704, + "step": 19002 + }, + { + "epoch": 0.02, + "learning_rate": 4.99559211325203e-05, + "loss": 0.1196, + "step": 19003 + }, + { + "epoch": 0.02, + "learning_rate": 4.995591647007792e-05, + "loss": 0.1489, + "step": 19004 + }, + { + "epoch": 0.02, + "learning_rate": 4.995591180738919e-05, + "loss": 0.1265, + "step": 19005 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955907144454096e-05, + "loss": 0.2233, + "step": 19006 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955902481272655e-05, + "loss": 0.1087, + "step": 19007 + }, + { + "epoch": 0.02, + "learning_rate": 4.995589781784486e-05, + "loss": 0.0728, + "step": 19008 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955893154170704e-05, + "loss": 0.082, + "step": 19009 + }, + { + "epoch": 0.02, + "learning_rate": 4.995588849025021e-05, + "loss": 0.1207, + "step": 19010 + }, + { + "epoch": 0.02, + "learning_rate": 4.995588382608335e-05, + "loss": 0.1522, + "step": 19011 + }, + { + "epoch": 0.02, + "learning_rate": 4.995587916167014e-05, + "loss": 0.2727, + "step": 19012 + }, + { + "epoch": 0.02, + "learning_rate": 4.995587449701057e-05, + "loss": 0.182, + "step": 19013 + }, + { + "epoch": 0.02, + "learning_rate": 4.995586983210465e-05, + "loss": 0.5141, + "step": 19014 + }, + { + "epoch": 0.02, + "learning_rate": 4.995586516695238e-05, + "loss": 0.1179, + "step": 19015 + }, + { + "epoch": 0.02, + "learning_rate": 4.995586050155375e-05, + "loss": 0.1046, + "step": 19016 + }, + { + "epoch": 0.02, + "learning_rate": 4.995585583590877e-05, + "loss": 0.0833, + "step": 19017 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955851170017436e-05, + "loss": 0.6373, + "step": 19018 + }, + { + "epoch": 0.02, + "learning_rate": 4.995584650387975e-05, + "loss": 0.9522, + "step": 19019 + }, + { + "epoch": 0.02, + "learning_rate": 4.995584183749571e-05, + "loss": 0.8878, + "step": 19020 + }, + { + "epoch": 0.02, + "learning_rate": 4.995583717086532e-05, + "loss": 1.391, + "step": 19021 + }, + { + "epoch": 0.02, + "learning_rate": 4.995583250398856e-05, + "loss": 1.226, + "step": 19022 + }, + { + "epoch": 0.02, + "learning_rate": 4.995582783686546e-05, + "loss": 1.3243, + "step": 19023 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955823169496006e-05, + "loss": 1.3184, + "step": 19024 + }, + { + "epoch": 0.02, + "learning_rate": 4.99558185018802e-05, + "loss": 1.1628, + "step": 19025 + }, + { + "epoch": 0.02, + "learning_rate": 4.995581383401803e-05, + "loss": 1.1769, + "step": 19026 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955809165909514e-05, + "loss": 1.0807, + "step": 19027 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955804497554646e-05, + "loss": 0.9889, + "step": 19028 + }, + { + "epoch": 0.02, + "learning_rate": 4.995579982895343e-05, + "loss": 1.2633, + "step": 19029 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955795160105856e-05, + "loss": 1.0718, + "step": 19030 + }, + { + "epoch": 0.02, + "learning_rate": 4.995579049101192e-05, + "loss": 1.1655, + "step": 19031 + }, + { + "epoch": 0.02, + "learning_rate": 4.995578582167164e-05, + "loss": 1.1402, + "step": 19032 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955781152085005e-05, + "loss": 1.1924, + "step": 19033 + }, + { + "epoch": 0.02, + "learning_rate": 4.995577648225202e-05, + "loss": 1.2321, + "step": 19034 + }, + { + "epoch": 0.02, + "learning_rate": 4.995577181217268e-05, + "loss": 1.0401, + "step": 19035 + }, + { + "epoch": 0.02, + "learning_rate": 4.995576714184699e-05, + "loss": 1.089, + "step": 19036 + }, + { + "epoch": 0.02, + "learning_rate": 4.995576247127494e-05, + "loss": 1.1472, + "step": 19037 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955757800456546e-05, + "loss": 1.1526, + "step": 19038 + }, + { + "epoch": 0.02, + "learning_rate": 4.995575312939179e-05, + "loss": 1.1022, + "step": 19039 + }, + { + "epoch": 0.02, + "learning_rate": 4.995574845808069e-05, + "loss": 1.1399, + "step": 19040 + }, + { + "epoch": 0.02, + "learning_rate": 4.995574378652323e-05, + "loss": 1.1141, + "step": 19041 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955739114719425e-05, + "loss": 1.2824, + "step": 19042 + }, + { + "epoch": 0.02, + "learning_rate": 4.995573444266926e-05, + "loss": 1.1095, + "step": 19043 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955729770372745e-05, + "loss": 1.2525, + "step": 19044 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955725097829875e-05, + "loss": 1.1549, + "step": 19045 + }, + { + "epoch": 0.02, + "learning_rate": 4.995572042504066e-05, + "loss": 1.2527, + "step": 19046 + }, + { + "epoch": 0.02, + "learning_rate": 4.995571575200508e-05, + "loss": 1.0127, + "step": 19047 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955711078723164e-05, + "loss": 1.0015, + "step": 19048 + }, + { + "epoch": 0.02, + "learning_rate": 4.995570640519489e-05, + "loss": 1.0354, + "step": 19049 + }, + { + "epoch": 0.02, + "learning_rate": 4.995570173142026e-05, + "loss": 1.2093, + "step": 19050 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955697057399274e-05, + "loss": 0.7061, + "step": 19051 + }, + { + "epoch": 0.02, + "learning_rate": 4.995569238313195e-05, + "loss": 1.1595, + "step": 19052 + }, + { + "epoch": 0.02, + "learning_rate": 4.995568770861826e-05, + "loss": 1.2386, + "step": 19053 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955683033858225e-05, + "loss": 1.1142, + "step": 19054 + }, + { + "epoch": 0.02, + "learning_rate": 4.995567835885183e-05, + "loss": 1.1172, + "step": 19055 + }, + { + "epoch": 0.02, + "learning_rate": 4.995567368359909e-05, + "loss": 1.045, + "step": 19056 + }, + { + "epoch": 0.02, + "learning_rate": 4.99556690081e-05, + "loss": 1.1164, + "step": 19057 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955664332354554e-05, + "loss": 0.9473, + "step": 19058 + }, + { + "epoch": 0.02, + "learning_rate": 4.995565965636276e-05, + "loss": 1.8171, + "step": 19059 + }, + { + "epoch": 0.02, + "learning_rate": 4.995565498012461e-05, + "loss": 1.7887, + "step": 19060 + }, + { + "epoch": 0.02, + "learning_rate": 4.995565030364011e-05, + "loss": 1.2405, + "step": 19061 + }, + { + "epoch": 0.02, + "learning_rate": 4.995564562690927e-05, + "loss": 1.1586, + "step": 19062 + }, + { + "epoch": 0.02, + "learning_rate": 4.995564094993206e-05, + "loss": 1.2735, + "step": 19063 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955636272708503e-05, + "loss": 1.174, + "step": 19064 + }, + { + "epoch": 0.02, + "learning_rate": 4.99556315952386e-05, + "loss": 1.1701, + "step": 19065 + }, + { + "epoch": 0.02, + "learning_rate": 4.995562691752234e-05, + "loss": 0.9996, + "step": 19066 + }, + { + "epoch": 0.02, + "learning_rate": 4.995562223955973e-05, + "loss": 1.1356, + "step": 19067 + }, + { + "epoch": 0.02, + "learning_rate": 4.995561756135077e-05, + "loss": 1.0385, + "step": 19068 + }, + { + "epoch": 0.02, + "learning_rate": 4.995561288289546e-05, + "loss": 1.0056, + "step": 19069 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955608204193794e-05, + "loss": 1.1518, + "step": 19070 + }, + { + "epoch": 0.02, + "learning_rate": 4.995560352524579e-05, + "loss": 0.8987, + "step": 19071 + }, + { + "epoch": 0.02, + "learning_rate": 4.995559884605142e-05, + "loss": 1.0456, + "step": 19072 + }, + { + "epoch": 0.02, + "learning_rate": 4.99555941666107e-05, + "loss": 1.027, + "step": 19073 + }, + { + "epoch": 0.02, + "learning_rate": 4.995558948692363e-05, + "loss": 1.1552, + "step": 19074 + }, + { + "epoch": 0.02, + "learning_rate": 4.995558480699021e-05, + "loss": 0.9775, + "step": 19075 + }, + { + "epoch": 0.02, + "learning_rate": 4.995558012681044e-05, + "loss": 0.834, + "step": 19076 + }, + { + "epoch": 0.02, + "learning_rate": 4.995557544638432e-05, + "loss": 1.1551, + "step": 19077 + }, + { + "epoch": 0.02, + "learning_rate": 4.995557076571185e-05, + "loss": 3.9377, + "step": 19078 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955566084793026e-05, + "loss": 3.0507, + "step": 19079 + }, + { + "epoch": 0.02, + "learning_rate": 4.995556140362786e-05, + "loss": 3.5665, + "step": 19080 + }, + { + "epoch": 0.02, + "learning_rate": 4.995555672221633e-05, + "loss": 4.0749, + "step": 19081 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955552040558453e-05, + "loss": 3.454, + "step": 19082 + }, + { + "epoch": 0.02, + "learning_rate": 4.995554735865423e-05, + "loss": 3.5411, + "step": 19083 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955542676503654e-05, + "loss": 3.2418, + "step": 19084 + }, + { + "epoch": 0.02, + "learning_rate": 4.995553799410673e-05, + "loss": 3.5707, + "step": 19085 + }, + { + "epoch": 0.02, + "learning_rate": 4.995553331146345e-05, + "loss": 3.0043, + "step": 19086 + }, + { + "epoch": 0.02, + "learning_rate": 4.995552862857382e-05, + "loss": 3.3118, + "step": 19087 + }, + { + "epoch": 0.02, + "learning_rate": 4.995552394543784e-05, + "loss": 5.9625, + "step": 19088 + }, + { + "epoch": 0.02, + "learning_rate": 4.995551926205552e-05, + "loss": 5.9677, + "step": 19089 + }, + { + "epoch": 0.02, + "learning_rate": 4.995551457842683e-05, + "loss": 5.8623, + "step": 19090 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955509894551805e-05, + "loss": 5.8644, + "step": 19091 + }, + { + "epoch": 0.02, + "learning_rate": 4.995550521043043e-05, + "loss": 5.8618, + "step": 19092 + }, + { + "epoch": 0.02, + "learning_rate": 4.995550052606269e-05, + "loss": 5.8618, + "step": 19093 + }, + { + "epoch": 0.02, + "learning_rate": 4.995549584144862e-05, + "loss": 5.823, + "step": 19094 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955491156588184e-05, + "loss": 5.6292, + "step": 19095 + }, + { + "epoch": 0.02, + "learning_rate": 4.99554864714814e-05, + "loss": 0.5963, + "step": 19096 + }, + { + "epoch": 0.02, + "learning_rate": 4.995548178612828e-05, + "loss": 1.1896, + "step": 19097 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955477100528794e-05, + "loss": 1.1945, + "step": 19098 + }, + { + "epoch": 0.02, + "learning_rate": 4.995547241468297e-05, + "loss": 1.0754, + "step": 19099 + }, + { + "epoch": 0.02, + "learning_rate": 4.995546772859078e-05, + "loss": 1.0528, + "step": 19100 + }, + { + "epoch": 0.02, + "learning_rate": 4.995546304225225e-05, + "loss": 1.1762, + "step": 19101 + }, + { + "epoch": 0.02, + "learning_rate": 4.995545835566737e-05, + "loss": 1.0971, + "step": 19102 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955453668836146e-05, + "loss": 1.2426, + "step": 19103 + }, + { + "epoch": 0.02, + "learning_rate": 4.995544898175856e-05, + "loss": 1.2848, + "step": 19104 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955444294434636e-05, + "loss": 1.1273, + "step": 19105 + }, + { + "epoch": 0.02, + "learning_rate": 4.995543960686435e-05, + "loss": 1.1429, + "step": 19106 + }, + { + "epoch": 0.02, + "learning_rate": 4.995543491904773e-05, + "loss": 0.957, + "step": 19107 + }, + { + "epoch": 0.02, + "learning_rate": 4.995543023098475e-05, + "loss": 1.3764, + "step": 19108 + }, + { + "epoch": 0.02, + "learning_rate": 4.995542554267542e-05, + "loss": 1.1152, + "step": 19109 + }, + { + "epoch": 0.02, + "learning_rate": 4.995542085411975e-05, + "loss": 1.1084, + "step": 19110 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955416165317714e-05, + "loss": 0.9628, + "step": 19111 + }, + { + "epoch": 0.02, + "learning_rate": 4.995541147626934e-05, + "loss": 0.8417, + "step": 19112 + }, + { + "epoch": 0.02, + "learning_rate": 4.995540678697462e-05, + "loss": 0.7174, + "step": 19113 + }, + { + "epoch": 0.02, + "learning_rate": 4.995540209743355e-05, + "loss": 1.3198, + "step": 19114 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955397407646123e-05, + "loss": 0.8946, + "step": 19115 + }, + { + "epoch": 0.02, + "learning_rate": 4.995539271761235e-05, + "loss": 0.9679, + "step": 19116 + }, + { + "epoch": 0.02, + "learning_rate": 4.995538802733223e-05, + "loss": 1.2588, + "step": 19117 + }, + { + "epoch": 0.02, + "learning_rate": 4.995538333680576e-05, + "loss": 1.0634, + "step": 19118 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955378646032936e-05, + "loss": 1.4185, + "step": 19119 + }, + { + "epoch": 0.02, + "learning_rate": 4.995537395501377e-05, + "loss": 1.0541, + "step": 19120 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955369263748246e-05, + "loss": 0.975, + "step": 19121 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955364572236385e-05, + "loss": 0.4344, + "step": 19122 + }, + { + "epoch": 0.02, + "learning_rate": 4.995535988047817e-05, + "loss": 0.8041, + "step": 19123 + }, + { + "epoch": 0.02, + "learning_rate": 4.99553551884736e-05, + "loss": 1.0857, + "step": 19124 + }, + { + "epoch": 0.02, + "learning_rate": 4.995535049622269e-05, + "loss": 0.7406, + "step": 19125 + }, + { + "epoch": 0.02, + "learning_rate": 4.995534580372543e-05, + "loss": 1.3219, + "step": 19126 + }, + { + "epoch": 0.02, + "learning_rate": 4.995534111098182e-05, + "loss": 2.2649, + "step": 19127 + }, + { + "epoch": 0.02, + "learning_rate": 4.995533641799186e-05, + "loss": 2.3423, + "step": 19128 + }, + { + "epoch": 0.02, + "learning_rate": 4.995533172475555e-05, + "loss": 2.2207, + "step": 19129 + }, + { + "epoch": 0.02, + "learning_rate": 4.995532703127289e-05, + "loss": 2.2989, + "step": 19130 + }, + { + "epoch": 0.02, + "learning_rate": 4.995532233754389e-05, + "loss": 2.2645, + "step": 19131 + }, + { + "epoch": 0.02, + "learning_rate": 4.995531764356853e-05, + "loss": 2.1723, + "step": 19132 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955312949346835e-05, + "loss": 2.3898, + "step": 19133 + }, + { + "epoch": 0.02, + "learning_rate": 4.995530825487879e-05, + "loss": 2.1433, + "step": 19134 + }, + { + "epoch": 0.02, + "learning_rate": 4.995530356016439e-05, + "loss": 2.0962, + "step": 19135 + }, + { + "epoch": 0.02, + "learning_rate": 4.995529886520364e-05, + "loss": 1.7601, + "step": 19136 + }, + { + "epoch": 0.02, + "learning_rate": 4.995529416999655e-05, + "loss": 1.3007, + "step": 19137 + }, + { + "epoch": 0.02, + "learning_rate": 4.99552894745431e-05, + "loss": 1.2957, + "step": 19138 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955284778843314e-05, + "loss": 1.0063, + "step": 19139 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955280082897174e-05, + "loss": 1.2809, + "step": 19140 + }, + { + "epoch": 0.02, + "learning_rate": 4.995527538670469e-05, + "loss": 0.9682, + "step": 19141 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955270690265855e-05, + "loss": 1.1679, + "step": 19142 + }, + { + "epoch": 0.02, + "learning_rate": 4.995526599358067e-05, + "loss": 1.2805, + "step": 19143 + }, + { + "epoch": 0.02, + "learning_rate": 4.995526129664914e-05, + "loss": 0.9726, + "step": 19144 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955256599471264e-05, + "loss": 1.3276, + "step": 19145 + }, + { + "epoch": 0.02, + "learning_rate": 4.995525190204704e-05, + "loss": 1.1737, + "step": 19146 + }, + { + "epoch": 0.02, + "learning_rate": 4.995524720437647e-05, + "loss": 0.842, + "step": 19147 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955242506459546e-05, + "loss": 1.6157, + "step": 19148 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955237808296274e-05, + "loss": 0.9117, + "step": 19149 + }, + { + "epoch": 0.02, + "learning_rate": 4.995523310988666e-05, + "loss": 0.9877, + "step": 19150 + }, + { + "epoch": 0.02, + "learning_rate": 4.995522841123069e-05, + "loss": 1.1675, + "step": 19151 + }, + { + "epoch": 0.02, + "learning_rate": 4.995522371232838e-05, + "loss": 1.0004, + "step": 19152 + }, + { + "epoch": 0.02, + "learning_rate": 4.995521901317972e-05, + "loss": 1.4656, + "step": 19153 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955214313784715e-05, + "loss": 1.1162, + "step": 19154 + }, + { + "epoch": 0.02, + "learning_rate": 4.995520961414336e-05, + "loss": 1.0988, + "step": 19155 + }, + { + "epoch": 0.02, + "learning_rate": 4.995520491425566e-05, + "loss": 1.0695, + "step": 19156 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955200214121614e-05, + "loss": 0.9797, + "step": 19157 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955195513741216e-05, + "loss": 0.7575, + "step": 19158 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955190813114473e-05, + "loss": 0.8562, + "step": 19159 + }, + { + "epoch": 0.02, + "learning_rate": 4.995518611224138e-05, + "loss": 1.2865, + "step": 19160 + }, + { + "epoch": 0.02, + "learning_rate": 4.995518141112194e-05, + "loss": 1.0898, + "step": 19161 + }, + { + "epoch": 0.02, + "learning_rate": 4.995517670975616e-05, + "loss": 1.1337, + "step": 19162 + }, + { + "epoch": 0.02, + "learning_rate": 4.995517200814403e-05, + "loss": 1.1592, + "step": 19163 + }, + { + "epoch": 0.02, + "learning_rate": 4.995516730628555e-05, + "loss": 1.1954, + "step": 19164 + }, + { + "epoch": 0.02, + "learning_rate": 4.995516260418073e-05, + "loss": 1.1467, + "step": 19165 + }, + { + "epoch": 0.02, + "learning_rate": 4.995515790182955e-05, + "loss": 1.1322, + "step": 19166 + }, + { + "epoch": 0.02, + "learning_rate": 4.995515319923203e-05, + "loss": 1.1098, + "step": 19167 + }, + { + "epoch": 0.02, + "learning_rate": 4.995514849638817e-05, + "loss": 0.9572, + "step": 19168 + }, + { + "epoch": 0.02, + "learning_rate": 4.995514379329795e-05, + "loss": 0.8791, + "step": 19169 + }, + { + "epoch": 0.02, + "learning_rate": 4.99551390899614e-05, + "loss": 1.1627, + "step": 19170 + }, + { + "epoch": 0.02, + "learning_rate": 4.995513438637849e-05, + "loss": 1.0847, + "step": 19171 + }, + { + "epoch": 0.02, + "learning_rate": 4.995512968254924e-05, + "loss": 1.1937, + "step": 19172 + }, + { + "epoch": 0.02, + "learning_rate": 4.995512497847364e-05, + "loss": 1.0026, + "step": 19173 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955120274151694e-05, + "loss": 0.8382, + "step": 19174 + }, + { + "epoch": 0.02, + "learning_rate": 4.99551155695834e-05, + "loss": 1.1517, + "step": 19175 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955110864768766e-05, + "loss": 1.0642, + "step": 19176 + }, + { + "epoch": 0.02, + "learning_rate": 4.995510615970778e-05, + "loss": 1.2226, + "step": 19177 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955101454400455e-05, + "loss": 0.6325, + "step": 19178 + }, + { + "epoch": 0.02, + "learning_rate": 4.995509674884677e-05, + "loss": 1.4666, + "step": 19179 + }, + { + "epoch": 0.02, + "learning_rate": 4.995509204304675e-05, + "loss": 2.118, + "step": 19180 + }, + { + "epoch": 0.02, + "learning_rate": 4.995508733700038e-05, + "loss": 1.3256, + "step": 19181 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955082630707674e-05, + "loss": 1.4319, + "step": 19182 + }, + { + "epoch": 0.02, + "learning_rate": 4.995507792416861e-05, + "loss": 1.1903, + "step": 19183 + }, + { + "epoch": 0.02, + "learning_rate": 4.99550732173832e-05, + "loss": 1.0023, + "step": 19184 + }, + { + "epoch": 0.02, + "learning_rate": 4.995506851035144e-05, + "loss": 1.085, + "step": 19185 + }, + { + "epoch": 0.02, + "learning_rate": 4.995506380307335e-05, + "loss": 0.907, + "step": 19186 + }, + { + "epoch": 0.02, + "learning_rate": 4.995505909554891e-05, + "loss": 0.9016, + "step": 19187 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955054387778114e-05, + "loss": 1.1099, + "step": 19188 + }, + { + "epoch": 0.02, + "learning_rate": 4.995504967976098e-05, + "loss": 1.1104, + "step": 19189 + }, + { + "epoch": 0.02, + "learning_rate": 4.995504497149749e-05, + "loss": 0.7809, + "step": 19190 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955040262987665e-05, + "loss": 1.0954, + "step": 19191 + }, + { + "epoch": 0.02, + "learning_rate": 4.995503555423149e-05, + "loss": 1.0978, + "step": 19192 + }, + { + "epoch": 0.02, + "learning_rate": 4.995503084522897e-05, + "loss": 0.9413, + "step": 19193 + }, + { + "epoch": 0.02, + "learning_rate": 4.995502613598011e-05, + "loss": 1.2441, + "step": 19194 + }, + { + "epoch": 0.02, + "learning_rate": 4.995502142648489e-05, + "loss": 1.2783, + "step": 19195 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955016716743345e-05, + "loss": 0.9133, + "step": 19196 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955012006755445e-05, + "loss": 0.5905, + "step": 19197 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955007296521194e-05, + "loss": 0.6176, + "step": 19198 + }, + { + "epoch": 0.02, + "learning_rate": 4.99550025860406e-05, + "loss": 0.7252, + "step": 19199 + }, + { + "epoch": 0.02, + "learning_rate": 4.995499787531367e-05, + "loss": 0.5752, + "step": 19200 + }, + { + "epoch": 0.02, + "learning_rate": 4.995499316434038e-05, + "loss": 1.1965, + "step": 19201 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954988453120755e-05, + "loss": 0.9561, + "step": 19202 + }, + { + "epoch": 0.02, + "learning_rate": 4.995498374165478e-05, + "loss": 0.9637, + "step": 19203 + }, + { + "epoch": 0.02, + "learning_rate": 4.995497902994246e-05, + "loss": 1.0664, + "step": 19204 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954974317983806e-05, + "loss": 1.0012, + "step": 19205 + }, + { + "epoch": 0.02, + "learning_rate": 4.995496960577879e-05, + "loss": 1.0365, + "step": 19206 + }, + { + "epoch": 0.02, + "learning_rate": 4.995496489332744e-05, + "loss": 0.9338, + "step": 19207 + }, + { + "epoch": 0.02, + "learning_rate": 4.995496018062974e-05, + "loss": 1.1256, + "step": 19208 + }, + { + "epoch": 0.02, + "learning_rate": 4.99549554676857e-05, + "loss": 1.244, + "step": 19209 + }, + { + "epoch": 0.02, + "learning_rate": 4.995495075449531e-05, + "loss": 0.8106, + "step": 19210 + }, + { + "epoch": 0.02, + "learning_rate": 4.995494604105858e-05, + "loss": 0.9954, + "step": 19211 + }, + { + "epoch": 0.02, + "learning_rate": 4.995494132737551e-05, + "loss": 1.1606, + "step": 19212 + }, + { + "epoch": 0.02, + "learning_rate": 4.995493661344608e-05, + "loss": 1.2498, + "step": 19213 + }, + { + "epoch": 0.02, + "learning_rate": 4.995493189927032e-05, + "loss": 1.7133, + "step": 19214 + }, + { + "epoch": 0.02, + "learning_rate": 4.99549271848482e-05, + "loss": 2.2877, + "step": 19215 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954922470179754e-05, + "loss": 2.1447, + "step": 19216 + }, + { + "epoch": 0.02, + "learning_rate": 4.995491775526495e-05, + "loss": 1.3295, + "step": 19217 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954913040103806e-05, + "loss": 1.0012, + "step": 19218 + }, + { + "epoch": 0.02, + "learning_rate": 4.995490832469632e-05, + "loss": 0.9109, + "step": 19219 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954903609042486e-05, + "loss": 0.5036, + "step": 19220 + }, + { + "epoch": 0.02, + "learning_rate": 4.995489889314231e-05, + "loss": 2.0009, + "step": 19221 + }, + { + "epoch": 0.02, + "learning_rate": 4.995489417699579e-05, + "loss": 1.3429, + "step": 19222 + }, + { + "epoch": 0.02, + "learning_rate": 4.995488946060292e-05, + "loss": 1.0764, + "step": 19223 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954884743963716e-05, + "loss": 1.4857, + "step": 19224 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954880027078156e-05, + "loss": 1.2899, + "step": 19225 + }, + { + "epoch": 0.02, + "learning_rate": 4.995487530994626e-05, + "loss": 0.721, + "step": 19226 + }, + { + "epoch": 0.02, + "learning_rate": 4.995487059256802e-05, + "loss": 1.0742, + "step": 19227 + }, + { + "epoch": 0.02, + "learning_rate": 4.995486587494344e-05, + "loss": 0.8446, + "step": 19228 + }, + { + "epoch": 0.02, + "learning_rate": 4.99548611570725e-05, + "loss": 0.753, + "step": 19229 + }, + { + "epoch": 0.02, + "learning_rate": 4.995485643895523e-05, + "loss": 0.4958, + "step": 19230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954851720591614e-05, + "loss": 0.4861, + "step": 19231 + }, + { + "epoch": 0.02, + "learning_rate": 4.995484700198165e-05, + "loss": 0.4741, + "step": 19232 + }, + { + "epoch": 0.02, + "learning_rate": 4.995484228312535e-05, + "loss": 0.4521, + "step": 19233 + }, + { + "epoch": 0.02, + "learning_rate": 4.995483756402269e-05, + "loss": 1.2927, + "step": 19234 + }, + { + "epoch": 0.02, + "learning_rate": 4.99548328446737e-05, + "loss": 1.245, + "step": 19235 + }, + { + "epoch": 0.02, + "learning_rate": 4.995482812507837e-05, + "loss": 1.1217, + "step": 19236 + }, + { + "epoch": 0.02, + "learning_rate": 4.995482340523669e-05, + "loss": 0.9203, + "step": 19237 + }, + { + "epoch": 0.02, + "learning_rate": 4.995481868514866e-05, + "loss": 1.1989, + "step": 19238 + }, + { + "epoch": 0.02, + "learning_rate": 4.99548139648143e-05, + "loss": 1.0409, + "step": 19239 + }, + { + "epoch": 0.02, + "learning_rate": 4.995480924423359e-05, + "loss": 1.045, + "step": 19240 + }, + { + "epoch": 0.02, + "learning_rate": 4.995480452340654e-05, + "loss": 0.9666, + "step": 19241 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954799802333144e-05, + "loss": 1.1373, + "step": 19242 + }, + { + "epoch": 0.02, + "learning_rate": 4.99547950810134e-05, + "loss": 1.3707, + "step": 19243 + }, + { + "epoch": 0.02, + "learning_rate": 4.995479035944732e-05, + "loss": 1.1284, + "step": 19244 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954785637634894e-05, + "loss": 0.9551, + "step": 19245 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954780915576124e-05, + "loss": 1.0073, + "step": 19246 + }, + { + "epoch": 0.02, + "learning_rate": 4.995477619327101e-05, + "loss": 1.0787, + "step": 19247 + }, + { + "epoch": 0.02, + "learning_rate": 4.995477147071955e-05, + "loss": 0.5148, + "step": 19248 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954766747921756e-05, + "loss": 1.3025, + "step": 19249 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954762024877616e-05, + "loss": 0.984, + "step": 19250 + }, + { + "epoch": 0.02, + "learning_rate": 4.995475730158713e-05, + "loss": 0.9945, + "step": 19251 + }, + { + "epoch": 0.02, + "learning_rate": 4.99547525780503e-05, + "loss": 1.0042, + "step": 19252 + }, + { + "epoch": 0.02, + "learning_rate": 4.995474785426714e-05, + "loss": 0.9965, + "step": 19253 + }, + { + "epoch": 0.02, + "learning_rate": 4.995474313023763e-05, + "loss": 1.1785, + "step": 19254 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954738405961765e-05, + "loss": 1.0819, + "step": 19255 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954733681439566e-05, + "loss": 1.0069, + "step": 19256 + }, + { + "epoch": 0.02, + "learning_rate": 4.995472895667103e-05, + "loss": 0.7242, + "step": 19257 + }, + { + "epoch": 0.02, + "learning_rate": 4.995472423165615e-05, + "loss": 1.3509, + "step": 19258 + }, + { + "epoch": 0.02, + "learning_rate": 4.995471950639492e-05, + "loss": 1.0406, + "step": 19259 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954714780887355e-05, + "loss": 1.0825, + "step": 19260 + }, + { + "epoch": 0.02, + "learning_rate": 4.995471005513344e-05, + "loss": 1.2383, + "step": 19261 + }, + { + "epoch": 0.02, + "learning_rate": 4.995470532913319e-05, + "loss": 1.0412, + "step": 19262 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954700602886594e-05, + "loss": 1.1885, + "step": 19263 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954695876393655e-05, + "loss": 1.029, + "step": 19264 + }, + { + "epoch": 0.02, + "learning_rate": 4.995469114965438e-05, + "loss": 1.0229, + "step": 19265 + }, + { + "epoch": 0.02, + "learning_rate": 4.995468642266875e-05, + "loss": 1.1967, + "step": 19266 + }, + { + "epoch": 0.02, + "learning_rate": 4.995468169543679e-05, + "loss": 1.2703, + "step": 19267 + }, + { + "epoch": 0.02, + "learning_rate": 4.995467696795848e-05, + "loss": 1.3139, + "step": 19268 + }, + { + "epoch": 0.02, + "learning_rate": 4.995467224023383e-05, + "loss": 1.4161, + "step": 19269 + }, + { + "epoch": 0.02, + "learning_rate": 4.995466751226284e-05, + "loss": 1.2393, + "step": 19270 + }, + { + "epoch": 0.02, + "learning_rate": 4.995466278404551e-05, + "loss": 0.8414, + "step": 19271 + }, + { + "epoch": 0.02, + "learning_rate": 4.995465805558184e-05, + "loss": 0.9599, + "step": 19272 + }, + { + "epoch": 0.02, + "learning_rate": 4.995465332687182e-05, + "loss": 1.415, + "step": 19273 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954648597915464e-05, + "loss": 0.8591, + "step": 19274 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954643868712764e-05, + "loss": 0.5772, + "step": 19275 + }, + { + "epoch": 0.02, + "learning_rate": 4.995463913926373e-05, + "loss": 0.9447, + "step": 19276 + }, + { + "epoch": 0.02, + "learning_rate": 4.995463440956833e-05, + "loss": 1.0767, + "step": 19277 + }, + { + "epoch": 0.02, + "learning_rate": 4.995462967962661e-05, + "loss": 0.8706, + "step": 19278 + }, + { + "epoch": 0.02, + "learning_rate": 4.995462494943854e-05, + "loss": 1.2448, + "step": 19279 + }, + { + "epoch": 0.02, + "learning_rate": 4.995462021900414e-05, + "loss": 1.2057, + "step": 19280 + }, + { + "epoch": 0.02, + "learning_rate": 4.995461548832338e-05, + "loss": 0.9464, + "step": 19281 + }, + { + "epoch": 0.02, + "learning_rate": 4.99546107573963e-05, + "loss": 1.2164, + "step": 19282 + }, + { + "epoch": 0.02, + "learning_rate": 4.995460602622287e-05, + "loss": 0.6809, + "step": 19283 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954601294803096e-05, + "loss": 0.9319, + "step": 19284 + }, + { + "epoch": 0.02, + "learning_rate": 4.995459656313698e-05, + "loss": 0.8735, + "step": 19285 + }, + { + "epoch": 0.02, + "learning_rate": 4.995459183122452e-05, + "loss": 1.0454, + "step": 19286 + }, + { + "epoch": 0.02, + "learning_rate": 4.995458709906572e-05, + "loss": 1.0508, + "step": 19287 + }, + { + "epoch": 0.02, + "learning_rate": 4.995458236666059e-05, + "loss": 1.2713, + "step": 19288 + }, + { + "epoch": 0.02, + "learning_rate": 4.99545776340091e-05, + "loss": 1.0254, + "step": 19289 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954572901111286e-05, + "loss": 1.0027, + "step": 19290 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954568167967125e-05, + "loss": 1.1721, + "step": 19291 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954563434576627e-05, + "loss": 1.0835, + "step": 19292 + }, + { + "epoch": 0.02, + "learning_rate": 4.995455870093978e-05, + "loss": 1.1481, + "step": 19293 + }, + { + "epoch": 0.02, + "learning_rate": 4.99545539670566e-05, + "loss": 1.1336, + "step": 19294 + }, + { + "epoch": 0.02, + "learning_rate": 4.995454923292707e-05, + "loss": 1.2392, + "step": 19295 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954544498551204e-05, + "loss": 1.0011, + "step": 19296 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954539763929e-05, + "loss": 0.9099, + "step": 19297 + }, + { + "epoch": 0.02, + "learning_rate": 4.995453502906045e-05, + "loss": 0.9345, + "step": 19298 + }, + { + "epoch": 0.02, + "learning_rate": 4.995453029394556e-05, + "loss": 0.8692, + "step": 19299 + }, + { + "epoch": 0.02, + "learning_rate": 4.995452555858433e-05, + "loss": 0.8648, + "step": 19300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954520822976756e-05, + "loss": 1.0525, + "step": 19301 + }, + { + "epoch": 0.02, + "learning_rate": 4.995451608712285e-05, + "loss": 1.2489, + "step": 19302 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954511351022604e-05, + "loss": 1.1957, + "step": 19303 + }, + { + "epoch": 0.02, + "learning_rate": 4.995450661467601e-05, + "loss": 1.1293, + "step": 19304 + }, + { + "epoch": 0.02, + "learning_rate": 4.995450187808308e-05, + "loss": 1.4837, + "step": 19305 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954497141243804e-05, + "loss": 1.0879, + "step": 19306 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954492404158195e-05, + "loss": 1.1045, + "step": 19307 + }, + { + "epoch": 0.02, + "learning_rate": 4.995448766682624e-05, + "loss": 1.3895, + "step": 19308 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954482929247945e-05, + "loss": 1.0971, + "step": 19309 + }, + { + "epoch": 0.02, + "learning_rate": 4.995447819142332e-05, + "loss": 1.3849, + "step": 19310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954473453352346e-05, + "loss": 1.1698, + "step": 19311 + }, + { + "epoch": 0.02, + "learning_rate": 4.995446871503503e-05, + "loss": 0.708, + "step": 19312 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954463976471375e-05, + "loss": 0.8596, + "step": 19313 + }, + { + "epoch": 0.02, + "learning_rate": 4.995445923766139e-05, + "loss": 0.9283, + "step": 19314 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954454498605055e-05, + "loss": 0.9766, + "step": 19315 + }, + { + "epoch": 0.02, + "learning_rate": 4.995444975930238e-05, + "loss": 1.1457, + "step": 19316 + }, + { + "epoch": 0.02, + "learning_rate": 4.995444501975337e-05, + "loss": 0.9783, + "step": 19317 + }, + { + "epoch": 0.02, + "learning_rate": 4.995444027995802e-05, + "loss": 1.2832, + "step": 19318 + }, + { + "epoch": 0.02, + "learning_rate": 4.995443553991633e-05, + "loss": 1.1363, + "step": 19319 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954430799628294e-05, + "loss": 1.2653, + "step": 19320 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954426059093926e-05, + "loss": 1.0693, + "step": 19321 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954421318313214e-05, + "loss": 1.4815, + "step": 19322 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954416577286165e-05, + "loss": 1.1423, + "step": 19323 + }, + { + "epoch": 0.02, + "learning_rate": 4.995441183601278e-05, + "loss": 1.0474, + "step": 19324 + }, + { + "epoch": 0.02, + "learning_rate": 4.995440709449305e-05, + "loss": 0.9858, + "step": 19325 + }, + { + "epoch": 0.02, + "learning_rate": 4.995440235272698e-05, + "loss": 1.1529, + "step": 19326 + }, + { + "epoch": 0.02, + "learning_rate": 4.995439761071457e-05, + "loss": 1.23, + "step": 19327 + }, + { + "epoch": 0.02, + "learning_rate": 4.995439286845582e-05, + "loss": 1.5527, + "step": 19328 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954388125950734e-05, + "loss": 1.0115, + "step": 19329 + }, + { + "epoch": 0.02, + "learning_rate": 4.995438338319931e-05, + "loss": 0.9263, + "step": 19330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954378640201546e-05, + "loss": 0.9061, + "step": 19331 + }, + { + "epoch": 0.02, + "learning_rate": 4.995437389695744e-05, + "loss": 0.9853, + "step": 19332 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954369153467e-05, + "loss": 1.196, + "step": 19333 + }, + { + "epoch": 0.02, + "learning_rate": 4.995436440973022e-05, + "loss": 0.9523, + "step": 19334 + }, + { + "epoch": 0.02, + "learning_rate": 4.99543596657471e-05, + "loss": 0.8554, + "step": 19335 + }, + { + "epoch": 0.02, + "learning_rate": 4.995435492151764e-05, + "loss": 1.0282, + "step": 19336 + }, + { + "epoch": 0.02, + "learning_rate": 4.995435017704184e-05, + "loss": 1.1775, + "step": 19337 + }, + { + "epoch": 0.02, + "learning_rate": 4.995434543231971e-05, + "loss": 1.0784, + "step": 19338 + }, + { + "epoch": 0.02, + "learning_rate": 4.995434068735123e-05, + "loss": 1.1094, + "step": 19339 + }, + { + "epoch": 0.02, + "learning_rate": 4.995433594213641e-05, + "loss": 1.0715, + "step": 19340 + }, + { + "epoch": 0.02, + "learning_rate": 4.995433119667526e-05, + "loss": 1.1213, + "step": 19341 + }, + { + "epoch": 0.02, + "learning_rate": 4.995432645096777e-05, + "loss": 1.1566, + "step": 19342 + }, + { + "epoch": 0.02, + "learning_rate": 4.995432170501394e-05, + "loss": 1.2043, + "step": 19343 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954316958813765e-05, + "loss": 1.1413, + "step": 19344 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954312212367264e-05, + "loss": 1.124, + "step": 19345 + }, + { + "epoch": 0.02, + "learning_rate": 4.995430746567441e-05, + "loss": 1.0499, + "step": 19346 + }, + { + "epoch": 0.02, + "learning_rate": 4.995430271873523e-05, + "loss": 1.2526, + "step": 19347 + }, + { + "epoch": 0.02, + "learning_rate": 4.99542979715497e-05, + "loss": 1.1216, + "step": 19348 + }, + { + "epoch": 0.02, + "learning_rate": 4.995429322411784e-05, + "loss": 1.119, + "step": 19349 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954288476439645e-05, + "loss": 1.117, + "step": 19350 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954283728515105e-05, + "loss": 1.2026, + "step": 19351 + }, + { + "epoch": 0.02, + "learning_rate": 4.995427898034423e-05, + "loss": 1.2079, + "step": 19352 + }, + { + "epoch": 0.02, + "learning_rate": 4.995427423192702e-05, + "loss": 1.2886, + "step": 19353 + }, + { + "epoch": 0.02, + "learning_rate": 4.995426948326346e-05, + "loss": 1.0737, + "step": 19354 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954264734353576e-05, + "loss": 1.2677, + "step": 19355 + }, + { + "epoch": 0.02, + "learning_rate": 4.995425998519735e-05, + "loss": 1.1583, + "step": 19356 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954255235794776e-05, + "loss": 0.6243, + "step": 19357 + }, + { + "epoch": 0.02, + "learning_rate": 4.995425048614587e-05, + "loss": 0.9922, + "step": 19358 + }, + { + "epoch": 0.02, + "learning_rate": 4.995424573625063e-05, + "loss": 1.06, + "step": 19359 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954240986109055e-05, + "loss": 1.2124, + "step": 19360 + }, + { + "epoch": 0.02, + "learning_rate": 4.995423623572113e-05, + "loss": 1.0914, + "step": 19361 + }, + { + "epoch": 0.02, + "learning_rate": 4.995423148508688e-05, + "loss": 0.83, + "step": 19362 + }, + { + "epoch": 0.02, + "learning_rate": 4.995422673420629e-05, + "loss": 1.0264, + "step": 19363 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954221983079355e-05, + "loss": 0.9966, + "step": 19364 + }, + { + "epoch": 0.02, + "learning_rate": 4.995421723170609e-05, + "loss": 0.8204, + "step": 19365 + }, + { + "epoch": 0.02, + "learning_rate": 4.995421248008648e-05, + "loss": 1.1324, + "step": 19366 + }, + { + "epoch": 0.02, + "learning_rate": 4.995420772822054e-05, + "loss": 1.1546, + "step": 19367 + }, + { + "epoch": 0.02, + "learning_rate": 4.995420297610826e-05, + "loss": 1.258, + "step": 19368 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954198223749646e-05, + "loss": 1.0821, + "step": 19369 + }, + { + "epoch": 0.02, + "learning_rate": 4.995419347114469e-05, + "loss": 1.2707, + "step": 19370 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954188718293396e-05, + "loss": 1.1921, + "step": 19371 + }, + { + "epoch": 0.02, + "learning_rate": 4.995418396519577e-05, + "loss": 1.3018, + "step": 19372 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954179211851796e-05, + "loss": 0.868, + "step": 19373 + }, + { + "epoch": 0.02, + "learning_rate": 4.995417445826149e-05, + "loss": 1.1215, + "step": 19374 + }, + { + "epoch": 0.02, + "learning_rate": 4.995416970442485e-05, + "loss": 0.977, + "step": 19375 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954164950341875e-05, + "loss": 1.0759, + "step": 19376 + }, + { + "epoch": 0.02, + "learning_rate": 4.995416019601256e-05, + "loss": 1.0187, + "step": 19377 + }, + { + "epoch": 0.02, + "learning_rate": 4.995415544143691e-05, + "loss": 0.3381, + "step": 19378 + }, + { + "epoch": 0.02, + "learning_rate": 4.995415068661492e-05, + "loss": 0.8756, + "step": 19379 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954145931546594e-05, + "loss": 0.9663, + "step": 19380 + }, + { + "epoch": 0.02, + "learning_rate": 4.995414117623193e-05, + "loss": 1.0871, + "step": 19381 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954136420670926e-05, + "loss": 1.3809, + "step": 19382 + }, + { + "epoch": 0.02, + "learning_rate": 4.99541316648636e-05, + "loss": 1.5835, + "step": 19383 + }, + { + "epoch": 0.02, + "learning_rate": 4.995412690880992e-05, + "loss": 1.0434, + "step": 19384 + }, + { + "epoch": 0.02, + "learning_rate": 4.995412215250991e-05, + "loss": 1.0567, + "step": 19385 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954117395963566e-05, + "loss": 1.0762, + "step": 19386 + }, + { + "epoch": 0.02, + "learning_rate": 4.995411263917088e-05, + "loss": 1.0567, + "step": 19387 + }, + { + "epoch": 0.02, + "learning_rate": 4.995410788213186e-05, + "loss": 0.8567, + "step": 19388 + }, + { + "epoch": 0.02, + "learning_rate": 4.995410312484651e-05, + "loss": 0.9516, + "step": 19389 + }, + { + "epoch": 0.02, + "learning_rate": 4.995409836731482e-05, + "loss": 1.1749, + "step": 19390 + }, + { + "epoch": 0.02, + "learning_rate": 4.995409360953679e-05, + "loss": 1.2256, + "step": 19391 + }, + { + "epoch": 0.02, + "learning_rate": 4.995408885151243e-05, + "loss": 0.8389, + "step": 19392 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954084093241725e-05, + "loss": 0.8863, + "step": 19393 + }, + { + "epoch": 0.02, + "learning_rate": 4.995407933472469e-05, + "loss": 0.577, + "step": 19394 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954074575961315e-05, + "loss": 0.9026, + "step": 19395 + }, + { + "epoch": 0.02, + "learning_rate": 4.995406981695161e-05, + "loss": 0.9742, + "step": 19396 + }, + { + "epoch": 0.02, + "learning_rate": 4.995406505769556e-05, + "loss": 1.0701, + "step": 19397 + }, + { + "epoch": 0.02, + "learning_rate": 4.995406029819318e-05, + "loss": 1.0514, + "step": 19398 + }, + { + "epoch": 0.02, + "learning_rate": 4.995405553844446e-05, + "loss": 1.2353, + "step": 19399 + }, + { + "epoch": 0.02, + "learning_rate": 4.995405077844941e-05, + "loss": 1.0002, + "step": 19400 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954046018208026e-05, + "loss": 0.8695, + "step": 19401 + }, + { + "epoch": 0.02, + "learning_rate": 4.99540412577203e-05, + "loss": 0.5463, + "step": 19402 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954036496986236e-05, + "loss": 1.0383, + "step": 19403 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954031736005835e-05, + "loss": 1.3333, + "step": 19404 + }, + { + "epoch": 0.02, + "learning_rate": 4.99540269747791e-05, + "loss": 1.028, + "step": 19405 + }, + { + "epoch": 0.02, + "learning_rate": 4.995402221330604e-05, + "loss": 1.0235, + "step": 19406 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954017451586634e-05, + "loss": 1.0162, + "step": 19407 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954012689620896e-05, + "loss": 0.9853, + "step": 19408 + }, + { + "epoch": 0.02, + "learning_rate": 4.995400792740882e-05, + "loss": 0.8552, + "step": 19409 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954003164950415e-05, + "loss": 0.9314, + "step": 19410 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953998402245665e-05, + "loss": 0.6745, + "step": 19411 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953993639294584e-05, + "loss": 1.0115, + "step": 19412 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953988876097165e-05, + "loss": 1.0986, + "step": 19413 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953984112653416e-05, + "loss": 1.1983, + "step": 19414 + }, + { + "epoch": 0.02, + "learning_rate": 4.995397934896333e-05, + "loss": 0.9997, + "step": 19415 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953974585026905e-05, + "loss": 0.5433, + "step": 19416 + }, + { + "epoch": 0.02, + "learning_rate": 4.995396982084416e-05, + "loss": 0.4551, + "step": 19417 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953965056415065e-05, + "loss": 0.983, + "step": 19418 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953960291739635e-05, + "loss": 1.3413, + "step": 19419 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953955526817874e-05, + "loss": 0.9104, + "step": 19420 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953950761649776e-05, + "loss": 1.1702, + "step": 19421 + }, + { + "epoch": 0.02, + "learning_rate": 4.995394599623534e-05, + "loss": 0.9552, + "step": 19422 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953941230574574e-05, + "loss": 1.1938, + "step": 19423 + }, + { + "epoch": 0.02, + "learning_rate": 4.995393646466748e-05, + "loss": 1.0448, + "step": 19424 + }, + { + "epoch": 0.02, + "learning_rate": 4.995393169851404e-05, + "loss": 1.392, + "step": 19425 + }, + { + "epoch": 0.02, + "learning_rate": 4.995392693211427e-05, + "loss": 0.5965, + "step": 19426 + }, + { + "epoch": 0.02, + "learning_rate": 4.995392216546817e-05, + "loss": 0.2987, + "step": 19427 + }, + { + "epoch": 0.02, + "learning_rate": 4.995391739857572e-05, + "loss": 0.3413, + "step": 19428 + }, + { + "epoch": 0.02, + "learning_rate": 4.995391263143695e-05, + "loss": 0.9478, + "step": 19429 + }, + { + "epoch": 0.02, + "learning_rate": 4.995390786405184e-05, + "loss": 1.0214, + "step": 19430 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953903096420394e-05, + "loss": 1.0263, + "step": 19431 + }, + { + "epoch": 0.02, + "learning_rate": 4.995389832854262e-05, + "loss": 1.1779, + "step": 19432 + }, + { + "epoch": 0.02, + "learning_rate": 4.995389356041851e-05, + "loss": 1.4795, + "step": 19433 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953888792048064e-05, + "loss": 1.0849, + "step": 19434 + }, + { + "epoch": 0.02, + "learning_rate": 4.995388402343128e-05, + "loss": 1.2079, + "step": 19435 + }, + { + "epoch": 0.02, + "learning_rate": 4.995387925456817e-05, + "loss": 1.1864, + "step": 19436 + }, + { + "epoch": 0.02, + "learning_rate": 4.995387448545872e-05, + "loss": 1.1666, + "step": 19437 + }, + { + "epoch": 0.02, + "learning_rate": 4.995386971610293e-05, + "loss": 1.2221, + "step": 19438 + }, + { + "epoch": 0.02, + "learning_rate": 4.995386494650082e-05, + "loss": 0.9236, + "step": 19439 + }, + { + "epoch": 0.02, + "learning_rate": 4.995386017665237e-05, + "loss": 0.633, + "step": 19440 + }, + { + "epoch": 0.02, + "learning_rate": 4.995385540655758e-05, + "loss": 0.464, + "step": 19441 + }, + { + "epoch": 0.02, + "learning_rate": 4.995385063621646e-05, + "loss": 0.5513, + "step": 19442 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953845865629006e-05, + "loss": 0.3287, + "step": 19443 + }, + { + "epoch": 0.02, + "learning_rate": 4.995384109479522e-05, + "loss": 0.5287, + "step": 19444 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953836323715096e-05, + "loss": 0.8404, + "step": 19445 + }, + { + "epoch": 0.02, + "learning_rate": 4.995383155238864e-05, + "loss": 1.1239, + "step": 19446 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953826780815856e-05, + "loss": 1.4788, + "step": 19447 + }, + { + "epoch": 0.02, + "learning_rate": 4.995382200899673e-05, + "loss": 1.1838, + "step": 19448 + }, + { + "epoch": 0.02, + "learning_rate": 4.995381723693128e-05, + "loss": 0.8796, + "step": 19449 + }, + { + "epoch": 0.02, + "learning_rate": 4.995381246461949e-05, + "loss": 0.9667, + "step": 19450 + }, + { + "epoch": 0.02, + "learning_rate": 4.995380769206136e-05, + "loss": 1.2038, + "step": 19451 + }, + { + "epoch": 0.02, + "learning_rate": 4.995380291925691e-05, + "loss": 1.1696, + "step": 19452 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953798146206124e-05, + "loss": 1.4705, + "step": 19453 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953793372908996e-05, + "loss": 1.1468, + "step": 19454 + }, + { + "epoch": 0.02, + "learning_rate": 4.995378859936554e-05, + "loss": 1.23, + "step": 19455 + }, + { + "epoch": 0.02, + "learning_rate": 4.995378382557575e-05, + "loss": 1.3248, + "step": 19456 + }, + { + "epoch": 0.02, + "learning_rate": 4.995377905153963e-05, + "loss": 1.3197, + "step": 19457 + }, + { + "epoch": 0.02, + "learning_rate": 4.995377427725717e-05, + "loss": 1.0324, + "step": 19458 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953769502728384e-05, + "loss": 1.0601, + "step": 19459 + }, + { + "epoch": 0.02, + "learning_rate": 4.995376472795326e-05, + "loss": 0.994, + "step": 19460 + }, + { + "epoch": 0.02, + "learning_rate": 4.99537599529318e-05, + "loss": 1.1759, + "step": 19461 + }, + { + "epoch": 0.02, + "learning_rate": 4.995375517766402e-05, + "loss": 1.1833, + "step": 19462 + }, + { + "epoch": 0.02, + "learning_rate": 4.995375040214989e-05, + "loss": 1.0677, + "step": 19463 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953745626389435e-05, + "loss": 1.1962, + "step": 19464 + }, + { + "epoch": 0.02, + "learning_rate": 4.995374085038265e-05, + "loss": 0.7766, + "step": 19465 + }, + { + "epoch": 0.02, + "learning_rate": 4.995373607412953e-05, + "loss": 0.9128, + "step": 19466 + }, + { + "epoch": 0.02, + "learning_rate": 4.995373129763008e-05, + "loss": 0.995, + "step": 19467 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953726520884294e-05, + "loss": 1.2128, + "step": 19468 + }, + { + "epoch": 0.02, + "learning_rate": 4.995372174389218e-05, + "loss": 0.8503, + "step": 19469 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953716966653725e-05, + "loss": 1.0194, + "step": 19470 + }, + { + "epoch": 0.02, + "learning_rate": 4.995371218916894e-05, + "loss": 1.2112, + "step": 19471 + }, + { + "epoch": 0.02, + "learning_rate": 4.995370741143782e-05, + "loss": 1.0213, + "step": 19472 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953702633460374e-05, + "loss": 1.1726, + "step": 19473 + }, + { + "epoch": 0.02, + "learning_rate": 4.99536978552366e-05, + "loss": 1.1736, + "step": 19474 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953693076766484e-05, + "loss": 1.0172, + "step": 19475 + }, + { + "epoch": 0.02, + "learning_rate": 4.995368829805004e-05, + "loss": 1.0514, + "step": 19476 + }, + { + "epoch": 0.02, + "learning_rate": 4.995368351908726e-05, + "loss": 1.0098, + "step": 19477 + }, + { + "epoch": 0.02, + "learning_rate": 4.995367873987815e-05, + "loss": 1.3219, + "step": 19478 + }, + { + "epoch": 0.02, + "learning_rate": 4.995367396042271e-05, + "loss": 1.1564, + "step": 19479 + }, + { + "epoch": 0.02, + "learning_rate": 4.995366918072093e-05, + "loss": 1.027, + "step": 19480 + }, + { + "epoch": 0.02, + "learning_rate": 4.995366440077282e-05, + "loss": 0.9331, + "step": 19481 + }, + { + "epoch": 0.02, + "learning_rate": 4.995365962057839e-05, + "loss": 0.926, + "step": 19482 + }, + { + "epoch": 0.02, + "learning_rate": 4.995365484013762e-05, + "loss": 0.801, + "step": 19483 + }, + { + "epoch": 0.02, + "learning_rate": 4.995365005945052e-05, + "loss": 1.0164, + "step": 19484 + }, + { + "epoch": 0.02, + "learning_rate": 4.995364527851708e-05, + "loss": 1.2393, + "step": 19485 + }, + { + "epoch": 0.02, + "learning_rate": 4.995364049733732e-05, + "loss": 0.6805, + "step": 19486 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953635715911215e-05, + "loss": 0.2952, + "step": 19487 + }, + { + "epoch": 0.02, + "learning_rate": 4.995363093423878e-05, + "loss": 0.2485, + "step": 19488 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953626152320024e-05, + "loss": 0.2909, + "step": 19489 + }, + { + "epoch": 0.02, + "learning_rate": 4.995362137015493e-05, + "loss": 0.9623, + "step": 19490 + }, + { + "epoch": 0.02, + "learning_rate": 4.995361658774351e-05, + "loss": 1.0428, + "step": 19491 + }, + { + "epoch": 0.02, + "learning_rate": 4.995361180508575e-05, + "loss": 1.266, + "step": 19492 + }, + { + "epoch": 0.02, + "learning_rate": 4.995360702218166e-05, + "loss": 1.1301, + "step": 19493 + }, + { + "epoch": 0.02, + "learning_rate": 4.995360223903124e-05, + "loss": 1.6919, + "step": 19494 + }, + { + "epoch": 0.02, + "learning_rate": 4.995359745563449e-05, + "loss": 0.9695, + "step": 19495 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953592671991414e-05, + "loss": 0.9921, + "step": 19496 + }, + { + "epoch": 0.02, + "learning_rate": 4.995358788810199e-05, + "loss": 1.1344, + "step": 19497 + }, + { + "epoch": 0.02, + "learning_rate": 4.995358310396625e-05, + "loss": 1.2144, + "step": 19498 + }, + { + "epoch": 0.02, + "learning_rate": 4.995357831958417e-05, + "loss": 1.3514, + "step": 19499 + }, + { + "epoch": 0.02, + "learning_rate": 4.995357353495577e-05, + "loss": 1.266, + "step": 19500 + }, + { + "epoch": 0.02, + "eval_loss": 1.0280624628067017, + "eval_runtime": 91.79, + "eval_samples_per_second": 15.089, + "eval_steps_per_second": 3.78, + "step": 19500 + }, + { + "epoch": 0.02, + "learning_rate": 4.995356875008103e-05, + "loss": 1.0593, + "step": 19501 + }, + { + "epoch": 0.02, + "learning_rate": 4.995356396495996e-05, + "loss": 1.1086, + "step": 19502 + }, + { + "epoch": 0.02, + "learning_rate": 4.995355917959256e-05, + "loss": 0.8475, + "step": 19503 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953554393978825e-05, + "loss": 1.234, + "step": 19504 + }, + { + "epoch": 0.02, + "learning_rate": 4.995354960811876e-05, + "loss": 1.1417, + "step": 19505 + }, + { + "epoch": 0.02, + "learning_rate": 4.995354482201237e-05, + "loss": 0.9985, + "step": 19506 + }, + { + "epoch": 0.02, + "learning_rate": 4.995354003565964e-05, + "loss": 1.6862, + "step": 19507 + }, + { + "epoch": 0.02, + "learning_rate": 4.995353524906058e-05, + "loss": 1.2272, + "step": 19508 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953530462215204e-05, + "loss": 1.1184, + "step": 19509 + }, + { + "epoch": 0.02, + "learning_rate": 4.995352567512348e-05, + "loss": 1.0556, + "step": 19510 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953520887785434e-05, + "loss": 1.1329, + "step": 19511 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953516100201056e-05, + "loss": 1.1709, + "step": 19512 + }, + { + "epoch": 0.02, + "learning_rate": 4.995351131237035e-05, + "loss": 0.8374, + "step": 19513 + }, + { + "epoch": 0.02, + "learning_rate": 4.995350652429331e-05, + "loss": 1.1394, + "step": 19514 + }, + { + "epoch": 0.02, + "learning_rate": 4.995350173596994e-05, + "loss": 1.069, + "step": 19515 + }, + { + "epoch": 0.02, + "learning_rate": 4.995349694740024e-05, + "loss": 1.1622, + "step": 19516 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953492158584206e-05, + "loss": 0.7735, + "step": 19517 + }, + { + "epoch": 0.02, + "learning_rate": 4.995348736952184e-05, + "loss": 0.1568, + "step": 19518 + }, + { + "epoch": 0.02, + "learning_rate": 4.995348258021315e-05, + "loss": 0.5787, + "step": 19519 + }, + { + "epoch": 0.02, + "learning_rate": 4.995347779065813e-05, + "loss": 1.0258, + "step": 19520 + }, + { + "epoch": 0.02, + "learning_rate": 4.995347300085678e-05, + "loss": 1.0835, + "step": 19521 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953468210809094e-05, + "loss": 0.9143, + "step": 19522 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953463420515084e-05, + "loss": 1.0433, + "step": 19523 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953458629974744e-05, + "loss": 1.0386, + "step": 19524 + }, + { + "epoch": 0.02, + "learning_rate": 4.995345383918807e-05, + "loss": 1.2294, + "step": 19525 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953449048155066e-05, + "loss": 1.0884, + "step": 19526 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953444256875734e-05, + "loss": 0.7121, + "step": 19527 + }, + { + "epoch": 0.02, + "learning_rate": 4.995343946535007e-05, + "loss": 1.0282, + "step": 19528 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953434673578084e-05, + "loss": 1.2684, + "step": 19529 + }, + { + "epoch": 0.02, + "learning_rate": 4.995342988155975e-05, + "loss": 1.3149, + "step": 19530 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953425089295105e-05, + "loss": 1.1232, + "step": 19531 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953420296784126e-05, + "loss": 1.0037, + "step": 19532 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953415504026816e-05, + "loss": 1.2248, + "step": 19533 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953410711023175e-05, + "loss": 0.9695, + "step": 19534 + }, + { + "epoch": 0.02, + "learning_rate": 4.99534059177732e-05, + "loss": 0.9408, + "step": 19535 + }, + { + "epoch": 0.02, + "learning_rate": 4.99534011242769e-05, + "loss": 0.7978, + "step": 19536 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953396330534276e-05, + "loss": 0.8975, + "step": 19537 + }, + { + "epoch": 0.02, + "learning_rate": 4.995339153654531e-05, + "loss": 0.9662, + "step": 19538 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953386742310026e-05, + "loss": 0.7139, + "step": 19539 + }, + { + "epoch": 0.02, + "learning_rate": 4.995338194782841e-05, + "loss": 1.0453, + "step": 19540 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953377153100465e-05, + "loss": 0.8875, + "step": 19541 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953372358126186e-05, + "loss": 1.003, + "step": 19542 + }, + { + "epoch": 0.02, + "learning_rate": 4.995336756290558e-05, + "loss": 1.6193, + "step": 19543 + }, + { + "epoch": 0.02, + "learning_rate": 4.995336276743865e-05, + "loss": 1.2163, + "step": 19544 + }, + { + "epoch": 0.02, + "learning_rate": 4.995335797172539e-05, + "loss": 1.2068, + "step": 19545 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953353175765794e-05, + "loss": 0.9622, + "step": 19546 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953348379559875e-05, + "loss": 1.0852, + "step": 19547 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953343583107624e-05, + "loss": 1.081, + "step": 19548 + }, + { + "epoch": 0.02, + "learning_rate": 4.995333878640904e-05, + "loss": 1.3372, + "step": 19549 + }, + { + "epoch": 0.02, + "learning_rate": 4.995333398946414e-05, + "loss": 1.0742, + "step": 19550 + }, + { + "epoch": 0.02, + "learning_rate": 4.99533291922729e-05, + "loss": 1.2906, + "step": 19551 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953324394835336e-05, + "loss": 0.888, + "step": 19552 + }, + { + "epoch": 0.02, + "learning_rate": 4.995331959715144e-05, + "loss": 1.2805, + "step": 19553 + }, + { + "epoch": 0.02, + "learning_rate": 4.995331479922122e-05, + "loss": 0.5993, + "step": 19554 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953310001044665e-05, + "loss": 1.1229, + "step": 19555 + }, + { + "epoch": 0.02, + "learning_rate": 4.995330520262179e-05, + "loss": 1.0982, + "step": 19556 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953300403952576e-05, + "loss": 1.1552, + "step": 19557 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953295605037045e-05, + "loss": 1.465, + "step": 19558 + }, + { + "epoch": 0.02, + "learning_rate": 4.995329080587518e-05, + "loss": 1.5656, + "step": 19559 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953286006466985e-05, + "loss": 1.4026, + "step": 19560 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953281206812455e-05, + "loss": 1.1061, + "step": 19561 + }, + { + "epoch": 0.02, + "learning_rate": 4.995327640691161e-05, + "loss": 1.2087, + "step": 19562 + }, + { + "epoch": 0.02, + "learning_rate": 4.995327160676443e-05, + "loss": 1.4417, + "step": 19563 + }, + { + "epoch": 0.02, + "learning_rate": 4.995326680637092e-05, + "loss": 0.5304, + "step": 19564 + }, + { + "epoch": 0.02, + "learning_rate": 4.995326200573109e-05, + "loss": 0.1152, + "step": 19565 + }, + { + "epoch": 0.02, + "learning_rate": 4.995325720484493e-05, + "loss": 0.067, + "step": 19566 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953252403712434e-05, + "loss": 0.0598, + "step": 19567 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953247602333616e-05, + "loss": 0.0576, + "step": 19568 + }, + { + "epoch": 0.02, + "learning_rate": 4.995324280070847e-05, + "loss": 0.0508, + "step": 19569 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953237998836995e-05, + "loss": 0.0933, + "step": 19570 + }, + { + "epoch": 0.02, + "learning_rate": 4.995323319671919e-05, + "loss": 0.1034, + "step": 19571 + }, + { + "epoch": 0.02, + "learning_rate": 4.995322839435506e-05, + "loss": 1.1872, + "step": 19572 + }, + { + "epoch": 0.02, + "learning_rate": 4.99532235917446e-05, + "loss": 0.9402, + "step": 19573 + }, + { + "epoch": 0.02, + "learning_rate": 4.995321878888782e-05, + "loss": 0.9242, + "step": 19574 + }, + { + "epoch": 0.02, + "learning_rate": 4.99532139857847e-05, + "loss": 0.955, + "step": 19575 + }, + { + "epoch": 0.02, + "learning_rate": 4.995320918243526e-05, + "loss": 1.0, + "step": 19576 + }, + { + "epoch": 0.02, + "learning_rate": 4.995320437883949e-05, + "loss": 1.1454, + "step": 19577 + }, + { + "epoch": 0.02, + "learning_rate": 4.99531995749974e-05, + "loss": 1.1186, + "step": 19578 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953194770908976e-05, + "loss": 1.041, + "step": 19579 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953189966574224e-05, + "loss": 1.1514, + "step": 19580 + }, + { + "epoch": 0.02, + "learning_rate": 4.995318516199314e-05, + "loss": 1.1516, + "step": 19581 + }, + { + "epoch": 0.02, + "learning_rate": 4.995318035716574e-05, + "loss": 1.082, + "step": 19582 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953175552092e-05, + "loss": 1.0778, + "step": 19583 + }, + { + "epoch": 0.02, + "learning_rate": 4.995317074677194e-05, + "loss": 1.107, + "step": 19584 + }, + { + "epoch": 0.02, + "learning_rate": 4.995316594120556e-05, + "loss": 0.8963, + "step": 19585 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953161135392844e-05, + "loss": 1.089, + "step": 19586 + }, + { + "epoch": 0.02, + "learning_rate": 4.99531563293338e-05, + "loss": 1.1228, + "step": 19587 + }, + { + "epoch": 0.02, + "learning_rate": 4.995315152302843e-05, + "loss": 1.0707, + "step": 19588 + }, + { + "epoch": 0.02, + "learning_rate": 4.995314671647674e-05, + "loss": 1.3275, + "step": 19589 + }, + { + "epoch": 0.02, + "learning_rate": 4.995314190967871e-05, + "loss": 0.7864, + "step": 19590 + }, + { + "epoch": 0.02, + "learning_rate": 4.995313710263436e-05, + "loss": 0.9707, + "step": 19591 + }, + { + "epoch": 0.02, + "learning_rate": 4.995313229534369e-05, + "loss": 1.0789, + "step": 19592 + }, + { + "epoch": 0.02, + "learning_rate": 4.995312748780668e-05, + "loss": 1.92, + "step": 19593 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953122680023354e-05, + "loss": 0.9433, + "step": 19594 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953117871993695e-05, + "loss": 0.9429, + "step": 19595 + }, + { + "epoch": 0.02, + "learning_rate": 4.995311306371772e-05, + "loss": 1.3022, + "step": 19596 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953108255195404e-05, + "loss": 1.0819, + "step": 19597 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953103446426766e-05, + "loss": 1.0501, + "step": 19598 + }, + { + "epoch": 0.02, + "learning_rate": 4.99530986374118e-05, + "loss": 1.0737, + "step": 19599 + }, + { + "epoch": 0.02, + "learning_rate": 4.995309382815051e-05, + "loss": 1.3264, + "step": 19600 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953089018642895e-05, + "loss": 0.9325, + "step": 19601 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953084208888954e-05, + "loss": 0.9234, + "step": 19602 + }, + { + "epoch": 0.02, + "learning_rate": 4.995307939888868e-05, + "loss": 1.0842, + "step": 19603 + }, + { + "epoch": 0.02, + "learning_rate": 4.995307458864209e-05, + "loss": 1.3292, + "step": 19604 + }, + { + "epoch": 0.02, + "learning_rate": 4.995306977814916e-05, + "loss": 0.8856, + "step": 19605 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953064967409925e-05, + "loss": 1.3047, + "step": 19606 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953060156424344e-05, + "loss": 1.0327, + "step": 19607 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953055345192446e-05, + "loss": 0.8712, + "step": 19608 + }, + { + "epoch": 0.02, + "learning_rate": 4.995305053371422e-05, + "loss": 1.0555, + "step": 19609 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953045721989665e-05, + "loss": 1.2446, + "step": 19610 + }, + { + "epoch": 0.02, + "learning_rate": 4.995304091001879e-05, + "loss": 1.0868, + "step": 19611 + }, + { + "epoch": 0.02, + "learning_rate": 4.995303609780159e-05, + "loss": 1.3396, + "step": 19612 + }, + { + "epoch": 0.02, + "learning_rate": 4.995303128533806e-05, + "loss": 1.0766, + "step": 19613 + }, + { + "epoch": 0.02, + "learning_rate": 4.995302647262821e-05, + "loss": 1.103, + "step": 19614 + }, + { + "epoch": 0.02, + "learning_rate": 4.995302165967202e-05, + "loss": 1.113, + "step": 19615 + }, + { + "epoch": 0.02, + "learning_rate": 4.995301684646951e-05, + "loss": 0.9803, + "step": 19616 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953012033020685e-05, + "loss": 1.0499, + "step": 19617 + }, + { + "epoch": 0.02, + "learning_rate": 4.995300721932552e-05, + "loss": 0.7142, + "step": 19618 + }, + { + "epoch": 0.02, + "learning_rate": 4.995300240538404e-05, + "loss": 1.1143, + "step": 19619 + }, + { + "epoch": 0.02, + "learning_rate": 4.995299759119623e-05, + "loss": 0.9296, + "step": 19620 + }, + { + "epoch": 0.02, + "learning_rate": 4.99529927767621e-05, + "loss": 0.8527, + "step": 19621 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952987962081636e-05, + "loss": 1.0394, + "step": 19622 + }, + { + "epoch": 0.02, + "learning_rate": 4.995298314715485e-05, + "loss": 1.1625, + "step": 19623 + }, + { + "epoch": 0.02, + "learning_rate": 4.995297833198174e-05, + "loss": 1.2135, + "step": 19624 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952973516562305e-05, + "loss": 0.8277, + "step": 19625 + }, + { + "epoch": 0.02, + "learning_rate": 4.995296870089654e-05, + "loss": 1.2074, + "step": 19626 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952963884984455e-05, + "loss": 1.1347, + "step": 19627 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952959068826044e-05, + "loss": 0.6683, + "step": 19628 + }, + { + "epoch": 0.02, + "learning_rate": 4.995295425242131e-05, + "loss": 0.9953, + "step": 19629 + }, + { + "epoch": 0.02, + "learning_rate": 4.995294943577024e-05, + "loss": 1.1722, + "step": 19630 + }, + { + "epoch": 0.02, + "learning_rate": 4.995294461887286e-05, + "loss": 1.0451, + "step": 19631 + }, + { + "epoch": 0.02, + "learning_rate": 4.995293980172915e-05, + "loss": 1.0502, + "step": 19632 + }, + { + "epoch": 0.02, + "learning_rate": 4.995293498433911e-05, + "loss": 1.108, + "step": 19633 + }, + { + "epoch": 0.02, + "learning_rate": 4.995293016670275e-05, + "loss": 0.872, + "step": 19634 + }, + { + "epoch": 0.02, + "learning_rate": 4.995292534882006e-05, + "loss": 0.9183, + "step": 19635 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952920530691046e-05, + "loss": 1.1408, + "step": 19636 + }, + { + "epoch": 0.02, + "learning_rate": 4.995291571231572e-05, + "loss": 0.8893, + "step": 19637 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952910893694056e-05, + "loss": 1.0212, + "step": 19638 + }, + { + "epoch": 0.02, + "learning_rate": 4.995290607482607e-05, + "loss": 0.9591, + "step": 19639 + }, + { + "epoch": 0.02, + "learning_rate": 4.995290125571176e-05, + "loss": 1.1269, + "step": 19640 + }, + { + "epoch": 0.02, + "learning_rate": 4.995289643635113e-05, + "loss": 1.0326, + "step": 19641 + }, + { + "epoch": 0.02, + "learning_rate": 4.995289161674417e-05, + "loss": 1.1609, + "step": 19642 + }, + { + "epoch": 0.02, + "learning_rate": 4.995288679689088e-05, + "loss": 1.0486, + "step": 19643 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952881976791275e-05, + "loss": 1.2846, + "step": 19644 + }, + { + "epoch": 0.02, + "learning_rate": 4.995287715644534e-05, + "loss": 1.0467, + "step": 19645 + }, + { + "epoch": 0.02, + "learning_rate": 4.995287233585309e-05, + "loss": 1.0944, + "step": 19646 + }, + { + "epoch": 0.02, + "learning_rate": 4.995286751501451e-05, + "loss": 1.0255, + "step": 19647 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952862693929604e-05, + "loss": 1.1184, + "step": 19648 + }, + { + "epoch": 0.02, + "learning_rate": 4.995285787259838e-05, + "loss": 0.8052, + "step": 19649 + }, + { + "epoch": 0.02, + "learning_rate": 4.995285305102082e-05, + "loss": 0.7232, + "step": 19650 + }, + { + "epoch": 0.02, + "learning_rate": 4.995284822919695e-05, + "loss": 1.0202, + "step": 19651 + }, + { + "epoch": 0.02, + "learning_rate": 4.995284340712674e-05, + "loss": 1.0011, + "step": 19652 + }, + { + "epoch": 0.02, + "learning_rate": 4.995283858481022e-05, + "loss": 0.8517, + "step": 19653 + }, + { + "epoch": 0.02, + "learning_rate": 4.995283376224737e-05, + "loss": 1.1823, + "step": 19654 + }, + { + "epoch": 0.02, + "learning_rate": 4.99528289394382e-05, + "loss": 1.2, + "step": 19655 + }, + { + "epoch": 0.02, + "learning_rate": 4.99528241163827e-05, + "loss": 0.8898, + "step": 19656 + }, + { + "epoch": 0.02, + "learning_rate": 4.995281929308088e-05, + "loss": 1.0298, + "step": 19657 + }, + { + "epoch": 0.02, + "learning_rate": 4.995281446953274e-05, + "loss": 0.9889, + "step": 19658 + }, + { + "epoch": 0.02, + "learning_rate": 4.995280964573827e-05, + "loss": 1.1419, + "step": 19659 + }, + { + "epoch": 0.02, + "learning_rate": 4.995280482169747e-05, + "loss": 1.0369, + "step": 19660 + }, + { + "epoch": 0.02, + "learning_rate": 4.995279999741036e-05, + "loss": 0.9841, + "step": 19661 + }, + { + "epoch": 0.02, + "learning_rate": 4.995279517287692e-05, + "loss": 1.0522, + "step": 19662 + }, + { + "epoch": 0.02, + "learning_rate": 4.995279034809716e-05, + "loss": 1.0818, + "step": 19663 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952785523071074e-05, + "loss": 1.0004, + "step": 19664 + }, + { + "epoch": 0.02, + "learning_rate": 4.995278069779867e-05, + "loss": 1.2191, + "step": 19665 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952775872279935e-05, + "loss": 1.1579, + "step": 19666 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952771046514877e-05, + "loss": 1.1779, + "step": 19667 + }, + { + "epoch": 0.02, + "learning_rate": 4.99527662205035e-05, + "loss": 1.2012, + "step": 19668 + }, + { + "epoch": 0.02, + "learning_rate": 4.99527613942458e-05, + "loss": 1.2191, + "step": 19669 + }, + { + "epoch": 0.02, + "learning_rate": 4.995275656774178e-05, + "loss": 1.027, + "step": 19670 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952751740991436e-05, + "loss": 1.0522, + "step": 19671 + }, + { + "epoch": 0.02, + "learning_rate": 4.995274691399476e-05, + "loss": 1.0838, + "step": 19672 + }, + { + "epoch": 0.02, + "learning_rate": 4.995274208675176e-05, + "loss": 1.6773, + "step": 19673 + }, + { + "epoch": 0.02, + "learning_rate": 4.995273725926245e-05, + "loss": 1.2025, + "step": 19674 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952732431526804e-05, + "loss": 1.4595, + "step": 19675 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952727603544844e-05, + "loss": 0.6885, + "step": 19676 + }, + { + "epoch": 0.02, + "learning_rate": 4.995272277531656e-05, + "loss": 1.1281, + "step": 19677 + }, + { + "epoch": 0.02, + "learning_rate": 4.995271794684196e-05, + "loss": 1.2027, + "step": 19678 + }, + { + "epoch": 0.02, + "learning_rate": 4.995271311812102e-05, + "loss": 1.0859, + "step": 19679 + }, + { + "epoch": 0.02, + "learning_rate": 4.995270828915377e-05, + "loss": 1.1027, + "step": 19680 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952703459940187e-05, + "loss": 0.9807, + "step": 19681 + }, + { + "epoch": 0.02, + "learning_rate": 4.995269863048029e-05, + "loss": 1.2301, + "step": 19682 + }, + { + "epoch": 0.02, + "learning_rate": 4.995269380077407e-05, + "loss": 0.6885, + "step": 19683 + }, + { + "epoch": 0.02, + "learning_rate": 4.995268897082153e-05, + "loss": 0.9541, + "step": 19684 + }, + { + "epoch": 0.02, + "learning_rate": 4.995268414062266e-05, + "loss": 1.0124, + "step": 19685 + }, + { + "epoch": 0.02, + "learning_rate": 4.995267931017747e-05, + "loss": 0.934, + "step": 19686 + }, + { + "epoch": 0.02, + "learning_rate": 4.995267447948596e-05, + "loss": 1.0919, + "step": 19687 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952669648548126e-05, + "loss": 1.3242, + "step": 19688 + }, + { + "epoch": 0.02, + "learning_rate": 4.995266481736397e-05, + "loss": 1.311, + "step": 19689 + }, + { + "epoch": 0.02, + "learning_rate": 4.995265998593349e-05, + "loss": 1.0763, + "step": 19690 + }, + { + "epoch": 0.02, + "learning_rate": 4.995265515425669e-05, + "loss": 1.1591, + "step": 19691 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952650322333574e-05, + "loss": 1.1842, + "step": 19692 + }, + { + "epoch": 0.02, + "learning_rate": 4.995264549016413e-05, + "loss": 0.8564, + "step": 19693 + }, + { + "epoch": 0.02, + "learning_rate": 4.995264065774836e-05, + "loss": 1.107, + "step": 19694 + }, + { + "epoch": 0.02, + "learning_rate": 4.995263582508627e-05, + "loss": 1.2695, + "step": 19695 + }, + { + "epoch": 0.02, + "learning_rate": 4.995263099217786e-05, + "loss": 1.3649, + "step": 19696 + }, + { + "epoch": 0.02, + "learning_rate": 4.995262615902313e-05, + "loss": 1.7468, + "step": 19697 + }, + { + "epoch": 0.02, + "learning_rate": 4.995262132562208e-05, + "loss": 1.7001, + "step": 19698 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952616491974704e-05, + "loss": 1.7814, + "step": 19699 + }, + { + "epoch": 0.02, + "learning_rate": 4.995261165808101e-05, + "loss": 1.6476, + "step": 19700 + }, + { + "epoch": 0.02, + "learning_rate": 4.995260682394098e-05, + "loss": 1.6183, + "step": 19701 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952601989554646e-05, + "loss": 1.6833, + "step": 19702 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952597154921986e-05, + "loss": 1.5801, + "step": 19703 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952592320043e-05, + "loss": 1.5981, + "step": 19704 + }, + { + "epoch": 0.02, + "learning_rate": 4.995258748491769e-05, + "loss": 1.5753, + "step": 19705 + }, + { + "epoch": 0.02, + "learning_rate": 4.995258264954607e-05, + "loss": 1.5866, + "step": 19706 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952577813928125e-05, + "loss": 1.508, + "step": 19707 + }, + { + "epoch": 0.02, + "learning_rate": 4.995257297806385e-05, + "loss": 1.272, + "step": 19708 + }, + { + "epoch": 0.02, + "learning_rate": 4.995256814195326e-05, + "loss": 0.4184, + "step": 19709 + }, + { + "epoch": 0.02, + "learning_rate": 4.995256330559635e-05, + "loss": 0.3692, + "step": 19710 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952558468993115e-05, + "loss": 0.3675, + "step": 19711 + }, + { + "epoch": 0.02, + "learning_rate": 4.995255363214356e-05, + "loss": 1.2069, + "step": 19712 + }, + { + "epoch": 0.02, + "learning_rate": 4.995254879504769e-05, + "loss": 2.1163, + "step": 19713 + }, + { + "epoch": 0.02, + "learning_rate": 4.995254395770549e-05, + "loss": 2.0282, + "step": 19714 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952539120116976e-05, + "loss": 1.9783, + "step": 19715 + }, + { + "epoch": 0.02, + "learning_rate": 4.995253428228214e-05, + "loss": 2.0105, + "step": 19716 + }, + { + "epoch": 0.02, + "learning_rate": 4.995252944420098e-05, + "loss": 1.9535, + "step": 19717 + }, + { + "epoch": 0.02, + "learning_rate": 4.99525246058735e-05, + "loss": 1.9379, + "step": 19718 + }, + { + "epoch": 0.02, + "learning_rate": 4.99525197672997e-05, + "loss": 1.8932, + "step": 19719 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952514928479575e-05, + "loss": 1.3612, + "step": 19720 + }, + { + "epoch": 0.02, + "learning_rate": 4.995251008941314e-05, + "loss": 0.8687, + "step": 19721 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952505250100376e-05, + "loss": 1.0693, + "step": 19722 + }, + { + "epoch": 0.02, + "learning_rate": 4.995250041054129e-05, + "loss": 1.0527, + "step": 19723 + }, + { + "epoch": 0.02, + "learning_rate": 4.995249557073589e-05, + "loss": 1.0204, + "step": 19724 + }, + { + "epoch": 0.02, + "learning_rate": 4.995249073068416e-05, + "loss": 1.1925, + "step": 19725 + }, + { + "epoch": 0.02, + "learning_rate": 4.995248589038611e-05, + "loss": 1.017, + "step": 19726 + }, + { + "epoch": 0.02, + "learning_rate": 4.995248104984175e-05, + "loss": 1.1953, + "step": 19727 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952476209051066e-05, + "loss": 0.7603, + "step": 19728 + }, + { + "epoch": 0.02, + "learning_rate": 4.995247136801406e-05, + "loss": 0.7274, + "step": 19729 + }, + { + "epoch": 0.02, + "learning_rate": 4.995246652673073e-05, + "loss": 1.0518, + "step": 19730 + }, + { + "epoch": 0.02, + "learning_rate": 4.995246168520108e-05, + "loss": 1.1496, + "step": 19731 + }, + { + "epoch": 0.02, + "learning_rate": 4.995245684342512e-05, + "loss": 1.3168, + "step": 19732 + }, + { + "epoch": 0.02, + "learning_rate": 4.995245200140283e-05, + "loss": 1.0891, + "step": 19733 + }, + { + "epoch": 0.02, + "learning_rate": 4.995244715913422e-05, + "loss": 1.0854, + "step": 19734 + }, + { + "epoch": 0.02, + "learning_rate": 4.99524423166193e-05, + "loss": 0.9309, + "step": 19735 + }, + { + "epoch": 0.02, + "learning_rate": 4.995243747385805e-05, + "loss": 1.1332, + "step": 19736 + }, + { + "epoch": 0.02, + "learning_rate": 4.995243263085049e-05, + "loss": 0.9958, + "step": 19737 + }, + { + "epoch": 0.02, + "learning_rate": 4.99524277875966e-05, + "loss": 0.9782, + "step": 19738 + }, + { + "epoch": 0.02, + "learning_rate": 4.995242294409639e-05, + "loss": 0.7962, + "step": 19739 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952418100349866e-05, + "loss": 0.3554, + "step": 19740 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952413256357024e-05, + "loss": 0.2748, + "step": 19741 + }, + { + "epoch": 0.02, + "learning_rate": 4.995240841211785e-05, + "loss": 0.3307, + "step": 19742 + }, + { + "epoch": 0.02, + "learning_rate": 4.995240356763237e-05, + "loss": 0.6902, + "step": 19743 + }, + { + "epoch": 0.02, + "learning_rate": 4.995239872290056e-05, + "loss": 0.8881, + "step": 19744 + }, + { + "epoch": 0.02, + "learning_rate": 4.995239387792244e-05, + "loss": 1.4307, + "step": 19745 + }, + { + "epoch": 0.02, + "learning_rate": 4.995238903269799e-05, + "loss": 1.3082, + "step": 19746 + }, + { + "epoch": 0.02, + "learning_rate": 4.995238418722723e-05, + "loss": 1.2909, + "step": 19747 + }, + { + "epoch": 0.02, + "learning_rate": 4.995237934151015e-05, + "loss": 1.2271, + "step": 19748 + }, + { + "epoch": 0.02, + "learning_rate": 4.995237449554674e-05, + "loss": 1.0485, + "step": 19749 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952369649337024e-05, + "loss": 1.1645, + "step": 19750 + }, + { + "epoch": 0.02, + "learning_rate": 4.995236480288098e-05, + "loss": 1.5562, + "step": 19751 + }, + { + "epoch": 0.02, + "learning_rate": 4.995235995617862e-05, + "loss": 0.9653, + "step": 19752 + }, + { + "epoch": 0.02, + "learning_rate": 4.995235510922994e-05, + "loss": 0.8649, + "step": 19753 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952350262034944e-05, + "loss": 0.6512, + "step": 19754 + }, + { + "epoch": 0.02, + "learning_rate": 4.995234541459363e-05, + "loss": 1.0656, + "step": 19755 + }, + { + "epoch": 0.02, + "learning_rate": 4.995234056690599e-05, + "loss": 0.9732, + "step": 19756 + }, + { + "epoch": 0.02, + "learning_rate": 4.995233571897203e-05, + "loss": 1.0942, + "step": 19757 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952330870791755e-05, + "loss": 1.0566, + "step": 19758 + }, + { + "epoch": 0.02, + "learning_rate": 4.995232602236516e-05, + "loss": 1.0405, + "step": 19759 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952321173692255e-05, + "loss": 1.0702, + "step": 19760 + }, + { + "epoch": 0.02, + "learning_rate": 4.995231632477302e-05, + "loss": 1.0673, + "step": 19761 + }, + { + "epoch": 0.02, + "learning_rate": 4.995231147560747e-05, + "loss": 1.099, + "step": 19762 + }, + { + "epoch": 0.02, + "learning_rate": 4.99523066261956e-05, + "loss": 1.2589, + "step": 19763 + }, + { + "epoch": 0.02, + "learning_rate": 4.995230177653741e-05, + "loss": 0.5698, + "step": 19764 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952296926632904e-05, + "loss": 0.5423, + "step": 19765 + }, + { + "epoch": 0.02, + "learning_rate": 4.995229207648208e-05, + "loss": 0.7262, + "step": 19766 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952287226084936e-05, + "loss": 1.0165, + "step": 19767 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952282375441476e-05, + "loss": 1.188, + "step": 19768 + }, + { + "epoch": 0.02, + "learning_rate": 4.995227752455169e-05, + "loss": 0.9412, + "step": 19769 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952272673415604e-05, + "loss": 1.111, + "step": 19770 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952267822033186e-05, + "loss": 0.9398, + "step": 19771 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952262970404443e-05, + "loss": 1.2999, + "step": 19772 + }, + { + "epoch": 0.02, + "learning_rate": 4.995225811852939e-05, + "loss": 1.0939, + "step": 19773 + }, + { + "epoch": 0.02, + "learning_rate": 4.995225326640802e-05, + "loss": 0.9866, + "step": 19774 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952248414040334e-05, + "loss": 0.8609, + "step": 19775 + }, + { + "epoch": 0.02, + "learning_rate": 4.995224356142633e-05, + "loss": 1.0768, + "step": 19776 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952238708566e-05, + "loss": 1.1745, + "step": 19777 + }, + { + "epoch": 0.02, + "learning_rate": 4.995223385545935e-05, + "loss": 0.7907, + "step": 19778 + }, + { + "epoch": 0.02, + "learning_rate": 4.995222900210639e-05, + "loss": 0.9543, + "step": 19779 + }, + { + "epoch": 0.02, + "learning_rate": 4.995222414850711e-05, + "loss": 1.2915, + "step": 19780 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952219294661517e-05, + "loss": 1.0397, + "step": 19781 + }, + { + "epoch": 0.02, + "learning_rate": 4.99522144405696e-05, + "loss": 1.0812, + "step": 19782 + }, + { + "epoch": 0.02, + "learning_rate": 4.995220958623137e-05, + "loss": 0.9663, + "step": 19783 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952204731646816e-05, + "loss": 1.1324, + "step": 19784 + }, + { + "epoch": 0.02, + "learning_rate": 4.995219987681595e-05, + "loss": 1.1988, + "step": 19785 + }, + { + "epoch": 0.02, + "learning_rate": 4.995219502173876e-05, + "loss": 1.2214, + "step": 19786 + }, + { + "epoch": 0.02, + "learning_rate": 4.995219016641526e-05, + "loss": 1.0391, + "step": 19787 + }, + { + "epoch": 0.02, + "learning_rate": 4.995218531084544e-05, + "loss": 1.1344, + "step": 19788 + }, + { + "epoch": 0.02, + "learning_rate": 4.99521804550293e-05, + "loss": 0.921, + "step": 19789 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952175598966844e-05, + "loss": 1.0889, + "step": 19790 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952170742658075e-05, + "loss": 0.2578, + "step": 19791 + }, + { + "epoch": 0.02, + "learning_rate": 4.995216588610298e-05, + "loss": 0.2139, + "step": 19792 + }, + { + "epoch": 0.02, + "learning_rate": 4.995216102930157e-05, + "loss": 0.1942, + "step": 19793 + }, + { + "epoch": 0.02, + "learning_rate": 4.995215617225385e-05, + "loss": 0.2236, + "step": 19794 + }, + { + "epoch": 0.02, + "learning_rate": 4.995215131495981e-05, + "loss": 0.2054, + "step": 19795 + }, + { + "epoch": 0.02, + "learning_rate": 4.995214645741945e-05, + "loss": 0.179, + "step": 19796 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952141599632775e-05, + "loss": 0.2465, + "step": 19797 + }, + { + "epoch": 0.02, + "learning_rate": 4.995213674159977e-05, + "loss": 0.6638, + "step": 19798 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952131883320466e-05, + "loss": 1.2053, + "step": 19799 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952127024794836e-05, + "loss": 1.0865, + "step": 19800 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952122166022895e-05, + "loss": 0.7951, + "step": 19801 + }, + { + "epoch": 0.02, + "learning_rate": 4.995211730700464e-05, + "loss": 0.7298, + "step": 19802 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952112447740056e-05, + "loss": 1.1313, + "step": 19803 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952107588229164e-05, + "loss": 1.3023, + "step": 19804 + }, + { + "epoch": 0.02, + "learning_rate": 4.995210272847195e-05, + "loss": 1.1852, + "step": 19805 + }, + { + "epoch": 0.02, + "learning_rate": 4.995209786846843e-05, + "loss": 1.0843, + "step": 19806 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952093008218584e-05, + "loss": 1.0489, + "step": 19807 + }, + { + "epoch": 0.02, + "learning_rate": 4.995208814772242e-05, + "loss": 1.3249, + "step": 19808 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952083286979946e-05, + "loss": 1.2792, + "step": 19809 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952078425991144e-05, + "loss": 1.366, + "step": 19810 + }, + { + "epoch": 0.02, + "learning_rate": 4.995207356475604e-05, + "loss": 1.2116, + "step": 19811 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952068703274615e-05, + "loss": 1.2203, + "step": 19812 + }, + { + "epoch": 0.02, + "learning_rate": 4.995206384154687e-05, + "loss": 1.044, + "step": 19813 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952058979572805e-05, + "loss": 1.3441, + "step": 19814 + }, + { + "epoch": 0.02, + "learning_rate": 4.995205411735243e-05, + "loss": 1.4112, + "step": 19815 + }, + { + "epoch": 0.02, + "learning_rate": 4.995204925488574e-05, + "loss": 1.0541, + "step": 19816 + }, + { + "epoch": 0.02, + "learning_rate": 4.995204439217274e-05, + "loss": 0.9978, + "step": 19817 + }, + { + "epoch": 0.02, + "learning_rate": 4.995203952921341e-05, + "loss": 1.1235, + "step": 19818 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952034666007775e-05, + "loss": 1.1931, + "step": 19819 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952029802555815e-05, + "loss": 1.1176, + "step": 19820 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952024938857544e-05, + "loss": 1.0524, + "step": 19821 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952020074912956e-05, + "loss": 1.034, + "step": 19822 + }, + { + "epoch": 0.02, + "learning_rate": 4.995201521072205e-05, + "loss": 0.928, + "step": 19823 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952010346284836e-05, + "loss": 1.0914, + "step": 19824 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952005481601296e-05, + "loss": 0.9478, + "step": 19825 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952000616671447e-05, + "loss": 1.2157, + "step": 19826 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951995751495286e-05, + "loss": 1.0494, + "step": 19827 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951990886072796e-05, + "loss": 1.0317, + "step": 19828 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951986020404e-05, + "loss": 1.3918, + "step": 19829 + }, + { + "epoch": 0.02, + "learning_rate": 4.995198115448889e-05, + "loss": 1.1973, + "step": 19830 + }, + { + "epoch": 0.02, + "learning_rate": 4.995197628832746e-05, + "loss": 0.9511, + "step": 19831 + }, + { + "epoch": 0.02, + "learning_rate": 4.995197142191972e-05, + "loss": 0.9834, + "step": 19832 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951966555265665e-05, + "loss": 1.1609, + "step": 19833 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951961688365284e-05, + "loss": 0.9816, + "step": 19834 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951956821218593e-05, + "loss": 1.5256, + "step": 19835 + }, + { + "epoch": 0.02, + "learning_rate": 4.995195195382559e-05, + "loss": 1.1088, + "step": 19836 + }, + { + "epoch": 0.02, + "learning_rate": 4.995194708618627e-05, + "loss": 0.4684, + "step": 19837 + }, + { + "epoch": 0.02, + "learning_rate": 4.995194221830063e-05, + "loss": 0.245, + "step": 19838 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951937350168685e-05, + "loss": 0.2415, + "step": 19839 + }, + { + "epoch": 0.02, + "learning_rate": 4.995193248179042e-05, + "loss": 0.1603, + "step": 19840 + }, + { + "epoch": 0.02, + "learning_rate": 4.995192761316584e-05, + "loss": 0.4589, + "step": 19841 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951922744294944e-05, + "loss": 0.9528, + "step": 19842 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951917875177736e-05, + "loss": 0.502, + "step": 19843 + }, + { + "epoch": 0.02, + "learning_rate": 4.995191300581421e-05, + "loss": 1.1688, + "step": 19844 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951908136204376e-05, + "loss": 1.187, + "step": 19845 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951903266348216e-05, + "loss": 1.0579, + "step": 19846 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951898396245746e-05, + "loss": 1.6157, + "step": 19847 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951893525896966e-05, + "loss": 0.3302, + "step": 19848 + }, + { + "epoch": 0.02, + "learning_rate": 4.995188865530186e-05, + "loss": 0.3947, + "step": 19849 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951883784460454e-05, + "loss": 1.3273, + "step": 19850 + }, + { + "epoch": 0.02, + "learning_rate": 4.995187891337273e-05, + "loss": 1.0389, + "step": 19851 + }, + { + "epoch": 0.02, + "learning_rate": 4.995187404203868e-05, + "loss": 1.0853, + "step": 19852 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951869170458334e-05, + "loss": 1.1117, + "step": 19853 + }, + { + "epoch": 0.02, + "learning_rate": 4.995186429863166e-05, + "loss": 1.1337, + "step": 19854 + }, + { + "epoch": 0.02, + "learning_rate": 4.995185942655868e-05, + "loss": 1.119, + "step": 19855 + }, + { + "epoch": 0.02, + "learning_rate": 4.995185455423937e-05, + "loss": 1.3506, + "step": 19856 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951849681673765e-05, + "loss": 1.5422, + "step": 19857 + }, + { + "epoch": 0.02, + "learning_rate": 4.995184480886183e-05, + "loss": 1.4147, + "step": 19858 + }, + { + "epoch": 0.02, + "learning_rate": 4.995183993580359e-05, + "loss": 0.9951, + "step": 19859 + }, + { + "epoch": 0.02, + "learning_rate": 4.995183506249904e-05, + "loss": 0.893, + "step": 19860 + }, + { + "epoch": 0.02, + "learning_rate": 4.995183018894817e-05, + "loss": 1.2312, + "step": 19861 + }, + { + "epoch": 0.02, + "learning_rate": 4.995182531515099e-05, + "loss": 1.1476, + "step": 19862 + }, + { + "epoch": 0.02, + "learning_rate": 4.995182044110749e-05, + "loss": 1.229, + "step": 19863 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951815566817676e-05, + "loss": 1.1509, + "step": 19864 + }, + { + "epoch": 0.02, + "learning_rate": 4.995181069228155e-05, + "loss": 0.9121, + "step": 19865 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951805817499116e-05, + "loss": 1.3532, + "step": 19866 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951800942470364e-05, + "loss": 1.2682, + "step": 19867 + }, + { + "epoch": 0.02, + "learning_rate": 4.99517960671953e-05, + "loss": 1.1546, + "step": 19868 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951791191673914e-05, + "loss": 1.1764, + "step": 19869 + }, + { + "epoch": 0.02, + "learning_rate": 4.995178631590622e-05, + "loss": 0.8395, + "step": 19870 + }, + { + "epoch": 0.02, + "learning_rate": 4.995178143989222e-05, + "loss": 1.2247, + "step": 19871 + }, + { + "epoch": 0.02, + "learning_rate": 4.99517765636319e-05, + "loss": 1.0671, + "step": 19872 + }, + { + "epoch": 0.02, + "learning_rate": 4.995177168712526e-05, + "loss": 1.1357, + "step": 19873 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951766810372316e-05, + "loss": 1.0832, + "step": 19874 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951761933373054e-05, + "loss": 1.1123, + "step": 19875 + }, + { + "epoch": 0.02, + "learning_rate": 4.995175705612748e-05, + "loss": 1.2845, + "step": 19876 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951752178635597e-05, + "loss": 1.3855, + "step": 19877 + }, + { + "epoch": 0.02, + "learning_rate": 4.99517473008974e-05, + "loss": 1.2405, + "step": 19878 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951742422912884e-05, + "loss": 0.6798, + "step": 19879 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951737544682056e-05, + "loss": 1.0642, + "step": 19880 + }, + { + "epoch": 0.02, + "learning_rate": 4.995173266620492e-05, + "loss": 1.1656, + "step": 19881 + }, + { + "epoch": 0.02, + "learning_rate": 4.995172778748147e-05, + "loss": 1.1354, + "step": 19882 + }, + { + "epoch": 0.02, + "learning_rate": 4.99517229085117e-05, + "loss": 1.1622, + "step": 19883 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951718029295625e-05, + "loss": 1.0806, + "step": 19884 + }, + { + "epoch": 0.02, + "learning_rate": 4.995171314983324e-05, + "loss": 1.1213, + "step": 19885 + }, + { + "epoch": 0.02, + "learning_rate": 4.995170827012453e-05, + "loss": 0.7129, + "step": 19886 + }, + { + "epoch": 0.02, + "learning_rate": 4.995170339016951e-05, + "loss": 1.279, + "step": 19887 + }, + { + "epoch": 0.02, + "learning_rate": 4.995169850996818e-05, + "loss": 1.1153, + "step": 19888 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951693629520544e-05, + "loss": 0.8464, + "step": 19889 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951688748826584e-05, + "loss": 0.95, + "step": 19890 + }, + { + "epoch": 0.02, + "learning_rate": 4.995168386788632e-05, + "loss": 1.214, + "step": 19891 + }, + { + "epoch": 0.02, + "learning_rate": 4.995167898669974e-05, + "loss": 1.145, + "step": 19892 + }, + { + "epoch": 0.02, + "learning_rate": 4.995167410526685e-05, + "loss": 1.1607, + "step": 19893 + }, + { + "epoch": 0.02, + "learning_rate": 4.995166922358764e-05, + "loss": 1.3608, + "step": 19894 + }, + { + "epoch": 0.02, + "learning_rate": 4.995166434166213e-05, + "loss": 1.2716, + "step": 19895 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951659459490294e-05, + "loss": 1.1194, + "step": 19896 + }, + { + "epoch": 0.02, + "learning_rate": 4.995165457707216e-05, + "loss": 1.3742, + "step": 19897 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951649694407705e-05, + "loss": 1.259, + "step": 19898 + }, + { + "epoch": 0.02, + "learning_rate": 4.995164481149694e-05, + "loss": 1.1134, + "step": 19899 + }, + { + "epoch": 0.02, + "learning_rate": 4.995163992833986e-05, + "loss": 0.8128, + "step": 19900 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951635044936474e-05, + "loss": 1.1163, + "step": 19901 + }, + { + "epoch": 0.02, + "learning_rate": 4.995163016128677e-05, + "loss": 1.0653, + "step": 19902 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951625277390754e-05, + "loss": 1.3579, + "step": 19903 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951620393248435e-05, + "loss": 1.5247, + "step": 19904 + }, + { + "epoch": 0.02, + "learning_rate": 4.995161550885979e-05, + "loss": 1.563, + "step": 19905 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951610624224846e-05, + "loss": 1.371, + "step": 19906 + }, + { + "epoch": 0.02, + "learning_rate": 4.995160573934358e-05, + "loss": 1.1175, + "step": 19907 + }, + { + "epoch": 0.02, + "learning_rate": 4.995160085421601e-05, + "loss": 0.8112, + "step": 19908 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951595968842124e-05, + "loss": 0.9531, + "step": 19909 + }, + { + "epoch": 0.02, + "learning_rate": 4.995159108322193e-05, + "loss": 1.0515, + "step": 19910 + }, + { + "epoch": 0.02, + "learning_rate": 4.995158619735542e-05, + "loss": 1.0934, + "step": 19911 + }, + { + "epoch": 0.02, + "learning_rate": 4.99515813112426e-05, + "loss": 0.6406, + "step": 19912 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951576424883475e-05, + "loss": 0.9396, + "step": 19913 + }, + { + "epoch": 0.02, + "learning_rate": 4.995157153827803e-05, + "loss": 1.1762, + "step": 19914 + }, + { + "epoch": 0.02, + "learning_rate": 4.995156665142627e-05, + "loss": 1.2595, + "step": 19915 + }, + { + "epoch": 0.02, + "learning_rate": 4.995156176432821e-05, + "loss": 1.1789, + "step": 19916 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951556876983827e-05, + "loss": 0.8944, + "step": 19917 + }, + { + "epoch": 0.02, + "learning_rate": 4.995155198939314e-05, + "loss": 1.191, + "step": 19918 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951547101556146e-05, + "loss": 1.2088, + "step": 19919 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951542213472834e-05, + "loss": 1.366, + "step": 19920 + }, + { + "epoch": 0.02, + "learning_rate": 4.995153732514322e-05, + "loss": 1.1819, + "step": 19921 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951532436567285e-05, + "loss": 1.2097, + "step": 19922 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951527547745034e-05, + "loss": 1.3596, + "step": 19923 + }, + { + "epoch": 0.02, + "learning_rate": 4.995152265867649e-05, + "loss": 1.0346, + "step": 19924 + }, + { + "epoch": 0.02, + "learning_rate": 4.995151776936162e-05, + "loss": 0.9956, + "step": 19925 + }, + { + "epoch": 0.02, + "learning_rate": 4.995151287980044e-05, + "loss": 0.7451, + "step": 19926 + }, + { + "epoch": 0.02, + "learning_rate": 4.995150798999295e-05, + "loss": 1.1893, + "step": 19927 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951503099939153e-05, + "loss": 1.1116, + "step": 19928 + }, + { + "epoch": 0.02, + "learning_rate": 4.995149820963905e-05, + "loss": 0.7205, + "step": 19929 + }, + { + "epoch": 0.02, + "learning_rate": 4.995149331909263e-05, + "loss": 1.1302, + "step": 19930 + }, + { + "epoch": 0.02, + "learning_rate": 4.99514884282999e-05, + "loss": 1.229, + "step": 19931 + }, + { + "epoch": 0.02, + "learning_rate": 4.995148353726086e-05, + "loss": 1.3831, + "step": 19932 + }, + { + "epoch": 0.02, + "learning_rate": 4.995147864597551e-05, + "loss": 0.9814, + "step": 19933 + }, + { + "epoch": 0.02, + "learning_rate": 4.995147375444385e-05, + "loss": 1.176, + "step": 19934 + }, + { + "epoch": 0.02, + "learning_rate": 4.995146886266587e-05, + "loss": 1.0441, + "step": 19935 + }, + { + "epoch": 0.02, + "learning_rate": 4.995146397064159e-05, + "loss": 1.398, + "step": 19936 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951459078371e-05, + "loss": 1.1432, + "step": 19937 + }, + { + "epoch": 0.02, + "learning_rate": 4.99514541858541e-05, + "loss": 1.2002, + "step": 19938 + }, + { + "epoch": 0.02, + "learning_rate": 4.995144929309089e-05, + "loss": 1.0776, + "step": 19939 + }, + { + "epoch": 0.02, + "learning_rate": 4.995144440008136e-05, + "loss": 0.7207, + "step": 19940 + }, + { + "epoch": 0.02, + "learning_rate": 4.995143950682553e-05, + "loss": 1.1236, + "step": 19941 + }, + { + "epoch": 0.02, + "learning_rate": 4.995143461332339e-05, + "loss": 0.9496, + "step": 19942 + }, + { + "epoch": 0.02, + "learning_rate": 4.995142971957493e-05, + "loss": 0.9429, + "step": 19943 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951424825580174e-05, + "loss": 1.1929, + "step": 19944 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951419931339095e-05, + "loss": 1.3029, + "step": 19945 + }, + { + "epoch": 0.02, + "learning_rate": 4.995141503685171e-05, + "loss": 1.1513, + "step": 19946 + }, + { + "epoch": 0.02, + "learning_rate": 4.995141014211802e-05, + "loss": 0.9263, + "step": 19947 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951405247138015e-05, + "loss": 1.1595, + "step": 19948 + }, + { + "epoch": 0.02, + "learning_rate": 4.99514003519117e-05, + "loss": 1.2248, + "step": 19949 + }, + { + "epoch": 0.02, + "learning_rate": 4.995139545643908e-05, + "loss": 1.1268, + "step": 19950 + }, + { + "epoch": 0.02, + "learning_rate": 4.995139056072015e-05, + "loss": 1.0567, + "step": 19951 + }, + { + "epoch": 0.02, + "learning_rate": 4.995138566475491e-05, + "loss": 1.1301, + "step": 19952 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951380768543354e-05, + "loss": 1.1523, + "step": 19953 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951375872085494e-05, + "loss": 1.8745, + "step": 19954 + }, + { + "epoch": 0.02, + "learning_rate": 4.995137097538133e-05, + "loss": 1.0256, + "step": 19955 + }, + { + "epoch": 0.02, + "learning_rate": 4.995136607843085e-05, + "loss": 1.0276, + "step": 19956 + }, + { + "epoch": 0.02, + "learning_rate": 4.995136118123406e-05, + "loss": 0.8394, + "step": 19957 + }, + { + "epoch": 0.02, + "learning_rate": 4.995135628379096e-05, + "loss": 0.9696, + "step": 19958 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951351386101555e-05, + "loss": 0.9729, + "step": 19959 + }, + { + "epoch": 0.02, + "learning_rate": 4.995134648816583e-05, + "loss": 1.1364, + "step": 19960 + }, + { + "epoch": 0.02, + "learning_rate": 4.995134158998381e-05, + "loss": 1.1014, + "step": 19961 + }, + { + "epoch": 0.02, + "learning_rate": 4.995133669155547e-05, + "loss": 1.1283, + "step": 19962 + }, + { + "epoch": 0.02, + "learning_rate": 4.995133179288083e-05, + "loss": 1.0353, + "step": 19963 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951326893959875e-05, + "loss": 0.9008, + "step": 19964 + }, + { + "epoch": 0.02, + "learning_rate": 4.995132199479261e-05, + "loss": 1.1224, + "step": 19965 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951317095379044e-05, + "loss": 1.0088, + "step": 19966 + }, + { + "epoch": 0.02, + "learning_rate": 4.995131219571916e-05, + "loss": 1.0762, + "step": 19967 + }, + { + "epoch": 0.02, + "learning_rate": 4.995130729581298e-05, + "loss": 1.0413, + "step": 19968 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951302395660477e-05, + "loss": 0.8766, + "step": 19969 + }, + { + "epoch": 0.02, + "learning_rate": 4.995129749526167e-05, + "loss": 1.0994, + "step": 19970 + }, + { + "epoch": 0.02, + "learning_rate": 4.995129259461655e-05, + "loss": 1.1821, + "step": 19971 + }, + { + "epoch": 0.02, + "learning_rate": 4.995128769372513e-05, + "loss": 1.0293, + "step": 19972 + }, + { + "epoch": 0.02, + "learning_rate": 4.995128279258739e-05, + "loss": 1.1084, + "step": 19973 + }, + { + "epoch": 0.02, + "learning_rate": 4.995127789120336e-05, + "loss": 1.1738, + "step": 19974 + }, + { + "epoch": 0.02, + "learning_rate": 4.995127298957301e-05, + "loss": 0.6615, + "step": 19975 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951268087696346e-05, + "loss": 1.1627, + "step": 19976 + }, + { + "epoch": 0.02, + "learning_rate": 4.995126318557338e-05, + "loss": 1.1334, + "step": 19977 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951258283204106e-05, + "loss": 1.0451, + "step": 19978 + }, + { + "epoch": 0.02, + "learning_rate": 4.995125338058853e-05, + "loss": 0.8299, + "step": 19979 + }, + { + "epoch": 0.02, + "learning_rate": 4.995124847772663e-05, + "loss": 0.9589, + "step": 19980 + }, + { + "epoch": 0.02, + "learning_rate": 4.995124357461843e-05, + "loss": 1.0443, + "step": 19981 + }, + { + "epoch": 0.02, + "learning_rate": 4.995123867126393e-05, + "loss": 1.3036, + "step": 19982 + }, + { + "epoch": 0.02, + "learning_rate": 4.995123376766311e-05, + "loss": 1.1141, + "step": 19983 + }, + { + "epoch": 0.02, + "learning_rate": 4.995122886381599e-05, + "loss": 1.1289, + "step": 19984 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951223959722556e-05, + "loss": 1.2433, + "step": 19985 + }, + { + "epoch": 0.02, + "learning_rate": 4.995121905538281e-05, + "loss": 1.2589, + "step": 19986 + }, + { + "epoch": 0.02, + "learning_rate": 4.995121415079676e-05, + "loss": 1.1457, + "step": 19987 + }, + { + "epoch": 0.02, + "learning_rate": 4.995120924596442e-05, + "loss": 1.1782, + "step": 19988 + }, + { + "epoch": 0.02, + "learning_rate": 4.995120434088575e-05, + "loss": 1.4586, + "step": 19989 + }, + { + "epoch": 0.02, + "learning_rate": 4.995119943556078e-05, + "loss": 1.1532, + "step": 19990 + }, + { + "epoch": 0.02, + "learning_rate": 4.99511945299895e-05, + "loss": 1.1147, + "step": 19991 + }, + { + "epoch": 0.02, + "learning_rate": 4.995118962417191e-05, + "loss": 0.8136, + "step": 19992 + }, + { + "epoch": 0.02, + "learning_rate": 4.995118471810802e-05, + "loss": 0.7862, + "step": 19993 + }, + { + "epoch": 0.02, + "learning_rate": 4.995117981179781e-05, + "loss": 0.8302, + "step": 19994 + }, + { + "epoch": 0.02, + "learning_rate": 4.99511749052413e-05, + "loss": 0.6526, + "step": 19995 + }, + { + "epoch": 0.02, + "learning_rate": 4.995116999843849e-05, + "loss": 0.9735, + "step": 19996 + }, + { + "epoch": 0.02, + "learning_rate": 4.995116509138936e-05, + "loss": 0.8907, + "step": 19997 + }, + { + "epoch": 0.02, + "learning_rate": 4.995116018409394e-05, + "loss": 0.858, + "step": 19998 + }, + { + "epoch": 0.02, + "learning_rate": 4.99511552765522e-05, + "loss": 0.8701, + "step": 19999 + }, + { + "epoch": 0.02, + "learning_rate": 4.995115036876415e-05, + "loss": 1.1036, + "step": 20000 + }, + { + "epoch": 0.02, + "eval_loss": 1.050695538520813, + "eval_runtime": 85.5766, + "eval_samples_per_second": 16.184, + "eval_steps_per_second": 4.055, + "step": 20000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951145460729795e-05, + "loss": 1.0065, + "step": 20001 + }, + { + "epoch": 0.02, + "learning_rate": 4.995114055244914e-05, + "loss": 1.0904, + "step": 20002 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951135643922164e-05, + "loss": 1.1266, + "step": 20003 + }, + { + "epoch": 0.02, + "learning_rate": 4.995113073514889e-05, + "loss": 0.9593, + "step": 20004 + }, + { + "epoch": 0.02, + "learning_rate": 4.995112582612931e-05, + "loss": 1.046, + "step": 20005 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951120916863426e-05, + "loss": 1.0173, + "step": 20006 + }, + { + "epoch": 0.02, + "learning_rate": 4.995111600735123e-05, + "loss": 1.2986, + "step": 20007 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951111097592724e-05, + "loss": 0.9342, + "step": 20008 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951106187587914e-05, + "loss": 1.1265, + "step": 20009 + }, + { + "epoch": 0.02, + "learning_rate": 4.99511012773368e-05, + "loss": 1.1941, + "step": 20010 + }, + { + "epoch": 0.02, + "learning_rate": 4.995109636683937e-05, + "loss": 1.189, + "step": 20011 + }, + { + "epoch": 0.02, + "learning_rate": 4.995109145609564e-05, + "loss": 1.3733, + "step": 20012 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951086545105606e-05, + "loss": 1.0884, + "step": 20013 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951081633869264e-05, + "loss": 0.9061, + "step": 20014 + }, + { + "epoch": 0.02, + "learning_rate": 4.995107672238661e-05, + "loss": 1.0685, + "step": 20015 + }, + { + "epoch": 0.02, + "learning_rate": 4.995107181065766e-05, + "loss": 0.9686, + "step": 20016 + }, + { + "epoch": 0.02, + "learning_rate": 4.99510668986824e-05, + "loss": 0.8972, + "step": 20017 + }, + { + "epoch": 0.02, + "learning_rate": 4.995106198646082e-05, + "loss": 1.0111, + "step": 20018 + }, + { + "epoch": 0.02, + "learning_rate": 4.995105707399295e-05, + "loss": 1.1011, + "step": 20019 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951052161278765e-05, + "loss": 1.2997, + "step": 20020 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951047248318285e-05, + "loss": 1.198, + "step": 20021 + }, + { + "epoch": 0.02, + "learning_rate": 4.995104233511149e-05, + "loss": 1.65, + "step": 20022 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951037421658386e-05, + "loss": 2.1101, + "step": 20023 + }, + { + "epoch": 0.02, + "learning_rate": 4.995103250795898e-05, + "loss": 0.8452, + "step": 20024 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951027594013266e-05, + "loss": 0.653, + "step": 20025 + }, + { + "epoch": 0.02, + "learning_rate": 4.995102267982125e-05, + "loss": 1.1717, + "step": 20026 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951017765382925e-05, + "loss": 1.0334, + "step": 20027 + }, + { + "epoch": 0.02, + "learning_rate": 4.995101285069829e-05, + "loss": 1.0921, + "step": 20028 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951007935767356e-05, + "loss": 1.4456, + "step": 20029 + }, + { + "epoch": 0.02, + "learning_rate": 4.9951003020590116e-05, + "loss": 1.0322, + "step": 20030 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950998105166565e-05, + "loss": 1.2649, + "step": 20031 + }, + { + "epoch": 0.02, + "learning_rate": 4.995099318949671e-05, + "loss": 0.9538, + "step": 20032 + }, + { + "epoch": 0.02, + "learning_rate": 4.995098827358055e-05, + "loss": 1.0172, + "step": 20033 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950983357418085e-05, + "loss": 1.0325, + "step": 20034 + }, + { + "epoch": 0.02, + "learning_rate": 4.995097844100931e-05, + "loss": 1.1101, + "step": 20035 + }, + { + "epoch": 0.02, + "learning_rate": 4.995097352435424e-05, + "loss": 0.8585, + "step": 20036 + }, + { + "epoch": 0.02, + "learning_rate": 4.995096860745285e-05, + "loss": 1.0438, + "step": 20037 + }, + { + "epoch": 0.02, + "learning_rate": 4.995096369030517e-05, + "loss": 0.5976, + "step": 20038 + }, + { + "epoch": 0.02, + "learning_rate": 4.995095877291117e-05, + "loss": 0.7719, + "step": 20039 + }, + { + "epoch": 0.02, + "learning_rate": 4.995095385527088e-05, + "loss": 1.177, + "step": 20040 + }, + { + "epoch": 0.02, + "learning_rate": 4.995094893738427e-05, + "loss": 1.3327, + "step": 20041 + }, + { + "epoch": 0.02, + "learning_rate": 4.995094401925137e-05, + "loss": 0.9201, + "step": 20042 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950939100872154e-05, + "loss": 1.1723, + "step": 20043 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950934182246636e-05, + "loss": 1.1364, + "step": 20044 + }, + { + "epoch": 0.02, + "learning_rate": 4.995092926337481e-05, + "loss": 0.9087, + "step": 20045 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950924344256676e-05, + "loss": 0.9299, + "step": 20046 + }, + { + "epoch": 0.02, + "learning_rate": 4.995091942489225e-05, + "loss": 0.6843, + "step": 20047 + }, + { + "epoch": 0.02, + "learning_rate": 4.995091450528151e-05, + "loss": 0.9089, + "step": 20048 + }, + { + "epoch": 0.02, + "learning_rate": 4.995090958542446e-05, + "loss": 0.9763, + "step": 20049 + }, + { + "epoch": 0.02, + "learning_rate": 4.995090466532111e-05, + "loss": 1.092, + "step": 20050 + }, + { + "epoch": 0.02, + "learning_rate": 4.995089974497146e-05, + "loss": 1.1509, + "step": 20051 + }, + { + "epoch": 0.02, + "learning_rate": 4.99508948243755e-05, + "loss": 0.759, + "step": 20052 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950889903533236e-05, + "loss": 0.7007, + "step": 20053 + }, + { + "epoch": 0.02, + "learning_rate": 4.995088498244467e-05, + "loss": 1.0243, + "step": 20054 + }, + { + "epoch": 0.02, + "learning_rate": 4.99508800611098e-05, + "loss": 1.1457, + "step": 20055 + }, + { + "epoch": 0.02, + "learning_rate": 4.995087513952862e-05, + "loss": 1.1078, + "step": 20056 + }, + { + "epoch": 0.02, + "learning_rate": 4.995087021770114e-05, + "loss": 1.1792, + "step": 20057 + }, + { + "epoch": 0.02, + "learning_rate": 4.995086529562735e-05, + "loss": 0.8517, + "step": 20058 + }, + { + "epoch": 0.02, + "learning_rate": 4.995086037330726e-05, + "loss": 1.0568, + "step": 20059 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950855450740865e-05, + "loss": 0.9297, + "step": 20060 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950850527928165e-05, + "loss": 1.0727, + "step": 20061 + }, + { + "epoch": 0.02, + "learning_rate": 4.995084560486916e-05, + "loss": 1.1094, + "step": 20062 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950840681563854e-05, + "loss": 1.0855, + "step": 20063 + }, + { + "epoch": 0.02, + "learning_rate": 4.995083575801224e-05, + "loss": 0.9035, + "step": 20064 + }, + { + "epoch": 0.02, + "learning_rate": 4.995083083421432e-05, + "loss": 1.1314, + "step": 20065 + }, + { + "epoch": 0.02, + "learning_rate": 4.995082591017011e-05, + "loss": 1.1572, + "step": 20066 + }, + { + "epoch": 0.02, + "learning_rate": 4.995082098587958e-05, + "loss": 1.3336, + "step": 20067 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950816061342756e-05, + "loss": 0.8142, + "step": 20068 + }, + { + "epoch": 0.02, + "learning_rate": 4.995081113655962e-05, + "loss": 0.9544, + "step": 20069 + }, + { + "epoch": 0.02, + "learning_rate": 4.995080621153019e-05, + "loss": 0.9004, + "step": 20070 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950801286254445e-05, + "loss": 0.7088, + "step": 20071 + }, + { + "epoch": 0.02, + "learning_rate": 4.99507963607324e-05, + "loss": 1.2632, + "step": 20072 + }, + { + "epoch": 0.02, + "learning_rate": 4.995079143496405e-05, + "loss": 0.9631, + "step": 20073 + }, + { + "epoch": 0.02, + "learning_rate": 4.995078650894941e-05, + "loss": 0.6777, + "step": 20074 + }, + { + "epoch": 0.02, + "learning_rate": 4.995078158268845e-05, + "loss": 0.9409, + "step": 20075 + }, + { + "epoch": 0.02, + "learning_rate": 4.995077665618119e-05, + "loss": 0.8817, + "step": 20076 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950771729427624e-05, + "loss": 0.9843, + "step": 20077 + }, + { + "epoch": 0.02, + "learning_rate": 4.995076680242776e-05, + "loss": 1.1929, + "step": 20078 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950761875181597e-05, + "loss": 1.1861, + "step": 20079 + }, + { + "epoch": 0.02, + "learning_rate": 4.995075694768912e-05, + "loss": 1.0514, + "step": 20080 + }, + { + "epoch": 0.02, + "learning_rate": 4.995075201995034e-05, + "loss": 1.137, + "step": 20081 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950747091965265e-05, + "loss": 1.1693, + "step": 20082 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950742163733885e-05, + "loss": 1.0282, + "step": 20083 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950737235256195e-05, + "loss": 1.0292, + "step": 20084 + }, + { + "epoch": 0.02, + "learning_rate": 4.995073230653221e-05, + "loss": 1.205, + "step": 20085 + }, + { + "epoch": 0.02, + "learning_rate": 4.995072737756192e-05, + "loss": 1.0222, + "step": 20086 + }, + { + "epoch": 0.02, + "learning_rate": 4.995072244834532e-05, + "loss": 0.9928, + "step": 20087 + }, + { + "epoch": 0.02, + "learning_rate": 4.995071751888243e-05, + "loss": 1.2947, + "step": 20088 + }, + { + "epoch": 0.02, + "learning_rate": 4.995071258917322e-05, + "loss": 1.2765, + "step": 20089 + }, + { + "epoch": 0.02, + "learning_rate": 4.995070765921772e-05, + "loss": 1.2854, + "step": 20090 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950702729015916e-05, + "loss": 1.059, + "step": 20091 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950697798567803e-05, + "loss": 1.0854, + "step": 20092 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950692867873394e-05, + "loss": 1.2086, + "step": 20093 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950687936932674e-05, + "loss": 1.0258, + "step": 20094 + }, + { + "epoch": 0.02, + "learning_rate": 4.995068300574566e-05, + "loss": 1.0001, + "step": 20095 + }, + { + "epoch": 0.02, + "learning_rate": 4.995067807431234e-05, + "loss": 0.8674, + "step": 20096 + }, + { + "epoch": 0.02, + "learning_rate": 4.995067314263272e-05, + "loss": 1.2002, + "step": 20097 + }, + { + "epoch": 0.02, + "learning_rate": 4.995066821070679e-05, + "loss": 1.0499, + "step": 20098 + }, + { + "epoch": 0.02, + "learning_rate": 4.995066327853456e-05, + "loss": 1.2658, + "step": 20099 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950658346116034e-05, + "loss": 1.2492, + "step": 20100 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950653413451195e-05, + "loss": 1.1206, + "step": 20101 + }, + { + "epoch": 0.02, + "learning_rate": 4.995064848054006e-05, + "loss": 1.1142, + "step": 20102 + }, + { + "epoch": 0.02, + "learning_rate": 4.995064354738263e-05, + "loss": 1.0411, + "step": 20103 + }, + { + "epoch": 0.02, + "learning_rate": 4.995063861397889e-05, + "loss": 1.0506, + "step": 20104 + }, + { + "epoch": 0.02, + "learning_rate": 4.995063368032885e-05, + "loss": 0.8861, + "step": 20105 + }, + { + "epoch": 0.02, + "learning_rate": 4.99506287464325e-05, + "loss": 1.3545, + "step": 20106 + }, + { + "epoch": 0.02, + "learning_rate": 4.995062381228986e-05, + "loss": 1.0546, + "step": 20107 + }, + { + "epoch": 0.02, + "learning_rate": 4.995061887790091e-05, + "loss": 1.162, + "step": 20108 + }, + { + "epoch": 0.02, + "learning_rate": 4.995061394326566e-05, + "loss": 1.1609, + "step": 20109 + }, + { + "epoch": 0.02, + "learning_rate": 4.995060900838411e-05, + "loss": 1.2274, + "step": 20110 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950604073256254e-05, + "loss": 1.3624, + "step": 20111 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950599137882096e-05, + "loss": 1.1483, + "step": 20112 + }, + { + "epoch": 0.02, + "learning_rate": 4.995059420226164e-05, + "loss": 1.1703, + "step": 20113 + }, + { + "epoch": 0.02, + "learning_rate": 4.995058926639488e-05, + "loss": 0.8651, + "step": 20114 + }, + { + "epoch": 0.02, + "learning_rate": 4.995058433028182e-05, + "loss": 0.9816, + "step": 20115 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950579393922455e-05, + "loss": 0.9014, + "step": 20116 + }, + { + "epoch": 0.02, + "learning_rate": 4.995057445731679e-05, + "loss": 0.9993, + "step": 20117 + }, + { + "epoch": 0.02, + "learning_rate": 4.995056952046483e-05, + "loss": 1.0268, + "step": 20118 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950564583366564e-05, + "loss": 0.8134, + "step": 20119 + }, + { + "epoch": 0.02, + "learning_rate": 4.995055964602199e-05, + "loss": 1.3194, + "step": 20120 + }, + { + "epoch": 0.02, + "learning_rate": 4.995055470843112e-05, + "loss": 0.8554, + "step": 20121 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950549770593946e-05, + "loss": 1.1572, + "step": 20122 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950544832510475e-05, + "loss": 1.0276, + "step": 20123 + }, + { + "epoch": 0.02, + "learning_rate": 4.99505398941807e-05, + "loss": 1.1513, + "step": 20124 + }, + { + "epoch": 0.02, + "learning_rate": 4.995053495560463e-05, + "loss": 1.0222, + "step": 20125 + }, + { + "epoch": 0.02, + "learning_rate": 4.995053001678225e-05, + "loss": 1.0026, + "step": 20126 + }, + { + "epoch": 0.02, + "learning_rate": 4.995052507771357e-05, + "loss": 1.0999, + "step": 20127 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950520138398594e-05, + "loss": 0.9272, + "step": 20128 + }, + { + "epoch": 0.02, + "learning_rate": 4.995051519883731e-05, + "loss": 0.5443, + "step": 20129 + }, + { + "epoch": 0.02, + "learning_rate": 4.995051025902974e-05, + "loss": 0.4532, + "step": 20130 + }, + { + "epoch": 0.02, + "learning_rate": 4.995050531897585e-05, + "loss": 1.1827, + "step": 20131 + }, + { + "epoch": 0.02, + "learning_rate": 4.995050037867567e-05, + "loss": 1.1481, + "step": 20132 + }, + { + "epoch": 0.02, + "learning_rate": 4.995049543812918e-05, + "loss": 0.9864, + "step": 20133 + }, + { + "epoch": 0.02, + "learning_rate": 4.99504904973364e-05, + "loss": 0.9894, + "step": 20134 + }, + { + "epoch": 0.02, + "learning_rate": 4.995048555629732e-05, + "loss": 1.2225, + "step": 20135 + }, + { + "epoch": 0.02, + "learning_rate": 4.995048061501193e-05, + "loss": 1.2814, + "step": 20136 + }, + { + "epoch": 0.02, + "learning_rate": 4.995047567348025e-05, + "loss": 1.23, + "step": 20137 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950470731702256e-05, + "loss": 1.2809, + "step": 20138 + }, + { + "epoch": 0.02, + "learning_rate": 4.995046578967797e-05, + "loss": 1.4328, + "step": 20139 + }, + { + "epoch": 0.02, + "learning_rate": 4.995046084740739e-05, + "loss": 1.3739, + "step": 20140 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950455904890494e-05, + "loss": 1.2828, + "step": 20141 + }, + { + "epoch": 0.02, + "learning_rate": 4.995045096212731e-05, + "loss": 0.9902, + "step": 20142 + }, + { + "epoch": 0.02, + "learning_rate": 4.995044601911782e-05, + "loss": 1.2222, + "step": 20143 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950441075862033e-05, + "loss": 0.5884, + "step": 20144 + }, + { + "epoch": 0.02, + "learning_rate": 4.995043613235994e-05, + "loss": 1.0651, + "step": 20145 + }, + { + "epoch": 0.02, + "learning_rate": 4.995043118861155e-05, + "loss": 1.1243, + "step": 20146 + }, + { + "epoch": 0.02, + "learning_rate": 4.995042624461687e-05, + "loss": 0.9865, + "step": 20147 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950421300375874e-05, + "loss": 1.0428, + "step": 20148 + }, + { + "epoch": 0.02, + "learning_rate": 4.995041635588858e-05, + "loss": 1.1082, + "step": 20149 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950411411154996e-05, + "loss": 1.3345, + "step": 20150 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950406466175104e-05, + "loss": 1.0685, + "step": 20151 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950401520948917e-05, + "loss": 0.8446, + "step": 20152 + }, + { + "epoch": 0.02, + "learning_rate": 4.995039657547642e-05, + "loss": 0.9485, + "step": 20153 + }, + { + "epoch": 0.02, + "learning_rate": 4.995039162975763e-05, + "loss": 1.1102, + "step": 20154 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950386683792545e-05, + "loss": 1.2435, + "step": 20155 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950381737581156e-05, + "loss": 1.1389, + "step": 20156 + }, + { + "epoch": 0.02, + "learning_rate": 4.995037679112346e-05, + "loss": 1.3285, + "step": 20157 + }, + { + "epoch": 0.02, + "learning_rate": 4.995037184441948e-05, + "loss": 0.9419, + "step": 20158 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950366897469194e-05, + "loss": 1.0364, + "step": 20159 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950361950272605e-05, + "loss": 1.1671, + "step": 20160 + }, + { + "epoch": 0.02, + "learning_rate": 4.995035700282972e-05, + "loss": 1.0167, + "step": 20161 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950352055140534e-05, + "loss": 1.1601, + "step": 20162 + }, + { + "epoch": 0.02, + "learning_rate": 4.995034710720505e-05, + "loss": 1.352, + "step": 20163 + }, + { + "epoch": 0.02, + "learning_rate": 4.995034215902326e-05, + "loss": 1.1218, + "step": 20164 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950337210595176e-05, + "loss": 1.4909, + "step": 20165 + }, + { + "epoch": 0.02, + "learning_rate": 4.99503322619208e-05, + "loss": 1.2785, + "step": 20166 + }, + { + "epoch": 0.02, + "learning_rate": 4.995032731300011e-05, + "loss": 1.038, + "step": 20167 + }, + { + "epoch": 0.02, + "learning_rate": 4.995032236383314e-05, + "loss": 0.8988, + "step": 20168 + }, + { + "epoch": 0.02, + "learning_rate": 4.995031741441985e-05, + "loss": 0.8175, + "step": 20169 + }, + { + "epoch": 0.02, + "learning_rate": 4.995031246476027e-05, + "loss": 0.919, + "step": 20170 + }, + { + "epoch": 0.02, + "learning_rate": 4.995030751485439e-05, + "loss": 0.8404, + "step": 20171 + }, + { + "epoch": 0.02, + "learning_rate": 4.995030256470221e-05, + "loss": 1.2985, + "step": 20172 + }, + { + "epoch": 0.02, + "learning_rate": 4.995029761430374e-05, + "loss": 1.2081, + "step": 20173 + }, + { + "epoch": 0.02, + "learning_rate": 4.995029266365896e-05, + "loss": 1.0379, + "step": 20174 + }, + { + "epoch": 0.02, + "learning_rate": 4.995028771276789e-05, + "loss": 0.9316, + "step": 20175 + }, + { + "epoch": 0.02, + "learning_rate": 4.995028276163052e-05, + "loss": 0.7401, + "step": 20176 + }, + { + "epoch": 0.02, + "learning_rate": 4.995027781024685e-05, + "loss": 0.8672, + "step": 20177 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950272858616873e-05, + "loss": 0.9295, + "step": 20178 + }, + { + "epoch": 0.02, + "learning_rate": 4.995026790674061e-05, + "loss": 1.2057, + "step": 20179 + }, + { + "epoch": 0.02, + "learning_rate": 4.995026295461804e-05, + "loss": 0.9457, + "step": 20180 + }, + { + "epoch": 0.02, + "learning_rate": 4.995025800224917e-05, + "loss": 1.0979, + "step": 20181 + }, + { + "epoch": 0.02, + "learning_rate": 4.995025304963401e-05, + "loss": 1.2415, + "step": 20182 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950248096772545e-05, + "loss": 1.1573, + "step": 20183 + }, + { + "epoch": 0.02, + "learning_rate": 4.995024314366479e-05, + "loss": 1.164, + "step": 20184 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950238190310724e-05, + "loss": 1.1329, + "step": 20185 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950233236710376e-05, + "loss": 0.9043, + "step": 20186 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950228282863724e-05, + "loss": 1.0337, + "step": 20187 + }, + { + "epoch": 0.02, + "learning_rate": 4.995022332877076e-05, + "loss": 1.4155, + "step": 20188 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950218374431515e-05, + "loss": 1.2085, + "step": 20189 + }, + { + "epoch": 0.02, + "learning_rate": 4.995021341984596e-05, + "loss": 1.3255, + "step": 20190 + }, + { + "epoch": 0.02, + "learning_rate": 4.995020846501411e-05, + "loss": 0.9157, + "step": 20191 + }, + { + "epoch": 0.02, + "learning_rate": 4.995020350993597e-05, + "loss": 0.9245, + "step": 20192 + }, + { + "epoch": 0.02, + "learning_rate": 4.995019855461153e-05, + "loss": 1.3599, + "step": 20193 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950193599040785e-05, + "loss": 0.6718, + "step": 20194 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950188643223744e-05, + "loss": 0.2402, + "step": 20195 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950183687160407e-05, + "loss": 0.7368, + "step": 20196 + }, + { + "epoch": 0.02, + "learning_rate": 4.995017873085077e-05, + "loss": 1.0651, + "step": 20197 + }, + { + "epoch": 0.02, + "learning_rate": 4.995017377429484e-05, + "loss": 1.0939, + "step": 20198 + }, + { + "epoch": 0.02, + "learning_rate": 4.995016881749261e-05, + "loss": 1.2186, + "step": 20199 + }, + { + "epoch": 0.02, + "learning_rate": 4.995016386044408e-05, + "loss": 1.2096, + "step": 20200 + }, + { + "epoch": 0.02, + "learning_rate": 4.995015890314926e-05, + "loss": 1.23, + "step": 20201 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950153945608134e-05, + "loss": 1.1173, + "step": 20202 + }, + { + "epoch": 0.02, + "learning_rate": 4.995014898782072e-05, + "loss": 1.1534, + "step": 20203 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950144029787e-05, + "loss": 0.979, + "step": 20204 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950139071506984e-05, + "loss": 1.1825, + "step": 20205 + }, + { + "epoch": 0.02, + "learning_rate": 4.995013411298067e-05, + "loss": 1.1874, + "step": 20206 + }, + { + "epoch": 0.02, + "learning_rate": 4.995012915420806e-05, + "loss": 1.1811, + "step": 20207 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950124195189155e-05, + "loss": 1.1252, + "step": 20208 + }, + { + "epoch": 0.02, + "learning_rate": 4.995011923592395e-05, + "loss": 1.206, + "step": 20209 + }, + { + "epoch": 0.02, + "learning_rate": 4.995011427641245e-05, + "loss": 1.2616, + "step": 20210 + }, + { + "epoch": 0.02, + "learning_rate": 4.995010931665466e-05, + "loss": 1.1561, + "step": 20211 + }, + { + "epoch": 0.02, + "learning_rate": 4.995010435665056e-05, + "loss": 1.6784, + "step": 20212 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950099396400176e-05, + "loss": 1.187, + "step": 20213 + }, + { + "epoch": 0.02, + "learning_rate": 4.995009443590348e-05, + "loss": 1.1191, + "step": 20214 + }, + { + "epoch": 0.02, + "learning_rate": 4.99500894751605e-05, + "loss": 0.9832, + "step": 20215 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950084514171216e-05, + "loss": 0.9797, + "step": 20216 + }, + { + "epoch": 0.02, + "learning_rate": 4.995007955293564e-05, + "loss": 1.0893, + "step": 20217 + }, + { + "epoch": 0.02, + "learning_rate": 4.995007459145376e-05, + "loss": 1.1748, + "step": 20218 + }, + { + "epoch": 0.02, + "learning_rate": 4.995006962972559e-05, + "loss": 1.1114, + "step": 20219 + }, + { + "epoch": 0.02, + "learning_rate": 4.995006466775112e-05, + "loss": 1.2227, + "step": 20220 + }, + { + "epoch": 0.02, + "learning_rate": 4.995005970553036e-05, + "loss": 0.9083, + "step": 20221 + }, + { + "epoch": 0.02, + "learning_rate": 4.99500547430633e-05, + "loss": 0.1676, + "step": 20222 + }, + { + "epoch": 0.02, + "learning_rate": 4.995004978034994e-05, + "loss": 0.2449, + "step": 20223 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950044817390285e-05, + "loss": 0.3735, + "step": 20224 + }, + { + "epoch": 0.02, + "learning_rate": 4.995003985418434e-05, + "loss": 1.2023, + "step": 20225 + }, + { + "epoch": 0.02, + "learning_rate": 4.995003489073209e-05, + "loss": 1.1682, + "step": 20226 + }, + { + "epoch": 0.02, + "learning_rate": 4.995002992703355e-05, + "loss": 1.2588, + "step": 20227 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950024963088706e-05, + "loss": 1.335, + "step": 20228 + }, + { + "epoch": 0.02, + "learning_rate": 4.995001999889757e-05, + "loss": 1.1518, + "step": 20229 + }, + { + "epoch": 0.02, + "learning_rate": 4.995001503446014e-05, + "loss": 1.1241, + "step": 20230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9950010069776414e-05, + "loss": 0.9709, + "step": 20231 + }, + { + "epoch": 0.02, + "learning_rate": 4.995000510484639e-05, + "loss": 1.163, + "step": 20232 + }, + { + "epoch": 0.02, + "learning_rate": 4.995000013967007e-05, + "loss": 1.0282, + "step": 20233 + }, + { + "epoch": 0.02, + "learning_rate": 4.994999517424746e-05, + "loss": 1.606, + "step": 20234 + }, + { + "epoch": 0.02, + "learning_rate": 4.994999020857854e-05, + "loss": 1.2887, + "step": 20235 + }, + { + "epoch": 0.02, + "learning_rate": 4.994998524266334e-05, + "loss": 1.0431, + "step": 20236 + }, + { + "epoch": 0.02, + "learning_rate": 4.994998027650184e-05, + "loss": 1.0777, + "step": 20237 + }, + { + "epoch": 0.02, + "learning_rate": 4.994997531009404e-05, + "loss": 0.995, + "step": 20238 + }, + { + "epoch": 0.02, + "learning_rate": 4.994997034343994e-05, + "loss": 0.9552, + "step": 20239 + }, + { + "epoch": 0.02, + "learning_rate": 4.994996537653955e-05, + "loss": 1.3656, + "step": 20240 + }, + { + "epoch": 0.02, + "learning_rate": 4.994996040939287e-05, + "loss": 1.3398, + "step": 20241 + }, + { + "epoch": 0.02, + "learning_rate": 4.994995544199989e-05, + "loss": 1.2442, + "step": 20242 + }, + { + "epoch": 0.02, + "learning_rate": 4.994995047436062e-05, + "loss": 0.702, + "step": 20243 + }, + { + "epoch": 0.02, + "learning_rate": 4.994994550647504e-05, + "loss": 1.2549, + "step": 20244 + }, + { + "epoch": 0.02, + "learning_rate": 4.994994053834318e-05, + "loss": 1.011, + "step": 20245 + }, + { + "epoch": 0.02, + "learning_rate": 4.994993556996501e-05, + "loss": 1.2791, + "step": 20246 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949930601340554e-05, + "loss": 1.1118, + "step": 20247 + }, + { + "epoch": 0.02, + "learning_rate": 4.994992563246981e-05, + "loss": 1.1697, + "step": 20248 + }, + { + "epoch": 0.02, + "learning_rate": 4.994992066335276e-05, + "loss": 1.3886, + "step": 20249 + }, + { + "epoch": 0.02, + "learning_rate": 4.994991569398941e-05, + "loss": 1.0775, + "step": 20250 + }, + { + "epoch": 0.02, + "learning_rate": 4.994991072437978e-05, + "loss": 1.2219, + "step": 20251 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949905754523845e-05, + "loss": 1.4358, + "step": 20252 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949900784421614e-05, + "loss": 0.98, + "step": 20253 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949895814073093e-05, + "loss": 0.8539, + "step": 20254 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949890843478275e-05, + "loss": 0.9379, + "step": 20255 + }, + { + "epoch": 0.02, + "learning_rate": 4.994988587263717e-05, + "loss": 0.5159, + "step": 20256 + }, + { + "epoch": 0.02, + "learning_rate": 4.994988090154976e-05, + "loss": 0.8544, + "step": 20257 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949875930216054e-05, + "loss": 0.6313, + "step": 20258 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949870958636056e-05, + "loss": 0.9562, + "step": 20259 + }, + { + "epoch": 0.02, + "learning_rate": 4.994986598680977e-05, + "loss": 0.8234, + "step": 20260 + }, + { + "epoch": 0.02, + "learning_rate": 4.994986101473719e-05, + "loss": 1.097, + "step": 20261 + }, + { + "epoch": 0.02, + "learning_rate": 4.99498560424183e-05, + "loss": 1.0396, + "step": 20262 + }, + { + "epoch": 0.02, + "learning_rate": 4.994985106985313e-05, + "loss": 1.1175, + "step": 20263 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949846097041664e-05, + "loss": 0.9861, + "step": 20264 + }, + { + "epoch": 0.02, + "learning_rate": 4.99498411239839e-05, + "loss": 1.259, + "step": 20265 + }, + { + "epoch": 0.02, + "learning_rate": 4.994983615067984e-05, + "loss": 1.0801, + "step": 20266 + }, + { + "epoch": 0.02, + "learning_rate": 4.994983117712949e-05, + "loss": 1.0298, + "step": 20267 + }, + { + "epoch": 0.02, + "learning_rate": 4.994982620333284e-05, + "loss": 1.2786, + "step": 20268 + }, + { + "epoch": 0.02, + "learning_rate": 4.99498212292899e-05, + "loss": 1.1202, + "step": 20269 + }, + { + "epoch": 0.02, + "learning_rate": 4.994981625500067e-05, + "loss": 1.2205, + "step": 20270 + }, + { + "epoch": 0.02, + "learning_rate": 4.994981128046514e-05, + "loss": 0.9688, + "step": 20271 + }, + { + "epoch": 0.02, + "learning_rate": 4.994980630568332e-05, + "loss": 0.9288, + "step": 20272 + }, + { + "epoch": 0.02, + "learning_rate": 4.99498013306552e-05, + "loss": 1.2016, + "step": 20273 + }, + { + "epoch": 0.02, + "learning_rate": 4.994979635538079e-05, + "loss": 1.3092, + "step": 20274 + }, + { + "epoch": 0.02, + "learning_rate": 4.994979137986009e-05, + "loss": 1.02, + "step": 20275 + }, + { + "epoch": 0.02, + "learning_rate": 4.994978640409308e-05, + "loss": 0.9183, + "step": 20276 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949781428079795e-05, + "loss": 1.1259, + "step": 20277 + }, + { + "epoch": 0.02, + "learning_rate": 4.994977645182021e-05, + "loss": 1.1506, + "step": 20278 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949771475314335e-05, + "loss": 0.9443, + "step": 20279 + }, + { + "epoch": 0.02, + "learning_rate": 4.994976649856216e-05, + "loss": 1.1103, + "step": 20280 + }, + { + "epoch": 0.02, + "learning_rate": 4.994976152156369e-05, + "loss": 1.1721, + "step": 20281 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949756544318924e-05, + "loss": 1.0795, + "step": 20282 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949751566827875e-05, + "loss": 1.1299, + "step": 20283 + }, + { + "epoch": 0.02, + "learning_rate": 4.994974658909053e-05, + "loss": 0.9313, + "step": 20284 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949741611106895e-05, + "loss": 1.1373, + "step": 20285 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949736632876956e-05, + "loss": 1.0785, + "step": 20286 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949731654400727e-05, + "loss": 0.9043, + "step": 20287 + }, + { + "epoch": 0.02, + "learning_rate": 4.994972667567821e-05, + "loss": 1.1225, + "step": 20288 + }, + { + "epoch": 0.02, + "learning_rate": 4.99497216967094e-05, + "loss": 1.2617, + "step": 20289 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949716717494285e-05, + "loss": 0.9974, + "step": 20290 + }, + { + "epoch": 0.02, + "learning_rate": 4.994971173803289e-05, + "loss": 0.5454, + "step": 20291 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949706758325196e-05, + "loss": 0.9964, + "step": 20292 + }, + { + "epoch": 0.02, + "learning_rate": 4.994970177837121e-05, + "loss": 0.7908, + "step": 20293 + }, + { + "epoch": 0.02, + "learning_rate": 4.994969679817093e-05, + "loss": 1.1258, + "step": 20294 + }, + { + "epoch": 0.02, + "learning_rate": 4.994969181772436e-05, + "loss": 1.3919, + "step": 20295 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949686837031496e-05, + "loss": 1.4259, + "step": 20296 + }, + { + "epoch": 0.02, + "learning_rate": 4.994968185609234e-05, + "loss": 1.1144, + "step": 20297 + }, + { + "epoch": 0.02, + "learning_rate": 4.994967687490689e-05, + "loss": 0.9763, + "step": 20298 + }, + { + "epoch": 0.02, + "learning_rate": 4.994967189347515e-05, + "loss": 0.743, + "step": 20299 + }, + { + "epoch": 0.02, + "learning_rate": 4.994966691179711e-05, + "loss": 1.0759, + "step": 20300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949661929872784e-05, + "loss": 1.1583, + "step": 20301 + }, + { + "epoch": 0.02, + "learning_rate": 4.994965694770216e-05, + "loss": 1.1869, + "step": 20302 + }, + { + "epoch": 0.02, + "learning_rate": 4.994965196528525e-05, + "loss": 1.0493, + "step": 20303 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949646982622044e-05, + "loss": 1.0201, + "step": 20304 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949641999712546e-05, + "loss": 0.8446, + "step": 20305 + }, + { + "epoch": 0.02, + "learning_rate": 4.994963701655676e-05, + "loss": 1.0621, + "step": 20306 + }, + { + "epoch": 0.02, + "learning_rate": 4.994963203315467e-05, + "loss": 1.0534, + "step": 20307 + }, + { + "epoch": 0.02, + "learning_rate": 4.99496270495063e-05, + "loss": 1.2442, + "step": 20308 + }, + { + "epoch": 0.02, + "learning_rate": 4.994962206561163e-05, + "loss": 1.3193, + "step": 20309 + }, + { + "epoch": 0.02, + "learning_rate": 4.994961708147068e-05, + "loss": 1.0634, + "step": 20310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949612097083426e-05, + "loss": 1.3033, + "step": 20311 + }, + { + "epoch": 0.02, + "learning_rate": 4.994960711244988e-05, + "loss": 0.7795, + "step": 20312 + }, + { + "epoch": 0.02, + "learning_rate": 4.994960212757005e-05, + "loss": 1.0243, + "step": 20313 + }, + { + "epoch": 0.02, + "learning_rate": 4.994959714244392e-05, + "loss": 1.2474, + "step": 20314 + }, + { + "epoch": 0.02, + "learning_rate": 4.99495921570715e-05, + "loss": 1.7791, + "step": 20315 + }, + { + "epoch": 0.02, + "learning_rate": 4.994958717145279e-05, + "loss": 1.1594, + "step": 20316 + }, + { + "epoch": 0.02, + "learning_rate": 4.994958218558778e-05, + "loss": 1.1319, + "step": 20317 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949577199476494e-05, + "loss": 1.2082, + "step": 20318 + }, + { + "epoch": 0.02, + "learning_rate": 4.994957221311891e-05, + "loss": 1.3547, + "step": 20319 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949567226515025e-05, + "loss": 1.0767, + "step": 20320 + }, + { + "epoch": 0.02, + "learning_rate": 4.994956223966486e-05, + "loss": 0.8488, + "step": 20321 + }, + { + "epoch": 0.02, + "learning_rate": 4.99495572525684e-05, + "loss": 0.9438, + "step": 20322 + }, + { + "epoch": 0.02, + "learning_rate": 4.994955226522565e-05, + "loss": 1.1886, + "step": 20323 + }, + { + "epoch": 0.02, + "learning_rate": 4.99495472776366e-05, + "loss": 1.1505, + "step": 20324 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949542289801266e-05, + "loss": 1.1204, + "step": 20325 + }, + { + "epoch": 0.02, + "learning_rate": 4.994953730171964e-05, + "loss": 1.0408, + "step": 20326 + }, + { + "epoch": 0.02, + "learning_rate": 4.994953231339172e-05, + "loss": 0.9235, + "step": 20327 + }, + { + "epoch": 0.02, + "learning_rate": 4.994952732481751e-05, + "loss": 1.131, + "step": 20328 + }, + { + "epoch": 0.02, + "learning_rate": 4.994952233599701e-05, + "loss": 0.9634, + "step": 20329 + }, + { + "epoch": 0.02, + "learning_rate": 4.994951734693022e-05, + "loss": 0.9429, + "step": 20330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949512357617134e-05, + "loss": 1.068, + "step": 20331 + }, + { + "epoch": 0.02, + "learning_rate": 4.994950736805776e-05, + "loss": 1.0612, + "step": 20332 + }, + { + "epoch": 0.02, + "learning_rate": 4.99495023782521e-05, + "loss": 0.7488, + "step": 20333 + }, + { + "epoch": 0.02, + "learning_rate": 4.994949738820014e-05, + "loss": 1.2353, + "step": 20334 + }, + { + "epoch": 0.02, + "learning_rate": 4.99494923979019e-05, + "loss": 1.0548, + "step": 20335 + }, + { + "epoch": 0.02, + "learning_rate": 4.994948740735735e-05, + "loss": 1.2048, + "step": 20336 + }, + { + "epoch": 0.02, + "learning_rate": 4.994948241656653e-05, + "loss": 1.2339, + "step": 20337 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949477425529414e-05, + "loss": 0.9382, + "step": 20338 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949472434246006e-05, + "loss": 1.261, + "step": 20339 + }, + { + "epoch": 0.02, + "learning_rate": 4.99494674427163e-05, + "loss": 1.3775, + "step": 20340 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949462450940307e-05, + "loss": 1.1623, + "step": 20341 + }, + { + "epoch": 0.02, + "learning_rate": 4.994945745891803e-05, + "loss": 1.1393, + "step": 20342 + }, + { + "epoch": 0.02, + "learning_rate": 4.994945246664945e-05, + "loss": 1.0106, + "step": 20343 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949447474134595e-05, + "loss": 1.083, + "step": 20344 + }, + { + "epoch": 0.02, + "learning_rate": 4.994944248137344e-05, + "loss": 0.4456, + "step": 20345 + }, + { + "epoch": 0.02, + "learning_rate": 4.994943748836599e-05, + "loss": 0.5227, + "step": 20346 + }, + { + "epoch": 0.02, + "learning_rate": 4.994943249511226e-05, + "loss": 1.5341, + "step": 20347 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949427501612234e-05, + "loss": 1.0367, + "step": 20348 + }, + { + "epoch": 0.02, + "learning_rate": 4.994942250786592e-05, + "loss": 1.4389, + "step": 20349 + }, + { + "epoch": 0.02, + "learning_rate": 4.994941751387331e-05, + "loss": 1.0551, + "step": 20350 + }, + { + "epoch": 0.02, + "learning_rate": 4.994941251963442e-05, + "loss": 0.9989, + "step": 20351 + }, + { + "epoch": 0.02, + "learning_rate": 4.994940752514924e-05, + "loss": 1.0123, + "step": 20352 + }, + { + "epoch": 0.02, + "learning_rate": 4.994940253041776e-05, + "loss": 0.7292, + "step": 20353 + }, + { + "epoch": 0.02, + "learning_rate": 4.994939753544e-05, + "loss": 0.2389, + "step": 20354 + }, + { + "epoch": 0.02, + "learning_rate": 4.994939254021594e-05, + "loss": 1.0091, + "step": 20355 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949387544745595e-05, + "loss": 1.1038, + "step": 20356 + }, + { + "epoch": 0.02, + "learning_rate": 4.994938254902896e-05, + "loss": 1.1386, + "step": 20357 + }, + { + "epoch": 0.02, + "learning_rate": 4.994937755306603e-05, + "loss": 0.7092, + "step": 20358 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949372556856824e-05, + "loss": 1.0237, + "step": 20359 + }, + { + "epoch": 0.02, + "learning_rate": 4.994936756040132e-05, + "loss": 0.908, + "step": 20360 + }, + { + "epoch": 0.02, + "learning_rate": 4.994936256369952e-05, + "loss": 1.0243, + "step": 20361 + }, + { + "epoch": 0.02, + "learning_rate": 4.994935756675144e-05, + "loss": 1.0143, + "step": 20362 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949352569557065e-05, + "loss": 0.9909, + "step": 20363 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949347572116405e-05, + "loss": 1.0071, + "step": 20364 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949342574429455e-05, + "loss": 1.1139, + "step": 20365 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949337576496215e-05, + "loss": 0.8215, + "step": 20366 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949332578316685e-05, + "loss": 0.9461, + "step": 20367 + }, + { + "epoch": 0.02, + "learning_rate": 4.994932757989086e-05, + "loss": 1.1085, + "step": 20368 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949322581218755e-05, + "loss": 0.9859, + "step": 20369 + }, + { + "epoch": 0.02, + "learning_rate": 4.994931758230036e-05, + "loss": 1.2495, + "step": 20370 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949312583135664e-05, + "loss": 1.1785, + "step": 20371 + }, + { + "epoch": 0.02, + "learning_rate": 4.994930758372469e-05, + "loss": 1.0522, + "step": 20372 + }, + { + "epoch": 0.02, + "learning_rate": 4.994930258406743e-05, + "loss": 1.1065, + "step": 20373 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949297584163866e-05, + "loss": 0.8667, + "step": 20374 + }, + { + "epoch": 0.02, + "learning_rate": 4.994929258401403e-05, + "loss": 0.9229, + "step": 20375 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949287583617895e-05, + "loss": 1.069, + "step": 20376 + }, + { + "epoch": 0.02, + "learning_rate": 4.994928258297548e-05, + "loss": 1.2157, + "step": 20377 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949277582086764e-05, + "loss": 1.5214, + "step": 20378 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949272580951767e-05, + "loss": 1.3871, + "step": 20379 + }, + { + "epoch": 0.02, + "learning_rate": 4.994926757957048e-05, + "loss": 1.3439, + "step": 20380 + }, + { + "epoch": 0.02, + "learning_rate": 4.99492625779429e-05, + "loss": 1.1913, + "step": 20381 + }, + { + "epoch": 0.02, + "learning_rate": 4.994925757606904e-05, + "loss": 1.139, + "step": 20382 + }, + { + "epoch": 0.02, + "learning_rate": 4.994925257394888e-05, + "loss": 1.1586, + "step": 20383 + }, + { + "epoch": 0.02, + "learning_rate": 4.994924757158244e-05, + "loss": 1.0063, + "step": 20384 + }, + { + "epoch": 0.02, + "learning_rate": 4.994924256896971e-05, + "loss": 1.0054, + "step": 20385 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949237566110697e-05, + "loss": 0.9568, + "step": 20386 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949232563005385e-05, + "loss": 0.7738, + "step": 20387 + }, + { + "epoch": 0.02, + "learning_rate": 4.994922755965379e-05, + "loss": 0.801, + "step": 20388 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949222556055906e-05, + "loss": 0.9423, + "step": 20389 + }, + { + "epoch": 0.02, + "learning_rate": 4.994921755221174e-05, + "loss": 1.141, + "step": 20390 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949212548121274e-05, + "loss": 0.4064, + "step": 20391 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949207543784526e-05, + "loss": 0.2079, + "step": 20392 + }, + { + "epoch": 0.02, + "learning_rate": 4.994920253920149e-05, + "loss": 1.0039, + "step": 20393 + }, + { + "epoch": 0.02, + "learning_rate": 4.994919753437217e-05, + "loss": 0.8507, + "step": 20394 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949192529296554e-05, + "loss": 1.1805, + "step": 20395 + }, + { + "epoch": 0.02, + "learning_rate": 4.994918752397465e-05, + "loss": 1.0034, + "step": 20396 + }, + { + "epoch": 0.02, + "learning_rate": 4.994918251840647e-05, + "loss": 1.0121, + "step": 20397 + }, + { + "epoch": 0.02, + "learning_rate": 4.994917751259199e-05, + "loss": 1.1833, + "step": 20398 + }, + { + "epoch": 0.02, + "learning_rate": 4.994917250653123e-05, + "loss": 1.2649, + "step": 20399 + }, + { + "epoch": 0.02, + "learning_rate": 4.994916750022418e-05, + "loss": 1.0091, + "step": 20400 + }, + { + "epoch": 0.02, + "learning_rate": 4.994916249367084e-05, + "loss": 0.8859, + "step": 20401 + }, + { + "epoch": 0.02, + "learning_rate": 4.994915748687121e-05, + "loss": 1.243, + "step": 20402 + }, + { + "epoch": 0.02, + "learning_rate": 4.99491524798253e-05, + "loss": 1.1594, + "step": 20403 + }, + { + "epoch": 0.02, + "learning_rate": 4.99491474725331e-05, + "loss": 1.0349, + "step": 20404 + }, + { + "epoch": 0.02, + "learning_rate": 4.994914246499461e-05, + "loss": 1.0105, + "step": 20405 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949137457209825e-05, + "loss": 1.2083, + "step": 20406 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949132449178773e-05, + "loss": 1.1124, + "step": 20407 + }, + { + "epoch": 0.02, + "learning_rate": 4.994912744090142e-05, + "loss": 1.094, + "step": 20408 + }, + { + "epoch": 0.02, + "learning_rate": 4.994912243237778e-05, + "loss": 1.0577, + "step": 20409 + }, + { + "epoch": 0.02, + "learning_rate": 4.994911742360785e-05, + "loss": 1.2048, + "step": 20410 + }, + { + "epoch": 0.02, + "learning_rate": 4.994911241459164e-05, + "loss": 1.0037, + "step": 20411 + }, + { + "epoch": 0.02, + "learning_rate": 4.994910740532914e-05, + "loss": 1.5609, + "step": 20412 + }, + { + "epoch": 0.02, + "learning_rate": 4.994910239582035e-05, + "loss": 1.313, + "step": 20413 + }, + { + "epoch": 0.02, + "learning_rate": 4.994909738606528e-05, + "loss": 1.2168, + "step": 20414 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949092376063917e-05, + "loss": 1.0983, + "step": 20415 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949087365816274e-05, + "loss": 1.0738, + "step": 20416 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949082355322336e-05, + "loss": 1.2525, + "step": 20417 + }, + { + "epoch": 0.02, + "learning_rate": 4.994907734458211e-05, + "loss": 1.1444, + "step": 20418 + }, + { + "epoch": 0.02, + "learning_rate": 4.99490723335956e-05, + "loss": 0.984, + "step": 20419 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949067322362805e-05, + "loss": 0.7995, + "step": 20420 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949062310883726e-05, + "loss": 0.8166, + "step": 20421 + }, + { + "epoch": 0.02, + "learning_rate": 4.994905729915836e-05, + "loss": 1.0289, + "step": 20422 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949052287186705e-05, + "loss": 0.7589, + "step": 20423 + }, + { + "epoch": 0.02, + "learning_rate": 4.994904727496876e-05, + "loss": 1.2308, + "step": 20424 + }, + { + "epoch": 0.02, + "learning_rate": 4.994904226250453e-05, + "loss": 1.1788, + "step": 20425 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949037249794014e-05, + "loss": 1.2371, + "step": 20426 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949032236837215e-05, + "loss": 0.9588, + "step": 20427 + }, + { + "epoch": 0.02, + "learning_rate": 4.994902722363413e-05, + "loss": 1.2215, + "step": 20428 + }, + { + "epoch": 0.02, + "learning_rate": 4.994902221018476e-05, + "loss": 0.9776, + "step": 20429 + }, + { + "epoch": 0.02, + "learning_rate": 4.99490171964891e-05, + "loss": 1.0478, + "step": 20430 + }, + { + "epoch": 0.02, + "learning_rate": 4.994901218254715e-05, + "loss": 0.6612, + "step": 20431 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949007168358916e-05, + "loss": 1.0912, + "step": 20432 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949002153924404e-05, + "loss": 1.0184, + "step": 20433 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948997139243595e-05, + "loss": 0.7621, + "step": 20434 + }, + { + "epoch": 0.02, + "learning_rate": 4.994899212431651e-05, + "loss": 0.9941, + "step": 20435 + }, + { + "epoch": 0.02, + "learning_rate": 4.994898710914313e-05, + "loss": 1.4804, + "step": 20436 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948982093723474e-05, + "loss": 1.0024, + "step": 20437 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948977078057524e-05, + "loss": 1.078, + "step": 20438 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948972062145285e-05, + "loss": 1.07, + "step": 20439 + }, + { + "epoch": 0.02, + "learning_rate": 4.994896704598677e-05, + "loss": 1.1107, + "step": 20440 + }, + { + "epoch": 0.02, + "learning_rate": 4.994896202958196e-05, + "loss": 1.5577, + "step": 20441 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948957012930874e-05, + "loss": 1.255, + "step": 20442 + }, + { + "epoch": 0.02, + "learning_rate": 4.99489519960335e-05, + "loss": 0.9073, + "step": 20443 + }, + { + "epoch": 0.02, + "learning_rate": 4.994894697888983e-05, + "loss": 1.0193, + "step": 20444 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948941961499886e-05, + "loss": 1.0198, + "step": 20445 + }, + { + "epoch": 0.02, + "learning_rate": 4.994893694386366e-05, + "loss": 1.1247, + "step": 20446 + }, + { + "epoch": 0.02, + "learning_rate": 4.994893192598114e-05, + "loss": 0.8786, + "step": 20447 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948926907852335e-05, + "loss": 0.8272, + "step": 20448 + }, + { + "epoch": 0.02, + "learning_rate": 4.994892188947724e-05, + "loss": 0.8434, + "step": 20449 + }, + { + "epoch": 0.02, + "learning_rate": 4.994891687085587e-05, + "loss": 0.4277, + "step": 20450 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948911851988214e-05, + "loss": 0.3038, + "step": 20451 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948906832874265e-05, + "loss": 0.7202, + "step": 20452 + }, + { + "epoch": 0.02, + "learning_rate": 4.994890181351404e-05, + "loss": 1.1243, + "step": 20453 + }, + { + "epoch": 0.02, + "learning_rate": 4.994889679390752e-05, + "loss": 1.0209, + "step": 20454 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948891774054724e-05, + "loss": 1.2765, + "step": 20455 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948886753955635e-05, + "loss": 1.1735, + "step": 20456 + }, + { + "epoch": 0.02, + "learning_rate": 4.994888173361027e-05, + "loss": 1.4854, + "step": 20457 + }, + { + "epoch": 0.02, + "learning_rate": 4.994887671301862e-05, + "loss": 0.9207, + "step": 20458 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948871692180674e-05, + "loss": 0.8616, + "step": 20459 + }, + { + "epoch": 0.02, + "learning_rate": 4.994886667109645e-05, + "loss": 1.0523, + "step": 20460 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948861649765945e-05, + "loss": 1.0666, + "step": 20461 + }, + { + "epoch": 0.02, + "learning_rate": 4.994885662818915e-05, + "loss": 1.4422, + "step": 20462 + }, + { + "epoch": 0.02, + "learning_rate": 4.994885160636608e-05, + "loss": 4.3306, + "step": 20463 + }, + { + "epoch": 0.02, + "learning_rate": 4.994884658429671e-05, + "loss": 1.1483, + "step": 20464 + }, + { + "epoch": 0.02, + "learning_rate": 4.994884156198106e-05, + "loss": 1.2823, + "step": 20465 + }, + { + "epoch": 0.02, + "learning_rate": 4.994883653941913e-05, + "loss": 1.417, + "step": 20466 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948831516610914e-05, + "loss": 1.1768, + "step": 20467 + }, + { + "epoch": 0.02, + "learning_rate": 4.994882649355642e-05, + "loss": 1.0893, + "step": 20468 + }, + { + "epoch": 0.02, + "learning_rate": 4.994882147025563e-05, + "loss": 1.1775, + "step": 20469 + }, + { + "epoch": 0.02, + "learning_rate": 4.994881644670857e-05, + "loss": 1.2037, + "step": 20470 + }, + { + "epoch": 0.02, + "learning_rate": 4.994881142291521e-05, + "loss": 1.2072, + "step": 20471 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948806398875575e-05, + "loss": 1.1105, + "step": 20472 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948801374589656e-05, + "loss": 1.5748, + "step": 20473 + }, + { + "epoch": 0.02, + "learning_rate": 4.994879635005745e-05, + "loss": 1.3014, + "step": 20474 + }, + { + "epoch": 0.02, + "learning_rate": 4.994879132527896e-05, + "loss": 1.1553, + "step": 20475 + }, + { + "epoch": 0.02, + "learning_rate": 4.994878630025419e-05, + "loss": 1.1796, + "step": 20476 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948781274983134e-05, + "loss": 1.0419, + "step": 20477 + }, + { + "epoch": 0.02, + "learning_rate": 4.994877624946579e-05, + "loss": 1.2093, + "step": 20478 + }, + { + "epoch": 0.02, + "learning_rate": 4.994877122370216e-05, + "loss": 1.057, + "step": 20479 + }, + { + "epoch": 0.02, + "learning_rate": 4.994876619769225e-05, + "loss": 1.323, + "step": 20480 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948761171436065e-05, + "loss": 1.0633, + "step": 20481 + }, + { + "epoch": 0.02, + "learning_rate": 4.994875614493359e-05, + "loss": 1.1679, + "step": 20482 + }, + { + "epoch": 0.02, + "learning_rate": 4.994875111818483e-05, + "loss": 0.966, + "step": 20483 + }, + { + "epoch": 0.02, + "learning_rate": 4.994874609118979e-05, + "loss": 1.1301, + "step": 20484 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948741063948464e-05, + "loss": 0.9325, + "step": 20485 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948736036460855e-05, + "loss": 1.4471, + "step": 20486 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948731008726957e-05, + "loss": 1.1489, + "step": 20487 + }, + { + "epoch": 0.02, + "learning_rate": 4.994872598074678e-05, + "loss": 1.1858, + "step": 20488 + }, + { + "epoch": 0.02, + "learning_rate": 4.994872095252032e-05, + "loss": 1.2127, + "step": 20489 + }, + { + "epoch": 0.02, + "learning_rate": 4.994871592404758e-05, + "loss": 1.0272, + "step": 20490 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948710895328556e-05, + "loss": 1.9511, + "step": 20491 + }, + { + "epoch": 0.02, + "learning_rate": 4.994870586636324e-05, + "loss": 1.0792, + "step": 20492 + }, + { + "epoch": 0.02, + "learning_rate": 4.994870083715165e-05, + "loss": 1.1621, + "step": 20493 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948695807693774e-05, + "loss": 1.2359, + "step": 20494 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948690777989616e-05, + "loss": 0.9695, + "step": 20495 + }, + { + "epoch": 0.02, + "learning_rate": 4.994868574803917e-05, + "loss": 1.0185, + "step": 20496 + }, + { + "epoch": 0.02, + "learning_rate": 4.994868071784245e-05, + "loss": 1.0531, + "step": 20497 + }, + { + "epoch": 0.02, + "learning_rate": 4.994867568739944e-05, + "loss": 1.0265, + "step": 20498 + }, + { + "epoch": 0.02, + "learning_rate": 4.994867065671015e-05, + "loss": 1.1798, + "step": 20499 + }, + { + "epoch": 0.02, + "learning_rate": 4.994866562577458e-05, + "loss": 1.0785, + "step": 20500 + }, + { + "epoch": 0.02, + "eval_loss": 1.0215940475463867, + "eval_runtime": 83.917, + "eval_samples_per_second": 16.504, + "eval_steps_per_second": 4.135, + "step": 20500 + }, + { + "epoch": 0.02, + "learning_rate": 4.994866059459272e-05, + "loss": 1.1798, + "step": 20501 + }, + { + "epoch": 0.02, + "learning_rate": 4.994865556316458e-05, + "loss": 1.4237, + "step": 20502 + }, + { + "epoch": 0.02, + "learning_rate": 4.994865053149016e-05, + "loss": 1.1493, + "step": 20503 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948645499569456e-05, + "loss": 1.0907, + "step": 20504 + }, + { + "epoch": 0.02, + "learning_rate": 4.994864046740247e-05, + "loss": 0.9533, + "step": 20505 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948635434989203e-05, + "loss": 1.5278, + "step": 20506 + }, + { + "epoch": 0.02, + "learning_rate": 4.994863040232965e-05, + "loss": 1.2102, + "step": 20507 + }, + { + "epoch": 0.02, + "learning_rate": 4.994862536942382e-05, + "loss": 1.0935, + "step": 20508 + }, + { + "epoch": 0.02, + "learning_rate": 4.994862033627171e-05, + "loss": 0.9425, + "step": 20509 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948615302873306e-05, + "loss": 1.1779, + "step": 20510 + }, + { + "epoch": 0.02, + "learning_rate": 4.994861026922862e-05, + "loss": 1.0389, + "step": 20511 + }, + { + "epoch": 0.02, + "learning_rate": 4.994860523533767e-05, + "loss": 1.2344, + "step": 20512 + }, + { + "epoch": 0.02, + "learning_rate": 4.994860020120042e-05, + "loss": 1.1296, + "step": 20513 + }, + { + "epoch": 0.02, + "learning_rate": 4.994859516681689e-05, + "loss": 1.0571, + "step": 20514 + }, + { + "epoch": 0.02, + "learning_rate": 4.994859013218709e-05, + "loss": 0.9802, + "step": 20515 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948585097311e-05, + "loss": 0.9266, + "step": 20516 + }, + { + "epoch": 0.02, + "learning_rate": 4.994858006218862e-05, + "loss": 0.9232, + "step": 20517 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948575026819966e-05, + "loss": 0.6939, + "step": 20518 + }, + { + "epoch": 0.02, + "learning_rate": 4.994856999120503e-05, + "loss": 1.3959, + "step": 20519 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948564955343816e-05, + "loss": 1.2354, + "step": 20520 + }, + { + "epoch": 0.02, + "learning_rate": 4.994855991923631e-05, + "loss": 1.1157, + "step": 20521 + }, + { + "epoch": 0.02, + "learning_rate": 4.994855488288254e-05, + "loss": 1.1328, + "step": 20522 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948549846282475e-05, + "loss": 1.8559, + "step": 20523 + }, + { + "epoch": 0.02, + "learning_rate": 4.994854480943613e-05, + "loss": 0.9372, + "step": 20524 + }, + { + "epoch": 0.02, + "learning_rate": 4.99485397723435e-05, + "loss": 1.2147, + "step": 20525 + }, + { + "epoch": 0.02, + "learning_rate": 4.994853473500459e-05, + "loss": 1.1951, + "step": 20526 + }, + { + "epoch": 0.02, + "learning_rate": 4.994852969741941e-05, + "loss": 1.1203, + "step": 20527 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948524659587934e-05, + "loss": 1.246, + "step": 20528 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948519621510184e-05, + "loss": 1.1379, + "step": 20529 + }, + { + "epoch": 0.02, + "learning_rate": 4.994851458318615e-05, + "loss": 0.9786, + "step": 20530 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948509544615835e-05, + "loss": 1.0192, + "step": 20531 + }, + { + "epoch": 0.02, + "learning_rate": 4.994850450579924e-05, + "loss": 0.9764, + "step": 20532 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948499466736366e-05, + "loss": 1.2278, + "step": 20533 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948494427427206e-05, + "loss": 1.1132, + "step": 20534 + }, + { + "epoch": 0.02, + "learning_rate": 4.994848938787176e-05, + "loss": 1.045, + "step": 20535 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948484348070044e-05, + "loss": 1.0171, + "step": 20536 + }, + { + "epoch": 0.02, + "learning_rate": 4.994847930802205e-05, + "loss": 1.2686, + "step": 20537 + }, + { + "epoch": 0.02, + "learning_rate": 4.994847426772777e-05, + "loss": 0.9343, + "step": 20538 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948469227187206e-05, + "loss": 1.1846, + "step": 20539 + }, + { + "epoch": 0.02, + "learning_rate": 4.994846418640036e-05, + "loss": 0.8699, + "step": 20540 + }, + { + "epoch": 0.02, + "learning_rate": 4.994845914536724e-05, + "loss": 1.2019, + "step": 20541 + }, + { + "epoch": 0.02, + "learning_rate": 4.994845410408784e-05, + "loss": 1.214, + "step": 20542 + }, + { + "epoch": 0.02, + "learning_rate": 4.994844906256215e-05, + "loss": 1.165, + "step": 20543 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948444020790185e-05, + "loss": 0.9873, + "step": 20544 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948438978771936e-05, + "loss": 1.2067, + "step": 20545 + }, + { + "epoch": 0.02, + "learning_rate": 4.994843393650741e-05, + "loss": 1.1716, + "step": 20546 + }, + { + "epoch": 0.02, + "learning_rate": 4.99484288939966e-05, + "loss": 1.0789, + "step": 20547 + }, + { + "epoch": 0.02, + "learning_rate": 4.994842385123952e-05, + "loss": 1.0884, + "step": 20548 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948418808236155e-05, + "loss": 0.8562, + "step": 20549 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948413764986503e-05, + "loss": 1.1415, + "step": 20550 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948408721490575e-05, + "loss": 1.2349, + "step": 20551 + }, + { + "epoch": 0.02, + "learning_rate": 4.994840367774837e-05, + "loss": 1.1789, + "step": 20552 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948398633759876e-05, + "loss": 0.9627, + "step": 20553 + }, + { + "epoch": 0.02, + "learning_rate": 4.994839358952511e-05, + "loss": 0.2376, + "step": 20554 + }, + { + "epoch": 0.02, + "learning_rate": 4.994838854504406e-05, + "loss": 0.0839, + "step": 20555 + }, + { + "epoch": 0.02, + "learning_rate": 4.994838350031673e-05, + "loss": 0.0495, + "step": 20556 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948378455343124e-05, + "loss": 0.0677, + "step": 20557 + }, + { + "epoch": 0.02, + "learning_rate": 4.994837341012324e-05, + "loss": 0.075, + "step": 20558 + }, + { + "epoch": 0.02, + "learning_rate": 4.994836836465707e-05, + "loss": 0.7124, + "step": 20559 + }, + { + "epoch": 0.02, + "learning_rate": 4.994836331894462e-05, + "loss": 0.945, + "step": 20560 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948358272985894e-05, + "loss": 0.8929, + "step": 20561 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948353226780884e-05, + "loss": 0.94, + "step": 20562 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948348180329596e-05, + "loss": 1.283, + "step": 20563 + }, + { + "epoch": 0.02, + "learning_rate": 4.994834313363204e-05, + "loss": 0.8967, + "step": 20564 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948338086688186e-05, + "loss": 0.839, + "step": 20565 + }, + { + "epoch": 0.02, + "learning_rate": 4.994833303949806e-05, + "loss": 0.9629, + "step": 20566 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948327992061655e-05, + "loss": 0.7622, + "step": 20567 + }, + { + "epoch": 0.02, + "learning_rate": 4.994832294437897e-05, + "loss": 0.5821, + "step": 20568 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948317896450005e-05, + "loss": 1.0234, + "step": 20569 + }, + { + "epoch": 0.02, + "learning_rate": 4.994831284827476e-05, + "loss": 1.3592, + "step": 20570 + }, + { + "epoch": 0.02, + "learning_rate": 4.994830779985324e-05, + "loss": 1.2687, + "step": 20571 + }, + { + "epoch": 0.02, + "learning_rate": 4.994830275118544e-05, + "loss": 1.1834, + "step": 20572 + }, + { + "epoch": 0.02, + "learning_rate": 4.994829770227136e-05, + "loss": 0.8404, + "step": 20573 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948292653111004e-05, + "loss": 0.9492, + "step": 20574 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948287603704365e-05, + "loss": 0.9871, + "step": 20575 + }, + { + "epoch": 0.02, + "learning_rate": 4.994828255405144e-05, + "loss": 0.8609, + "step": 20576 + }, + { + "epoch": 0.02, + "learning_rate": 4.994827750415225e-05, + "loss": 1.1345, + "step": 20577 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948272454006774e-05, + "loss": 1.1334, + "step": 20578 + }, + { + "epoch": 0.02, + "learning_rate": 4.994826740361502e-05, + "loss": 1.3962, + "step": 20579 + }, + { + "epoch": 0.02, + "learning_rate": 4.994826235297698e-05, + "loss": 1.6432, + "step": 20580 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948257302092674e-05, + "loss": 1.2254, + "step": 20581 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948252250962086e-05, + "loss": 0.9394, + "step": 20582 + }, + { + "epoch": 0.02, + "learning_rate": 4.994824719958522e-05, + "loss": 1.1011, + "step": 20583 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948242147962065e-05, + "loss": 1.0529, + "step": 20584 + }, + { + "epoch": 0.02, + "learning_rate": 4.994823709609264e-05, + "loss": 1.1848, + "step": 20585 + }, + { + "epoch": 0.02, + "learning_rate": 4.994823204397694e-05, + "loss": 1.1384, + "step": 20586 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948226991614954e-05, + "loss": 1.1495, + "step": 20587 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948221939006686e-05, + "loss": 0.9372, + "step": 20588 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948216886152155e-05, + "loss": 1.1513, + "step": 20589 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948211833051334e-05, + "loss": 0.8972, + "step": 20590 + }, + { + "epoch": 0.02, + "learning_rate": 4.994820677970424e-05, + "loss": 1.0895, + "step": 20591 + }, + { + "epoch": 0.02, + "learning_rate": 4.994820172611086e-05, + "loss": 1.1794, + "step": 20592 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948196672271205e-05, + "loss": 1.3706, + "step": 20593 + }, + { + "epoch": 0.02, + "learning_rate": 4.994819161818528e-05, + "loss": 1.1998, + "step": 20594 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948186563853074e-05, + "loss": 1.2926, + "step": 20595 + }, + { + "epoch": 0.02, + "learning_rate": 4.994818150927458e-05, + "loss": 1.0652, + "step": 20596 + }, + { + "epoch": 0.02, + "learning_rate": 4.994817645444982e-05, + "loss": 1.0558, + "step": 20597 + }, + { + "epoch": 0.02, + "learning_rate": 4.994817139937878e-05, + "loss": 0.9876, + "step": 20598 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948166344061454e-05, + "loss": 1.1109, + "step": 20599 + }, + { + "epoch": 0.02, + "learning_rate": 4.994816128849786e-05, + "loss": 1.2917, + "step": 20600 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948156232687984e-05, + "loss": 0.9461, + "step": 20601 + }, + { + "epoch": 0.02, + "learning_rate": 4.994815117663183e-05, + "loss": 0.7874, + "step": 20602 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948146120329395e-05, + "loss": 1.0423, + "step": 20603 + }, + { + "epoch": 0.02, + "learning_rate": 4.994814106378069e-05, + "loss": 1.3804, + "step": 20604 + }, + { + "epoch": 0.02, + "learning_rate": 4.99481360069857e-05, + "loss": 1.2473, + "step": 20605 + }, + { + "epoch": 0.02, + "learning_rate": 4.994813094994444e-05, + "loss": 1.3151, + "step": 20606 + }, + { + "epoch": 0.02, + "learning_rate": 4.994812589265689e-05, + "loss": 0.7192, + "step": 20607 + }, + { + "epoch": 0.02, + "learning_rate": 4.994812083512308e-05, + "loss": 0.3976, + "step": 20608 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948115777342984e-05, + "loss": 0.2892, + "step": 20609 + }, + { + "epoch": 0.02, + "learning_rate": 4.994811071931661e-05, + "loss": 0.2133, + "step": 20610 + }, + { + "epoch": 0.02, + "learning_rate": 4.994810566104396e-05, + "loss": 0.3406, + "step": 20611 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948100602525026e-05, + "loss": 0.3587, + "step": 20612 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948095543759824e-05, + "loss": 0.8555, + "step": 20613 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948090484748346e-05, + "loss": 0.7441, + "step": 20614 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948085425490585e-05, + "loss": 1.0809, + "step": 20615 + }, + { + "epoch": 0.02, + "learning_rate": 4.994808036598655e-05, + "loss": 1.1309, + "step": 20616 + }, + { + "epoch": 0.02, + "learning_rate": 4.994807530623623e-05, + "loss": 1.0297, + "step": 20617 + }, + { + "epoch": 0.02, + "learning_rate": 4.994807024623965e-05, + "loss": 1.2328, + "step": 20618 + }, + { + "epoch": 0.02, + "learning_rate": 4.994806518599678e-05, + "loss": 0.9495, + "step": 20619 + }, + { + "epoch": 0.02, + "learning_rate": 4.994806012550764e-05, + "loss": 0.8014, + "step": 20620 + }, + { + "epoch": 0.02, + "learning_rate": 4.994805506477222e-05, + "loss": 1.0521, + "step": 20621 + }, + { + "epoch": 0.02, + "learning_rate": 4.994805000379052e-05, + "loss": 1.2195, + "step": 20622 + }, + { + "epoch": 0.02, + "learning_rate": 4.994804494256255e-05, + "loss": 1.0379, + "step": 20623 + }, + { + "epoch": 0.02, + "learning_rate": 4.994803988108829e-05, + "loss": 0.5961, + "step": 20624 + }, + { + "epoch": 0.02, + "learning_rate": 4.994803481936777e-05, + "loss": 1.0857, + "step": 20625 + }, + { + "epoch": 0.02, + "learning_rate": 4.994802975740097e-05, + "loss": 0.9888, + "step": 20626 + }, + { + "epoch": 0.02, + "learning_rate": 4.994802469518789e-05, + "loss": 1.0407, + "step": 20627 + }, + { + "epoch": 0.02, + "learning_rate": 4.994801963272853e-05, + "loss": 1.2679, + "step": 20628 + }, + { + "epoch": 0.02, + "learning_rate": 4.99480145700229e-05, + "loss": 1.2779, + "step": 20629 + }, + { + "epoch": 0.02, + "learning_rate": 4.994800950707099e-05, + "loss": 1.1004, + "step": 20630 + }, + { + "epoch": 0.02, + "learning_rate": 4.994800444387281e-05, + "loss": 1.0953, + "step": 20631 + }, + { + "epoch": 0.02, + "learning_rate": 4.994799938042834e-05, + "loss": 1.261, + "step": 20632 + }, + { + "epoch": 0.02, + "learning_rate": 4.994799431673761e-05, + "loss": 1.0813, + "step": 20633 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947989252800595e-05, + "loss": 0.4703, + "step": 20634 + }, + { + "epoch": 0.02, + "learning_rate": 4.99479841886173e-05, + "loss": 0.4115, + "step": 20635 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947979124187735e-05, + "loss": 1.1072, + "step": 20636 + }, + { + "epoch": 0.02, + "learning_rate": 4.99479740595119e-05, + "loss": 1.0784, + "step": 20637 + }, + { + "epoch": 0.02, + "learning_rate": 4.994796899458978e-05, + "loss": 0.7266, + "step": 20638 + }, + { + "epoch": 0.02, + "learning_rate": 4.994796392942139e-05, + "loss": 0.6355, + "step": 20639 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947958864006724e-05, + "loss": 1.3791, + "step": 20640 + }, + { + "epoch": 0.02, + "learning_rate": 4.994795379834577e-05, + "loss": 1.2974, + "step": 20641 + }, + { + "epoch": 0.02, + "learning_rate": 4.994794873243855e-05, + "loss": 0.9291, + "step": 20642 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947943666285055e-05, + "loss": 1.1866, + "step": 20643 + }, + { + "epoch": 0.02, + "learning_rate": 4.994793859988529e-05, + "loss": 0.9061, + "step": 20644 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947933533239245e-05, + "loss": 1.0333, + "step": 20645 + }, + { + "epoch": 0.02, + "learning_rate": 4.994792846634692e-05, + "loss": 1.0324, + "step": 20646 + }, + { + "epoch": 0.02, + "learning_rate": 4.994792339920832e-05, + "loss": 0.4977, + "step": 20647 + }, + { + "epoch": 0.02, + "learning_rate": 4.994791833182345e-05, + "loss": 0.9781, + "step": 20648 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947913264192306e-05, + "loss": 1.2779, + "step": 20649 + }, + { + "epoch": 0.02, + "learning_rate": 4.994790819631487e-05, + "loss": 0.9247, + "step": 20650 + }, + { + "epoch": 0.02, + "learning_rate": 4.994790312819118e-05, + "loss": 1.1156, + "step": 20651 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947898059821205e-05, + "loss": 1.1309, + "step": 20652 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947892991204956e-05, + "loss": 1.1929, + "step": 20653 + }, + { + "epoch": 0.02, + "learning_rate": 4.994788792234243e-05, + "loss": 1.0149, + "step": 20654 + }, + { + "epoch": 0.02, + "learning_rate": 4.994788285323363e-05, + "loss": 0.7167, + "step": 20655 + }, + { + "epoch": 0.02, + "learning_rate": 4.994787778387856e-05, + "loss": 0.8417, + "step": 20656 + }, + { + "epoch": 0.02, + "learning_rate": 4.994787271427721e-05, + "loss": 1.0342, + "step": 20657 + }, + { + "epoch": 0.02, + "learning_rate": 4.994786764442958e-05, + "loss": 1.3453, + "step": 20658 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947862574335677e-05, + "loss": 1.0122, + "step": 20659 + }, + { + "epoch": 0.02, + "learning_rate": 4.994785750399551e-05, + "loss": 1.1082, + "step": 20660 + }, + { + "epoch": 0.02, + "learning_rate": 4.994785243340906e-05, + "loss": 1.1208, + "step": 20661 + }, + { + "epoch": 0.02, + "learning_rate": 4.994784736257634e-05, + "loss": 1.1874, + "step": 20662 + }, + { + "epoch": 0.02, + "learning_rate": 4.994784229149734e-05, + "loss": 1.1206, + "step": 20663 + }, + { + "epoch": 0.02, + "learning_rate": 4.994783722017207e-05, + "loss": 0.9856, + "step": 20664 + }, + { + "epoch": 0.02, + "learning_rate": 4.994783214860052e-05, + "loss": 0.9931, + "step": 20665 + }, + { + "epoch": 0.02, + "learning_rate": 4.99478270767827e-05, + "loss": 1.3226, + "step": 20666 + }, + { + "epoch": 0.02, + "learning_rate": 4.99478220047186e-05, + "loss": 1.5303, + "step": 20667 + }, + { + "epoch": 0.02, + "learning_rate": 4.994781693240823e-05, + "loss": 1.0815, + "step": 20668 + }, + { + "epoch": 0.02, + "learning_rate": 4.994781185985159e-05, + "loss": 0.9836, + "step": 20669 + }, + { + "epoch": 0.02, + "learning_rate": 4.994780678704867e-05, + "loss": 1.2532, + "step": 20670 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947801713999476e-05, + "loss": 1.0708, + "step": 20671 + }, + { + "epoch": 0.02, + "learning_rate": 4.994779664070401e-05, + "loss": 0.9388, + "step": 20672 + }, + { + "epoch": 0.02, + "learning_rate": 4.994779156716227e-05, + "loss": 0.9802, + "step": 20673 + }, + { + "epoch": 0.02, + "learning_rate": 4.994778649337425e-05, + "loss": 1.5807, + "step": 20674 + }, + { + "epoch": 0.02, + "learning_rate": 4.994778141933996e-05, + "loss": 1.1361, + "step": 20675 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947776345059394e-05, + "loss": 0.9857, + "step": 20676 + }, + { + "epoch": 0.02, + "learning_rate": 4.994777127053256e-05, + "loss": 1.2357, + "step": 20677 + }, + { + "epoch": 0.02, + "learning_rate": 4.994776619575945e-05, + "loss": 1.1093, + "step": 20678 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947761120740064e-05, + "loss": 1.1785, + "step": 20679 + }, + { + "epoch": 0.02, + "learning_rate": 4.99477560454744e-05, + "loss": 1.1368, + "step": 20680 + }, + { + "epoch": 0.02, + "learning_rate": 4.994775096996247e-05, + "loss": 1.0208, + "step": 20681 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947745894204266e-05, + "loss": 1.1131, + "step": 20682 + }, + { + "epoch": 0.02, + "learning_rate": 4.994774081819978e-05, + "loss": 1.0333, + "step": 20683 + }, + { + "epoch": 0.02, + "learning_rate": 4.994773574194903e-05, + "loss": 0.9327, + "step": 20684 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947730665452006e-05, + "loss": 1.1032, + "step": 20685 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947725588708706e-05, + "loss": 1.271, + "step": 20686 + }, + { + "epoch": 0.02, + "learning_rate": 4.994772051171913e-05, + "loss": 1.1184, + "step": 20687 + }, + { + "epoch": 0.02, + "learning_rate": 4.994771543448328e-05, + "loss": 0.5272, + "step": 20688 + }, + { + "epoch": 0.02, + "learning_rate": 4.994771035700116e-05, + "loss": 1.1593, + "step": 20689 + }, + { + "epoch": 0.02, + "learning_rate": 4.994770527927276e-05, + "loss": 1.1369, + "step": 20690 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947700201298094e-05, + "loss": 1.0505, + "step": 20691 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947695123077156e-05, + "loss": 0.7023, + "step": 20692 + }, + { + "epoch": 0.02, + "learning_rate": 4.994769004460994e-05, + "loss": 0.9648, + "step": 20693 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947684965896456e-05, + "loss": 1.2624, + "step": 20694 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947679886936695e-05, + "loss": 1.0662, + "step": 20695 + }, + { + "epoch": 0.02, + "learning_rate": 4.994767480773066e-05, + "loss": 1.0596, + "step": 20696 + }, + { + "epoch": 0.02, + "learning_rate": 4.994766972827836e-05, + "loss": 0.838, + "step": 20697 + }, + { + "epoch": 0.02, + "learning_rate": 4.994766464857978e-05, + "loss": 1.13, + "step": 20698 + }, + { + "epoch": 0.02, + "learning_rate": 4.994765956863493e-05, + "loss": 1.0402, + "step": 20699 + }, + { + "epoch": 0.02, + "learning_rate": 4.994765448844381e-05, + "loss": 1.1582, + "step": 20700 + }, + { + "epoch": 0.02, + "learning_rate": 4.99476494080064e-05, + "loss": 1.8826, + "step": 20701 + }, + { + "epoch": 0.02, + "learning_rate": 4.994764432732274e-05, + "loss": 1.3675, + "step": 20702 + }, + { + "epoch": 0.02, + "learning_rate": 4.994763924639279e-05, + "loss": 1.1388, + "step": 20703 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947634165216584e-05, + "loss": 1.1243, + "step": 20704 + }, + { + "epoch": 0.02, + "learning_rate": 4.994762908379409e-05, + "loss": 1.1758, + "step": 20705 + }, + { + "epoch": 0.02, + "learning_rate": 4.994762400212533e-05, + "loss": 1.0824, + "step": 20706 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947618920210304e-05, + "loss": 1.1573, + "step": 20707 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947613838048996e-05, + "loss": 0.9419, + "step": 20708 + }, + { + "epoch": 0.02, + "learning_rate": 4.994760875564142e-05, + "loss": 1.3454, + "step": 20709 + }, + { + "epoch": 0.02, + "learning_rate": 4.994760367298757e-05, + "loss": 0.9501, + "step": 20710 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947598590087445e-05, + "loss": 0.9463, + "step": 20711 + }, + { + "epoch": 0.02, + "learning_rate": 4.994759350694106e-05, + "loss": 1.1179, + "step": 20712 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947588423548386e-05, + "loss": 0.9973, + "step": 20713 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947583339909446e-05, + "loss": 1.3399, + "step": 20714 + }, + { + "epoch": 0.02, + "learning_rate": 4.994757825602424e-05, + "loss": 0.9823, + "step": 20715 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947573171892755e-05, + "loss": 1.2456, + "step": 20716 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947568087515005e-05, + "loss": 1.1046, + "step": 20717 + }, + { + "epoch": 0.02, + "learning_rate": 4.994756300289098e-05, + "loss": 0.9354, + "step": 20718 + }, + { + "epoch": 0.02, + "learning_rate": 4.994755791802068e-05, + "loss": 1.2016, + "step": 20719 + }, + { + "epoch": 0.02, + "learning_rate": 4.994755283290411e-05, + "loss": 1.1467, + "step": 20720 + }, + { + "epoch": 0.02, + "learning_rate": 4.994754774754127e-05, + "loss": 0.9806, + "step": 20721 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947542661932156e-05, + "loss": 1.1871, + "step": 20722 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947537576076774e-05, + "loss": 1.0602, + "step": 20723 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947532489975116e-05, + "loss": 1.1738, + "step": 20724 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947527403627194e-05, + "loss": 0.9801, + "step": 20725 + }, + { + "epoch": 0.02, + "learning_rate": 4.994752231703299e-05, + "loss": 0.9521, + "step": 20726 + }, + { + "epoch": 0.02, + "learning_rate": 4.994751723019252e-05, + "loss": 1.2318, + "step": 20727 + }, + { + "epoch": 0.02, + "learning_rate": 4.994751214310578e-05, + "loss": 1.1267, + "step": 20728 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947507055772764e-05, + "loss": 1.092, + "step": 20729 + }, + { + "epoch": 0.02, + "learning_rate": 4.994750196819348e-05, + "loss": 1.1567, + "step": 20730 + }, + { + "epoch": 0.02, + "learning_rate": 4.994749688036793e-05, + "loss": 1.1191, + "step": 20731 + }, + { + "epoch": 0.02, + "learning_rate": 4.99474917922961e-05, + "loss": 1.0886, + "step": 20732 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947486703978e-05, + "loss": 1.0951, + "step": 20733 + }, + { + "epoch": 0.02, + "learning_rate": 4.994748161541363e-05, + "loss": 1.0129, + "step": 20734 + }, + { + "epoch": 0.02, + "learning_rate": 4.994747652660299e-05, + "loss": 1.0814, + "step": 20735 + }, + { + "epoch": 0.02, + "learning_rate": 4.994747143754608e-05, + "loss": 1.2244, + "step": 20736 + }, + { + "epoch": 0.02, + "learning_rate": 4.99474663482429e-05, + "loss": 1.23, + "step": 20737 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947461258693446e-05, + "loss": 1.2448, + "step": 20738 + }, + { + "epoch": 0.02, + "learning_rate": 4.994745616889772e-05, + "loss": 1.104, + "step": 20739 + }, + { + "epoch": 0.02, + "learning_rate": 4.994745107885572e-05, + "loss": 1.0084, + "step": 20740 + }, + { + "epoch": 0.02, + "learning_rate": 4.994744598856746e-05, + "loss": 1.0627, + "step": 20741 + }, + { + "epoch": 0.02, + "learning_rate": 4.994744089803293e-05, + "loss": 0.8839, + "step": 20742 + }, + { + "epoch": 0.02, + "learning_rate": 4.994743580725212e-05, + "loss": 0.6835, + "step": 20743 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947430716225044e-05, + "loss": 1.103, + "step": 20744 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947425624951694e-05, + "loss": 0.9938, + "step": 20745 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947420533432074e-05, + "loss": 0.9369, + "step": 20746 + }, + { + "epoch": 0.02, + "learning_rate": 4.994741544166619e-05, + "loss": 0.9632, + "step": 20747 + }, + { + "epoch": 0.02, + "learning_rate": 4.994741034965403e-05, + "loss": 0.8797, + "step": 20748 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947405257395595e-05, + "loss": 1.1272, + "step": 20749 + }, + { + "epoch": 0.02, + "learning_rate": 4.99474001648909e-05, + "loss": 1.2303, + "step": 20750 + }, + { + "epoch": 0.02, + "learning_rate": 4.994739507213993e-05, + "loss": 1.2516, + "step": 20751 + }, + { + "epoch": 0.02, + "learning_rate": 4.994738997914269e-05, + "loss": 1.2519, + "step": 20752 + }, + { + "epoch": 0.02, + "learning_rate": 4.994738488589918e-05, + "loss": 1.3436, + "step": 20753 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947379792409397e-05, + "loss": 1.5545, + "step": 20754 + }, + { + "epoch": 0.02, + "learning_rate": 4.994737469867335e-05, + "loss": 0.9951, + "step": 20755 + }, + { + "epoch": 0.02, + "learning_rate": 4.994736960469103e-05, + "loss": 1.3711, + "step": 20756 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947364510462444e-05, + "loss": 1.37, + "step": 20757 + }, + { + "epoch": 0.02, + "learning_rate": 4.994735941598758e-05, + "loss": 1.3354, + "step": 20758 + }, + { + "epoch": 0.02, + "learning_rate": 4.994735432126645e-05, + "loss": 1.0918, + "step": 20759 + }, + { + "epoch": 0.02, + "learning_rate": 4.994734922629905e-05, + "loss": 1.0659, + "step": 20760 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947344131085385e-05, + "loss": 1.1695, + "step": 20761 + }, + { + "epoch": 0.02, + "learning_rate": 4.994733903562544e-05, + "loss": 1.1368, + "step": 20762 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947333939919236e-05, + "loss": 0.9634, + "step": 20763 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947328843966754e-05, + "loss": 1.0036, + "step": 20764 + }, + { + "epoch": 0.02, + "learning_rate": 4.994732374776801e-05, + "loss": 1.1566, + "step": 20765 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947318651322994e-05, + "loss": 1.5064, + "step": 20766 + }, + { + "epoch": 0.02, + "learning_rate": 4.99473135546317e-05, + "loss": 1.0103, + "step": 20767 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947308457694155e-05, + "loss": 1.0233, + "step": 20768 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947303360510324e-05, + "loss": 1.0324, + "step": 20769 + }, + { + "epoch": 0.02, + "learning_rate": 4.994729826308023e-05, + "loss": 1.0067, + "step": 20770 + }, + { + "epoch": 0.02, + "learning_rate": 4.994729316540387e-05, + "loss": 0.9207, + "step": 20771 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947288067481234e-05, + "loss": 1.109, + "step": 20772 + }, + { + "epoch": 0.02, + "learning_rate": 4.994728296931234e-05, + "loss": 1.1022, + "step": 20773 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947277870897164e-05, + "loss": 0.9956, + "step": 20774 + }, + { + "epoch": 0.02, + "learning_rate": 4.994727277223572e-05, + "loss": 1.0054, + "step": 20775 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947267673328016e-05, + "loss": 1.0118, + "step": 20776 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947262574174034e-05, + "loss": 0.8171, + "step": 20777 + }, + { + "epoch": 0.02, + "learning_rate": 4.994725747477379e-05, + "loss": 0.8109, + "step": 20778 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947252375127274e-05, + "loss": 1.1602, + "step": 20779 + }, + { + "epoch": 0.02, + "learning_rate": 4.994724727523449e-05, + "loss": 1.2367, + "step": 20780 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947242175095436e-05, + "loss": 1.3149, + "step": 20781 + }, + { + "epoch": 0.02, + "learning_rate": 4.994723707471012e-05, + "loss": 1.0764, + "step": 20782 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947231974078524e-05, + "loss": 1.169, + "step": 20783 + }, + { + "epoch": 0.02, + "learning_rate": 4.994722687320067e-05, + "loss": 0.9984, + "step": 20784 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947221772076534e-05, + "loss": 1.0858, + "step": 20785 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947216670706145e-05, + "loss": 1.0889, + "step": 20786 + }, + { + "epoch": 0.02, + "learning_rate": 4.994721156908948e-05, + "loss": 1.1517, + "step": 20787 + }, + { + "epoch": 0.02, + "learning_rate": 4.994720646722655e-05, + "loss": 1.0422, + "step": 20788 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947201365117344e-05, + "loss": 1.7072, + "step": 20789 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947196262761876e-05, + "loss": 2.0981, + "step": 20790 + }, + { + "epoch": 0.02, + "learning_rate": 4.994719116016014e-05, + "loss": 5.9634, + "step": 20791 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947186057312137e-05, + "loss": 5.8435, + "step": 20792 + }, + { + "epoch": 0.02, + "learning_rate": 4.994718095421786e-05, + "loss": 5.8926, + "step": 20793 + }, + { + "epoch": 0.02, + "learning_rate": 4.994717585087731e-05, + "loss": 4.4387, + "step": 20794 + }, + { + "epoch": 0.02, + "learning_rate": 4.994717074729051e-05, + "loss": 1.0005, + "step": 20795 + }, + { + "epoch": 0.02, + "learning_rate": 4.994716564345743e-05, + "loss": 1.1464, + "step": 20796 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947160539378084e-05, + "loss": 1.0787, + "step": 20797 + }, + { + "epoch": 0.02, + "learning_rate": 4.994715543505247e-05, + "loss": 1.4069, + "step": 20798 + }, + { + "epoch": 0.02, + "learning_rate": 4.994715033048059e-05, + "loss": 1.0131, + "step": 20799 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947145225662436e-05, + "loss": 1.1181, + "step": 20800 + }, + { + "epoch": 0.02, + "learning_rate": 4.994714012059802e-05, + "loss": 1.2829, + "step": 20801 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947135015287336e-05, + "loss": 1.1655, + "step": 20802 + }, + { + "epoch": 0.02, + "learning_rate": 4.994712990973039e-05, + "loss": 0.9592, + "step": 20803 + }, + { + "epoch": 0.02, + "learning_rate": 4.994712480392717e-05, + "loss": 0.6747, + "step": 20804 + }, + { + "epoch": 0.02, + "learning_rate": 4.994711969787769e-05, + "loss": 0.9551, + "step": 20805 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947114591581925e-05, + "loss": 1.1996, + "step": 20806 + }, + { + "epoch": 0.02, + "learning_rate": 4.994710948503991e-05, + "loss": 1.004, + "step": 20807 + }, + { + "epoch": 0.02, + "learning_rate": 4.994710437825162e-05, + "loss": 1.195, + "step": 20808 + }, + { + "epoch": 0.02, + "learning_rate": 4.994709927121706e-05, + "loss": 0.9958, + "step": 20809 + }, + { + "epoch": 0.02, + "learning_rate": 4.994709416393624e-05, + "loss": 1.2091, + "step": 20810 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947089056409147e-05, + "loss": 1.1386, + "step": 20811 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947083948635784e-05, + "loss": 1.045, + "step": 20812 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947078840616166e-05, + "loss": 1.1134, + "step": 20813 + }, + { + "epoch": 0.02, + "learning_rate": 4.994707373235027e-05, + "loss": 1.4223, + "step": 20814 + }, + { + "epoch": 0.02, + "learning_rate": 4.994706862383811e-05, + "loss": 1.1985, + "step": 20815 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947063515079685e-05, + "loss": 1.2451, + "step": 20816 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947058406074995e-05, + "loss": 1.0046, + "step": 20817 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947053296824034e-05, + "loss": 0.8655, + "step": 20818 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947048187326804e-05, + "loss": 1.1277, + "step": 20819 + }, + { + "epoch": 0.02, + "learning_rate": 4.994704307758331e-05, + "loss": 0.9173, + "step": 20820 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947037967593555e-05, + "loss": 0.9538, + "step": 20821 + }, + { + "epoch": 0.02, + "learning_rate": 4.994703285735752e-05, + "loss": 0.7732, + "step": 20822 + }, + { + "epoch": 0.02, + "learning_rate": 4.994702774687523e-05, + "loss": 1.0398, + "step": 20823 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947022636146675e-05, + "loss": 1.1848, + "step": 20824 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947017525171847e-05, + "loss": 0.7015, + "step": 20825 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947012413950755e-05, + "loss": 0.8922, + "step": 20826 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947007302483394e-05, + "loss": 1.1208, + "step": 20827 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947002190769763e-05, + "loss": 1.0976, + "step": 20828 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946997078809876e-05, + "loss": 1.2356, + "step": 20829 + }, + { + "epoch": 0.02, + "learning_rate": 4.994699196660372e-05, + "loss": 0.8612, + "step": 20830 + }, + { + "epoch": 0.02, + "learning_rate": 4.994698685415129e-05, + "loss": 1.0709, + "step": 20831 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946981741452604e-05, + "loss": 0.7906, + "step": 20832 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946976628507645e-05, + "loss": 0.9268, + "step": 20833 + }, + { + "epoch": 0.02, + "learning_rate": 4.994697151531643e-05, + "loss": 1.2107, + "step": 20834 + }, + { + "epoch": 0.02, + "learning_rate": 4.994696640187894e-05, + "loss": 1.335, + "step": 20835 + }, + { + "epoch": 0.02, + "learning_rate": 4.994696128819518e-05, + "loss": 1.0986, + "step": 20836 + }, + { + "epoch": 0.02, + "learning_rate": 4.994695617426516e-05, + "loss": 1.1421, + "step": 20837 + }, + { + "epoch": 0.02, + "learning_rate": 4.994695106008888e-05, + "loss": 1.0623, + "step": 20838 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946945945666327e-05, + "loss": 1.326, + "step": 20839 + }, + { + "epoch": 0.02, + "learning_rate": 4.99469408309975e-05, + "loss": 1.2441, + "step": 20840 + }, + { + "epoch": 0.02, + "learning_rate": 4.994693571608242e-05, + "loss": 0.9912, + "step": 20841 + }, + { + "epoch": 0.02, + "learning_rate": 4.994693060092107e-05, + "loss": 0.9385, + "step": 20842 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946925485513454e-05, + "loss": 1.2431, + "step": 20843 + }, + { + "epoch": 0.02, + "learning_rate": 4.994692036985958e-05, + "loss": 1.3795, + "step": 20844 + }, + { + "epoch": 0.02, + "learning_rate": 4.994691525395943e-05, + "loss": 0.9865, + "step": 20845 + }, + { + "epoch": 0.02, + "learning_rate": 4.994691013781302e-05, + "loss": 1.1457, + "step": 20846 + }, + { + "epoch": 0.02, + "learning_rate": 4.994690502142034e-05, + "loss": 0.9403, + "step": 20847 + }, + { + "epoch": 0.02, + "learning_rate": 4.99468999047814e-05, + "loss": 1.2468, + "step": 20848 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946894787896195e-05, + "loss": 0.9696, + "step": 20849 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946889670764725e-05, + "loss": 1.0227, + "step": 20850 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946884553386984e-05, + "loss": 1.1223, + "step": 20851 + }, + { + "epoch": 0.02, + "learning_rate": 4.994687943576298e-05, + "loss": 1.1746, + "step": 20852 + }, + { + "epoch": 0.02, + "learning_rate": 4.994687431789271e-05, + "loss": 1.2417, + "step": 20853 + }, + { + "epoch": 0.02, + "learning_rate": 4.994686919977618e-05, + "loss": 0.9782, + "step": 20854 + }, + { + "epoch": 0.02, + "learning_rate": 4.994686408141338e-05, + "loss": 1.1173, + "step": 20855 + }, + { + "epoch": 0.02, + "learning_rate": 4.994685896280432e-05, + "loss": 1.0773, + "step": 20856 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946853843948994e-05, + "loss": 1.2182, + "step": 20857 + }, + { + "epoch": 0.02, + "learning_rate": 4.99468487248474e-05, + "loss": 0.6974, + "step": 20858 + }, + { + "epoch": 0.02, + "learning_rate": 4.994684360549954e-05, + "loss": 1.0724, + "step": 20859 + }, + { + "epoch": 0.02, + "learning_rate": 4.994683848590542e-05, + "loss": 0.9102, + "step": 20860 + }, + { + "epoch": 0.02, + "learning_rate": 4.994683336606504e-05, + "loss": 0.9819, + "step": 20861 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946828245978386e-05, + "loss": 0.8667, + "step": 20862 + }, + { + "epoch": 0.02, + "learning_rate": 4.994682312564547e-05, + "loss": 0.9272, + "step": 20863 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946818005066284e-05, + "loss": 1.0001, + "step": 20864 + }, + { + "epoch": 0.02, + "learning_rate": 4.994681288424085e-05, + "loss": 1.0791, + "step": 20865 + }, + { + "epoch": 0.02, + "learning_rate": 4.994680776316914e-05, + "loss": 1.0464, + "step": 20866 + }, + { + "epoch": 0.02, + "learning_rate": 4.994680264185116e-05, + "loss": 1.0748, + "step": 20867 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946797520286924e-05, + "loss": 1.1578, + "step": 20868 + }, + { + "epoch": 0.02, + "learning_rate": 4.994679239847642e-05, + "loss": 1.4008, + "step": 20869 + }, + { + "epoch": 0.02, + "learning_rate": 4.994678727641965e-05, + "loss": 1.103, + "step": 20870 + }, + { + "epoch": 0.02, + "learning_rate": 4.994678215411662e-05, + "loss": 1.4026, + "step": 20871 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946777031567323e-05, + "loss": 1.2187, + "step": 20872 + }, + { + "epoch": 0.02, + "learning_rate": 4.994677190877177e-05, + "loss": 1.0401, + "step": 20873 + }, + { + "epoch": 0.02, + "learning_rate": 4.994676678572995e-05, + "loss": 1.1067, + "step": 20874 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946761662441857e-05, + "loss": 0.8927, + "step": 20875 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946756538907515e-05, + "loss": 1.4942, + "step": 20876 + }, + { + "epoch": 0.02, + "learning_rate": 4.994675141512689e-05, + "loss": 1.0961, + "step": 20877 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946746291100016e-05, + "loss": 1.2383, + "step": 20878 + }, + { + "epoch": 0.02, + "learning_rate": 4.994674116682687e-05, + "loss": 0.9971, + "step": 20879 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946736042307465e-05, + "loss": 1.0888, + "step": 20880 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946730917541795e-05, + "loss": 1.0409, + "step": 20881 + }, + { + "epoch": 0.02, + "learning_rate": 4.994672579252987e-05, + "loss": 1.1712, + "step": 20882 + }, + { + "epoch": 0.02, + "learning_rate": 4.994672066727167e-05, + "loss": 0.9231, + "step": 20883 + }, + { + "epoch": 0.02, + "learning_rate": 4.994671554176721e-05, + "loss": 1.1859, + "step": 20884 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946710416016486e-05, + "loss": 1.0145, + "step": 20885 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946705290019495e-05, + "loss": 1.2621, + "step": 20886 + }, + { + "epoch": 0.02, + "learning_rate": 4.994670016377625e-05, + "loss": 1.0818, + "step": 20887 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946695037286736e-05, + "loss": 1.1749, + "step": 20888 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946689910550956e-05, + "loss": 0.9284, + "step": 20889 + }, + { + "epoch": 0.02, + "learning_rate": 4.994668478356892e-05, + "loss": 1.0909, + "step": 20890 + }, + { + "epoch": 0.02, + "learning_rate": 4.994667965634061e-05, + "loss": 0.9114, + "step": 20891 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946674528866044e-05, + "loss": 1.1396, + "step": 20892 + }, + { + "epoch": 0.02, + "learning_rate": 4.994666940114522e-05, + "loss": 1.0086, + "step": 20893 + }, + { + "epoch": 0.02, + "learning_rate": 4.994666427317812e-05, + "loss": 1.1465, + "step": 20894 + }, + { + "epoch": 0.02, + "learning_rate": 4.994665914496477e-05, + "loss": 0.9023, + "step": 20895 + }, + { + "epoch": 0.02, + "learning_rate": 4.994665401650515e-05, + "loss": 1.3454, + "step": 20896 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946648887799266e-05, + "loss": 1.3141, + "step": 20897 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946643758847125e-05, + "loss": 0.9223, + "step": 20898 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946638629648715e-05, + "loss": 0.8794, + "step": 20899 + }, + { + "epoch": 0.02, + "learning_rate": 4.994663350020405e-05, + "loss": 0.8245, + "step": 20900 + }, + { + "epoch": 0.02, + "learning_rate": 4.994662837051311e-05, + "loss": 0.86, + "step": 20901 + }, + { + "epoch": 0.02, + "learning_rate": 4.994662324057592e-05, + "loss": 1.3174, + "step": 20902 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946618110392465e-05, + "loss": 0.5888, + "step": 20903 + }, + { + "epoch": 0.02, + "learning_rate": 4.994661297996274e-05, + "loss": 1.3055, + "step": 20904 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946607849286766e-05, + "loss": 1.1049, + "step": 20905 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946602718364515e-05, + "loss": 0.8983, + "step": 20906 + }, + { + "epoch": 0.02, + "learning_rate": 4.994659758719601e-05, + "loss": 0.8022, + "step": 20907 + }, + { + "epoch": 0.02, + "learning_rate": 4.994659245578124e-05, + "loss": 0.9775, + "step": 20908 + }, + { + "epoch": 0.02, + "learning_rate": 4.994658732412021e-05, + "loss": 1.0935, + "step": 20909 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946582192212916e-05, + "loss": 0.9607, + "step": 20910 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946577060059357e-05, + "loss": 1.119, + "step": 20911 + }, + { + "epoch": 0.02, + "learning_rate": 4.994657192765954e-05, + "loss": 0.9225, + "step": 20912 + }, + { + "epoch": 0.02, + "learning_rate": 4.994656679501346e-05, + "loss": 1.0327, + "step": 20913 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946561662121115e-05, + "loss": 0.8572, + "step": 20914 + }, + { + "epoch": 0.02, + "learning_rate": 4.994655652898251e-05, + "loss": 0.6497, + "step": 20915 + }, + { + "epoch": 0.02, + "learning_rate": 4.994655139559765e-05, + "loss": 0.772, + "step": 20916 + }, + { + "epoch": 0.02, + "learning_rate": 4.994654626196652e-05, + "loss": 1.2574, + "step": 20917 + }, + { + "epoch": 0.02, + "learning_rate": 4.994654112808913e-05, + "loss": 1.2265, + "step": 20918 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946535993965485e-05, + "loss": 1.2345, + "step": 20919 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946530859595566e-05, + "loss": 0.9126, + "step": 20920 + }, + { + "epoch": 0.02, + "learning_rate": 4.994652572497939e-05, + "loss": 2.0427, + "step": 20921 + }, + { + "epoch": 0.02, + "learning_rate": 4.994652059011695e-05, + "loss": 1.504, + "step": 20922 + }, + { + "epoch": 0.02, + "learning_rate": 4.994651545500826e-05, + "loss": 1.0284, + "step": 20923 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946510319653295e-05, + "loss": 1.3978, + "step": 20924 + }, + { + "epoch": 0.02, + "learning_rate": 4.994650518405208e-05, + "loss": 1.101, + "step": 20925 + }, + { + "epoch": 0.02, + "learning_rate": 4.994650004820459e-05, + "loss": 1.1041, + "step": 20926 + }, + { + "epoch": 0.02, + "learning_rate": 4.994649491211085e-05, + "loss": 0.9689, + "step": 20927 + }, + { + "epoch": 0.02, + "learning_rate": 4.994648977577084e-05, + "loss": 1.1889, + "step": 20928 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946484639184584e-05, + "loss": 1.1946, + "step": 20929 + }, + { + "epoch": 0.02, + "learning_rate": 4.994647950235205e-05, + "loss": 1.0847, + "step": 20930 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946474365273264e-05, + "loss": 1.0617, + "step": 20931 + }, + { + "epoch": 0.02, + "learning_rate": 4.994646922794822e-05, + "loss": 1.1662, + "step": 20932 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946464090376906e-05, + "loss": 0.985, + "step": 20933 + }, + { + "epoch": 0.02, + "learning_rate": 4.994645895255933e-05, + "loss": 1.4368, + "step": 20934 + }, + { + "epoch": 0.02, + "learning_rate": 4.99464538144955e-05, + "loss": 1.1823, + "step": 20935 + }, + { + "epoch": 0.02, + "learning_rate": 4.994644867618541e-05, + "loss": 1.4004, + "step": 20936 + }, + { + "epoch": 0.02, + "learning_rate": 4.994644353762905e-05, + "loss": 1.1122, + "step": 20937 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946438398826436e-05, + "loss": 1.186, + "step": 20938 + }, + { + "epoch": 0.02, + "learning_rate": 4.994643325977756e-05, + "loss": 1.205, + "step": 20939 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946428120482424e-05, + "loss": 1.3062, + "step": 20940 + }, + { + "epoch": 0.02, + "learning_rate": 4.994642298094103e-05, + "loss": 1.2884, + "step": 20941 + }, + { + "epoch": 0.02, + "learning_rate": 4.994641784115337e-05, + "loss": 1.0721, + "step": 20942 + }, + { + "epoch": 0.02, + "learning_rate": 4.994641270111945e-05, + "loss": 1.2893, + "step": 20943 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946407560839284e-05, + "loss": 0.955, + "step": 20944 + }, + { + "epoch": 0.02, + "learning_rate": 4.994640242031284e-05, + "loss": 0.9796, + "step": 20945 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946397279540136e-05, + "loss": 0.9761, + "step": 20946 + }, + { + "epoch": 0.02, + "learning_rate": 4.994639213852118e-05, + "loss": 0.8201, + "step": 20947 + }, + { + "epoch": 0.02, + "learning_rate": 4.994638699725596e-05, + "loss": 0.7255, + "step": 20948 + }, + { + "epoch": 0.02, + "learning_rate": 4.994638185574448e-05, + "loss": 1.8115, + "step": 20949 + }, + { + "epoch": 0.02, + "learning_rate": 4.994637671398674e-05, + "loss": 1.7783, + "step": 20950 + }, + { + "epoch": 0.02, + "learning_rate": 4.994637157198274e-05, + "loss": 1.1134, + "step": 20951 + }, + { + "epoch": 0.02, + "learning_rate": 4.994636642973248e-05, + "loss": 1.4805, + "step": 20952 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946361287235954e-05, + "loss": 1.0357, + "step": 20953 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946356144493175e-05, + "loss": 1.0688, + "step": 20954 + }, + { + "epoch": 0.02, + "learning_rate": 4.994635100150414e-05, + "loss": 1.239, + "step": 20955 + }, + { + "epoch": 0.02, + "learning_rate": 4.994634585826884e-05, + "loss": 1.0338, + "step": 20956 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946340714787275e-05, + "loss": 1.1258, + "step": 20957 + }, + { + "epoch": 0.02, + "learning_rate": 4.994633557105946e-05, + "loss": 1.0223, + "step": 20958 + }, + { + "epoch": 0.02, + "learning_rate": 4.994633042708538e-05, + "loss": 1.1425, + "step": 20959 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946325282865035e-05, + "loss": 1.1216, + "step": 20960 + }, + { + "epoch": 0.02, + "learning_rate": 4.994632013839844e-05, + "loss": 1.3265, + "step": 20961 + }, + { + "epoch": 0.02, + "learning_rate": 4.994631499368558e-05, + "loss": 1.1231, + "step": 20962 + }, + { + "epoch": 0.02, + "learning_rate": 4.994630984872646e-05, + "loss": 1.2164, + "step": 20963 + }, + { + "epoch": 0.02, + "learning_rate": 4.994630470352109e-05, + "loss": 0.9801, + "step": 20964 + }, + { + "epoch": 0.02, + "learning_rate": 4.994629955806946e-05, + "loss": 1.1029, + "step": 20965 + }, + { + "epoch": 0.02, + "learning_rate": 4.994629441237155e-05, + "loss": 1.0052, + "step": 20966 + }, + { + "epoch": 0.02, + "learning_rate": 4.99462892664274e-05, + "loss": 0.9723, + "step": 20967 + }, + { + "epoch": 0.02, + "learning_rate": 4.994628412023699e-05, + "loss": 1.3189, + "step": 20968 + }, + { + "epoch": 0.02, + "learning_rate": 4.994627897380031e-05, + "loss": 1.1371, + "step": 20969 + }, + { + "epoch": 0.02, + "learning_rate": 4.994627382711738e-05, + "loss": 1.1727, + "step": 20970 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946268680188184e-05, + "loss": 1.2555, + "step": 20971 + }, + { + "epoch": 0.02, + "learning_rate": 4.994626353301274e-05, + "loss": 1.277, + "step": 20972 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946258385591026e-05, + "loss": 1.3507, + "step": 20973 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946253237923055e-05, + "loss": 1.1635, + "step": 20974 + }, + { + "epoch": 0.02, + "learning_rate": 4.994624809000883e-05, + "loss": 1.1027, + "step": 20975 + }, + { + "epoch": 0.02, + "learning_rate": 4.994624294184834e-05, + "loss": 1.1862, + "step": 20976 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946237793441594e-05, + "loss": 0.8513, + "step": 20977 + }, + { + "epoch": 0.02, + "learning_rate": 4.994623264478859e-05, + "loss": 1.0352, + "step": 20978 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946227495889334e-05, + "loss": 1.2291, + "step": 20979 + }, + { + "epoch": 0.02, + "learning_rate": 4.994622234674381e-05, + "loss": 1.0522, + "step": 20980 + }, + { + "epoch": 0.02, + "learning_rate": 4.994621719735203e-05, + "loss": 1.1142, + "step": 20981 + }, + { + "epoch": 0.02, + "learning_rate": 4.994621204771399e-05, + "loss": 0.9774, + "step": 20982 + }, + { + "epoch": 0.02, + "learning_rate": 4.994620689782969e-05, + "loss": 0.9852, + "step": 20983 + }, + { + "epoch": 0.02, + "learning_rate": 4.994620174769913e-05, + "loss": 1.09, + "step": 20984 + }, + { + "epoch": 0.02, + "learning_rate": 4.994619659732232e-05, + "loss": 0.8063, + "step": 20985 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946191446699245e-05, + "loss": 0.9329, + "step": 20986 + }, + { + "epoch": 0.02, + "learning_rate": 4.994618629582992e-05, + "loss": 1.4585, + "step": 20987 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946181144714326e-05, + "loss": 1.1385, + "step": 20988 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946175993352486e-05, + "loss": 0.9578, + "step": 20989 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946170841744376e-05, + "loss": 0.6807, + "step": 20990 + }, + { + "epoch": 0.02, + "learning_rate": 4.994616568989001e-05, + "loss": 1.2238, + "step": 20991 + }, + { + "epoch": 0.02, + "learning_rate": 4.994616053778939e-05, + "loss": 1.1575, + "step": 20992 + }, + { + "epoch": 0.02, + "learning_rate": 4.994615538544251e-05, + "loss": 1.133, + "step": 20993 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946150232849374e-05, + "loss": 0.9837, + "step": 20994 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946145080009976e-05, + "loss": 1.0795, + "step": 20995 + }, + { + "epoch": 0.02, + "learning_rate": 4.994613992692432e-05, + "loss": 0.9672, + "step": 20996 + }, + { + "epoch": 0.02, + "learning_rate": 4.994613477359241e-05, + "loss": 1.1224, + "step": 20997 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946129620014245e-05, + "loss": 0.8909, + "step": 20998 + }, + { + "epoch": 0.02, + "learning_rate": 4.994612446618982e-05, + "loss": 1.0183, + "step": 20999 + }, + { + "epoch": 0.02, + "learning_rate": 4.994611931211913e-05, + "loss": 1.1505, + "step": 21000 + }, + { + "epoch": 0.02, + "eval_loss": 1.017486810684204, + "eval_runtime": 85.53, + "eval_samples_per_second": 16.193, + "eval_steps_per_second": 4.057, + "step": 21000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946114157802195e-05, + "loss": 1.0313, + "step": 21001 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946109003239e-05, + "loss": 1.0053, + "step": 21002 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946103848429536e-05, + "loss": 1.2735, + "step": 21003 + }, + { + "epoch": 0.02, + "learning_rate": 4.994609869337382e-05, + "loss": 1.0377, + "step": 21004 + }, + { + "epoch": 0.02, + "learning_rate": 4.994609353807185e-05, + "loss": 1.2023, + "step": 21005 + }, + { + "epoch": 0.02, + "learning_rate": 4.994608838252362e-05, + "loss": 1.3537, + "step": 21006 + }, + { + "epoch": 0.02, + "learning_rate": 4.994608322672914e-05, + "loss": 1.1081, + "step": 21007 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946078070688395e-05, + "loss": 1.1041, + "step": 21008 + }, + { + "epoch": 0.02, + "learning_rate": 4.99460729144014e-05, + "loss": 0.9523, + "step": 21009 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946067757868136e-05, + "loss": 1.0675, + "step": 21010 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946062601088625e-05, + "loss": 0.8807, + "step": 21011 + }, + { + "epoch": 0.02, + "learning_rate": 4.994605744406285e-05, + "loss": 1.1967, + "step": 21012 + }, + { + "epoch": 0.02, + "learning_rate": 4.994605228679082e-05, + "loss": 1.1674, + "step": 21013 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946047129272536e-05, + "loss": 1.2564, + "step": 21014 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946041971507994e-05, + "loss": 0.8709, + "step": 21015 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946036813497196e-05, + "loss": 0.9843, + "step": 21016 + }, + { + "epoch": 0.02, + "learning_rate": 4.994603165524014e-05, + "loss": 1.355, + "step": 21017 + }, + { + "epoch": 0.02, + "learning_rate": 4.994602649673683e-05, + "loss": 1.0709, + "step": 21018 + }, + { + "epoch": 0.02, + "learning_rate": 4.994602133798726e-05, + "loss": 1.2502, + "step": 21019 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946016178991434e-05, + "loss": 0.9942, + "step": 21020 + }, + { + "epoch": 0.02, + "learning_rate": 4.994601101974935e-05, + "loss": 1.094, + "step": 21021 + }, + { + "epoch": 0.02, + "learning_rate": 4.994600586026101e-05, + "loss": 1.4539, + "step": 21022 + }, + { + "epoch": 0.02, + "learning_rate": 4.994600070052642e-05, + "loss": 0.8906, + "step": 21023 + }, + { + "epoch": 0.02, + "learning_rate": 4.994599554054557e-05, + "loss": 1.0852, + "step": 21024 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945990380318456e-05, + "loss": 1.0328, + "step": 21025 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945985219845096e-05, + "loss": 1.1608, + "step": 21026 + }, + { + "epoch": 0.02, + "learning_rate": 4.994598005912547e-05, + "loss": 0.935, + "step": 21027 + }, + { + "epoch": 0.02, + "learning_rate": 4.99459748981596e-05, + "loss": 1.1968, + "step": 21028 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945969736947464e-05, + "loss": 1.0615, + "step": 21029 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945964575489066e-05, + "loss": 0.7513, + "step": 21030 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945959413784425e-05, + "loss": 1.2473, + "step": 21031 + }, + { + "epoch": 0.02, + "learning_rate": 4.994595425183353e-05, + "loss": 1.0366, + "step": 21032 + }, + { + "epoch": 0.02, + "learning_rate": 4.994594908963637e-05, + "loss": 0.4786, + "step": 21033 + }, + { + "epoch": 0.02, + "learning_rate": 4.994594392719296e-05, + "loss": 1.1505, + "step": 21034 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945938764503284e-05, + "loss": 1.268, + "step": 21035 + }, + { + "epoch": 0.02, + "learning_rate": 4.994593360156736e-05, + "loss": 1.0142, + "step": 21036 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945928438385184e-05, + "loss": 0.8381, + "step": 21037 + }, + { + "epoch": 0.02, + "learning_rate": 4.994592327495675e-05, + "loss": 1.4257, + "step": 21038 + }, + { + "epoch": 0.02, + "learning_rate": 4.994591811128205e-05, + "loss": 1.1497, + "step": 21039 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945912947361105e-05, + "loss": 1.245, + "step": 21040 + }, + { + "epoch": 0.02, + "learning_rate": 4.99459077831939e-05, + "loss": 0.8979, + "step": 21041 + }, + { + "epoch": 0.02, + "learning_rate": 4.994590261878045e-05, + "loss": 0.8957, + "step": 21042 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945897454120734e-05, + "loss": 0.9957, + "step": 21043 + }, + { + "epoch": 0.02, + "learning_rate": 4.994589228921476e-05, + "loss": 1.072, + "step": 21044 + }, + { + "epoch": 0.02, + "learning_rate": 4.994588712406254e-05, + "loss": 1.0411, + "step": 21045 + }, + { + "epoch": 0.02, + "learning_rate": 4.994588195866406e-05, + "loss": 1.0436, + "step": 21046 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945876793019316e-05, + "loss": 1.1942, + "step": 21047 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945871627128333e-05, + "loss": 1.1678, + "step": 21048 + }, + { + "epoch": 0.02, + "learning_rate": 4.994586646099109e-05, + "loss": 1.0374, + "step": 21049 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945861294607585e-05, + "loss": 1.183, + "step": 21050 + }, + { + "epoch": 0.02, + "learning_rate": 4.994585612797783e-05, + "loss": 0.7339, + "step": 21051 + }, + { + "epoch": 0.02, + "learning_rate": 4.994585096110182e-05, + "loss": 0.9819, + "step": 21052 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945845793979555e-05, + "loss": 0.898, + "step": 21053 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945840626611035e-05, + "loss": 1.0127, + "step": 21054 + }, + { + "epoch": 0.02, + "learning_rate": 4.994583545899626e-05, + "loss": 0.9624, + "step": 21055 + }, + { + "epoch": 0.02, + "learning_rate": 4.994583029113523e-05, + "loss": 1.2786, + "step": 21056 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945825123027945e-05, + "loss": 1.4213, + "step": 21057 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945819954674407e-05, + "loss": 1.2429, + "step": 21058 + }, + { + "epoch": 0.02, + "learning_rate": 4.994581478607461e-05, + "loss": 1.0678, + "step": 21059 + }, + { + "epoch": 0.02, + "learning_rate": 4.994580961722857e-05, + "loss": 1.065, + "step": 21060 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945804448136255e-05, + "loss": 1.0489, + "step": 21061 + }, + { + "epoch": 0.02, + "learning_rate": 4.99457992787977e-05, + "loss": 1.2093, + "step": 21062 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945794109212893e-05, + "loss": 1.161, + "step": 21063 + }, + { + "epoch": 0.02, + "learning_rate": 4.994578893938182e-05, + "loss": 1.096, + "step": 21064 + }, + { + "epoch": 0.02, + "learning_rate": 4.994578376930451e-05, + "loss": 0.9711, + "step": 21065 + }, + { + "epoch": 0.02, + "learning_rate": 4.994577859898093e-05, + "loss": 0.6296, + "step": 21066 + }, + { + "epoch": 0.02, + "learning_rate": 4.99457734284111e-05, + "loss": 0.614, + "step": 21067 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945768257595016e-05, + "loss": 1.9061, + "step": 21068 + }, + { + "epoch": 0.02, + "learning_rate": 4.994576308653268e-05, + "loss": 1.7768, + "step": 21069 + }, + { + "epoch": 0.02, + "learning_rate": 4.994575791522409e-05, + "loss": 0.7634, + "step": 21070 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945752743669246e-05, + "loss": 1.1721, + "step": 21071 + }, + { + "epoch": 0.02, + "learning_rate": 4.994574757186815e-05, + "loss": 1.1355, + "step": 21072 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945742399820795e-05, + "loss": 0.7708, + "step": 21073 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945737227527185e-05, + "loss": 1.0259, + "step": 21074 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945732054987325e-05, + "loss": 1.265, + "step": 21075 + }, + { + "epoch": 0.02, + "learning_rate": 4.99457268822012e-05, + "loss": 0.7292, + "step": 21076 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945721709168844e-05, + "loss": 0.7803, + "step": 21077 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945716535890216e-05, + "loss": 1.243, + "step": 21078 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945711362365345e-05, + "loss": 1.183, + "step": 21079 + }, + { + "epoch": 0.02, + "learning_rate": 4.994570618859421e-05, + "loss": 1.0076, + "step": 21080 + }, + { + "epoch": 0.02, + "learning_rate": 4.994570101457683e-05, + "loss": 0.7705, + "step": 21081 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945695840313196e-05, + "loss": 1.2117, + "step": 21082 + }, + { + "epoch": 0.02, + "learning_rate": 4.99456906658033e-05, + "loss": 1.142, + "step": 21083 + }, + { + "epoch": 0.02, + "learning_rate": 4.994568549104716e-05, + "loss": 1.1821, + "step": 21084 + }, + { + "epoch": 0.02, + "learning_rate": 4.994568031604476e-05, + "loss": 1.1167, + "step": 21085 + }, + { + "epoch": 0.02, + "learning_rate": 4.994567514079612e-05, + "loss": 1.0798, + "step": 21086 + }, + { + "epoch": 0.02, + "learning_rate": 4.994566996530121e-05, + "loss": 0.9731, + "step": 21087 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945664789560054e-05, + "loss": 1.0906, + "step": 21088 + }, + { + "epoch": 0.02, + "learning_rate": 4.994565961357265e-05, + "loss": 1.0388, + "step": 21089 + }, + { + "epoch": 0.02, + "learning_rate": 4.994565443733899e-05, + "loss": 1.334, + "step": 21090 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945649260859076e-05, + "loss": 1.0248, + "step": 21091 + }, + { + "epoch": 0.02, + "learning_rate": 4.99456440841329e-05, + "loss": 1.0796, + "step": 21092 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945638907160485e-05, + "loss": 1.0, + "step": 21093 + }, + { + "epoch": 0.02, + "learning_rate": 4.994563372994181e-05, + "loss": 1.1476, + "step": 21094 + }, + { + "epoch": 0.02, + "learning_rate": 4.994562855247689e-05, + "loss": 1.2354, + "step": 21095 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945623374765704e-05, + "loss": 1.214, + "step": 21096 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945618196808276e-05, + "loss": 1.0097, + "step": 21097 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945613018604585e-05, + "loss": 1.1555, + "step": 21098 + }, + { + "epoch": 0.02, + "learning_rate": 4.994560784015465e-05, + "loss": 0.9204, + "step": 21099 + }, + { + "epoch": 0.02, + "learning_rate": 4.994560266145846e-05, + "loss": 1.1744, + "step": 21100 + }, + { + "epoch": 0.02, + "learning_rate": 4.994559748251602e-05, + "loss": 1.0138, + "step": 21101 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945592303327326e-05, + "loss": 1.0485, + "step": 21102 + }, + { + "epoch": 0.02, + "learning_rate": 4.994558712389238e-05, + "loss": 1.0806, + "step": 21103 + }, + { + "epoch": 0.02, + "learning_rate": 4.994558194421119e-05, + "loss": 1.2148, + "step": 21104 + }, + { + "epoch": 0.02, + "learning_rate": 4.994557676428373e-05, + "loss": 0.9992, + "step": 21105 + }, + { + "epoch": 0.02, + "learning_rate": 4.994557158411003e-05, + "loss": 0.9681, + "step": 21106 + }, + { + "epoch": 0.02, + "learning_rate": 4.994556640369007e-05, + "loss": 0.9423, + "step": 21107 + }, + { + "epoch": 0.02, + "learning_rate": 4.994556122302386e-05, + "loss": 1.0427, + "step": 21108 + }, + { + "epoch": 0.02, + "learning_rate": 4.99455560421114e-05, + "loss": 1.0163, + "step": 21109 + }, + { + "epoch": 0.02, + "learning_rate": 4.994555086095269e-05, + "loss": 0.9803, + "step": 21110 + }, + { + "epoch": 0.02, + "learning_rate": 4.994554567954772e-05, + "loss": 0.6944, + "step": 21111 + }, + { + "epoch": 0.02, + "learning_rate": 4.994554049789651e-05, + "loss": 1.1024, + "step": 21112 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945535315999045e-05, + "loss": 1.3173, + "step": 21113 + }, + { + "epoch": 0.02, + "learning_rate": 4.994553013385532e-05, + "loss": 1.1826, + "step": 21114 + }, + { + "epoch": 0.02, + "learning_rate": 4.994552495146535e-05, + "loss": 1.214, + "step": 21115 + }, + { + "epoch": 0.02, + "learning_rate": 4.994551976882913e-05, + "loss": 1.0601, + "step": 21116 + }, + { + "epoch": 0.02, + "learning_rate": 4.994551458594665e-05, + "loss": 1.2321, + "step": 21117 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945509402817925e-05, + "loss": 1.2789, + "step": 21118 + }, + { + "epoch": 0.02, + "learning_rate": 4.994550421944295e-05, + "loss": 1.4251, + "step": 21119 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945499035821717e-05, + "loss": 1.2614, + "step": 21120 + }, + { + "epoch": 0.02, + "learning_rate": 4.994549385195424e-05, + "loss": 1.2809, + "step": 21121 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945488667840503e-05, + "loss": 0.9847, + "step": 21122 + }, + { + "epoch": 0.02, + "learning_rate": 4.994548348348053e-05, + "loss": 1.2301, + "step": 21123 + }, + { + "epoch": 0.02, + "learning_rate": 4.994547829887429e-05, + "loss": 0.8903, + "step": 21124 + }, + { + "epoch": 0.02, + "learning_rate": 4.99454731140218e-05, + "loss": 0.8109, + "step": 21125 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945467928923064e-05, + "loss": 1.1551, + "step": 21126 + }, + { + "epoch": 0.02, + "learning_rate": 4.994546274357808e-05, + "loss": 0.5476, + "step": 21127 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945457557986844e-05, + "loss": 0.5172, + "step": 21128 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945452372149347e-05, + "loss": 0.2947, + "step": 21129 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945447186065606e-05, + "loss": 0.8786, + "step": 21130 + }, + { + "epoch": 0.02, + "learning_rate": 4.994544199973561e-05, + "loss": 0.7853, + "step": 21131 + }, + { + "epoch": 0.02, + "learning_rate": 4.994543681315937e-05, + "loss": 1.1334, + "step": 21132 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945431626336876e-05, + "loss": 0.8781, + "step": 21133 + }, + { + "epoch": 0.02, + "learning_rate": 4.994542643926813e-05, + "loss": 1.1763, + "step": 21134 + }, + { + "epoch": 0.02, + "learning_rate": 4.994542125195314e-05, + "loss": 1.0743, + "step": 21135 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945416064391894e-05, + "loss": 1.3162, + "step": 21136 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945410876584394e-05, + "loss": 1.4024, + "step": 21137 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945405688530645e-05, + "loss": 1.2238, + "step": 21138 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945400500230646e-05, + "loss": 1.0513, + "step": 21139 + }, + { + "epoch": 0.02, + "learning_rate": 4.99453953116844e-05, + "loss": 0.9626, + "step": 21140 + }, + { + "epoch": 0.02, + "learning_rate": 4.99453901228919e-05, + "loss": 1.3468, + "step": 21141 + }, + { + "epoch": 0.02, + "learning_rate": 4.994538493385315e-05, + "loss": 1.1838, + "step": 21142 + }, + { + "epoch": 0.02, + "learning_rate": 4.994537974456815e-05, + "loss": 1.0936, + "step": 21143 + }, + { + "epoch": 0.02, + "learning_rate": 4.994537455503689e-05, + "loss": 0.8794, + "step": 21144 + }, + { + "epoch": 0.02, + "learning_rate": 4.99453693652594e-05, + "loss": 0.962, + "step": 21145 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945364175235646e-05, + "loss": 0.8169, + "step": 21146 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945358984965645e-05, + "loss": 1.2008, + "step": 21147 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945353794449395e-05, + "loss": 1.0444, + "step": 21148 + }, + { + "epoch": 0.02, + "learning_rate": 4.994534860368689e-05, + "loss": 1.0054, + "step": 21149 + }, + { + "epoch": 0.02, + "learning_rate": 4.994534341267815e-05, + "loss": 1.4666, + "step": 21150 + }, + { + "epoch": 0.02, + "learning_rate": 4.994533822142314e-05, + "loss": 1.2751, + "step": 21151 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945333029921894e-05, + "loss": 1.3218, + "step": 21152 + }, + { + "epoch": 0.02, + "learning_rate": 4.99453278381744e-05, + "loss": 1.28, + "step": 21153 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945322646180644e-05, + "loss": 1.2512, + "step": 21154 + }, + { + "epoch": 0.02, + "learning_rate": 4.994531745394064e-05, + "loss": 1.118, + "step": 21155 + }, + { + "epoch": 0.02, + "learning_rate": 4.994531226145439e-05, + "loss": 1.2186, + "step": 21156 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945307068721894e-05, + "loss": 1.0428, + "step": 21157 + }, + { + "epoch": 0.02, + "learning_rate": 4.994530187574314e-05, + "loss": 1.0928, + "step": 21158 + }, + { + "epoch": 0.02, + "learning_rate": 4.994529668251815e-05, + "loss": 1.1575, + "step": 21159 + }, + { + "epoch": 0.02, + "learning_rate": 4.99452914890469e-05, + "loss": 1.1642, + "step": 21160 + }, + { + "epoch": 0.02, + "learning_rate": 4.994528629532941e-05, + "loss": 1.0627, + "step": 21161 + }, + { + "epoch": 0.02, + "learning_rate": 4.994528110136566e-05, + "loss": 0.9902, + "step": 21162 + }, + { + "epoch": 0.02, + "learning_rate": 4.994527590715567e-05, + "loss": 1.2233, + "step": 21163 + }, + { + "epoch": 0.02, + "learning_rate": 4.994527071269941e-05, + "loss": 0.5616, + "step": 21164 + }, + { + "epoch": 0.02, + "learning_rate": 4.994526551799692e-05, + "loss": 0.7776, + "step": 21165 + }, + { + "epoch": 0.02, + "learning_rate": 4.994526032304818e-05, + "loss": 1.1388, + "step": 21166 + }, + { + "epoch": 0.02, + "learning_rate": 4.994525512785319e-05, + "loss": 1.1785, + "step": 21167 + }, + { + "epoch": 0.02, + "learning_rate": 4.994524993241194e-05, + "loss": 1.2019, + "step": 21168 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945244736724454e-05, + "loss": 0.9579, + "step": 21169 + }, + { + "epoch": 0.02, + "learning_rate": 4.994523954079072e-05, + "loss": 1.1858, + "step": 21170 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945234344610724e-05, + "loss": 1.0865, + "step": 21171 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945229148184494e-05, + "loss": 1.0308, + "step": 21172 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945223951512e-05, + "loss": 1.0123, + "step": 21173 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945218754593274e-05, + "loss": 0.9677, + "step": 21174 + }, + { + "epoch": 0.02, + "learning_rate": 4.994521355742828e-05, + "loss": 1.0348, + "step": 21175 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945208360017056e-05, + "loss": 1.0258, + "step": 21176 + }, + { + "epoch": 0.02, + "learning_rate": 4.994520316235958e-05, + "loss": 1.04, + "step": 21177 + }, + { + "epoch": 0.02, + "learning_rate": 4.994519796445585e-05, + "loss": 0.9936, + "step": 21178 + }, + { + "epoch": 0.02, + "learning_rate": 4.994519276630587e-05, + "loss": 0.9591, + "step": 21179 + }, + { + "epoch": 0.02, + "learning_rate": 4.994518756790965e-05, + "loss": 0.9519, + "step": 21180 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945182369267166e-05, + "loss": 0.8666, + "step": 21181 + }, + { + "epoch": 0.02, + "learning_rate": 4.994517717037844e-05, + "loss": 1.0334, + "step": 21182 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945171971243476e-05, + "loss": 0.8654, + "step": 21183 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945166771862254e-05, + "loss": 0.85, + "step": 21184 + }, + { + "epoch": 0.02, + "learning_rate": 4.994516157223479e-05, + "loss": 0.9523, + "step": 21185 + }, + { + "epoch": 0.02, + "learning_rate": 4.994515637236107e-05, + "loss": 0.8205, + "step": 21186 + }, + { + "epoch": 0.02, + "learning_rate": 4.994515117224111e-05, + "loss": 0.9746, + "step": 21187 + }, + { + "epoch": 0.02, + "learning_rate": 4.99451459718749e-05, + "loss": 0.9048, + "step": 21188 + }, + { + "epoch": 0.02, + "learning_rate": 4.994514077126244e-05, + "loss": 0.8659, + "step": 21189 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945135570403736e-05, + "loss": 0.9844, + "step": 21190 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945130369298775e-05, + "loss": 0.8789, + "step": 21191 + }, + { + "epoch": 0.02, + "learning_rate": 4.994512516794757e-05, + "loss": 0.7788, + "step": 21192 + }, + { + "epoch": 0.02, + "learning_rate": 4.994511996635012e-05, + "loss": 0.9424, + "step": 21193 + }, + { + "epoch": 0.02, + "learning_rate": 4.994511476450642e-05, + "loss": 0.8791, + "step": 21194 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945109562416475e-05, + "loss": 0.9264, + "step": 21195 + }, + { + "epoch": 0.02, + "learning_rate": 4.994510436008029e-05, + "loss": 0.9351, + "step": 21196 + }, + { + "epoch": 0.02, + "learning_rate": 4.994509915749784e-05, + "loss": 0.8969, + "step": 21197 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945093954669156e-05, + "loss": 0.8645, + "step": 21198 + }, + { + "epoch": 0.02, + "learning_rate": 4.994508875159421e-05, + "loss": 0.8501, + "step": 21199 + }, + { + "epoch": 0.02, + "learning_rate": 4.994508354827303e-05, + "loss": 0.8759, + "step": 21200 + }, + { + "epoch": 0.02, + "learning_rate": 4.99450783447056e-05, + "loss": 1.0809, + "step": 21201 + }, + { + "epoch": 0.02, + "learning_rate": 4.994507314089192e-05, + "loss": 1.0119, + "step": 21202 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945067936832e-05, + "loss": 0.9412, + "step": 21203 + }, + { + "epoch": 0.02, + "learning_rate": 4.994506273252583e-05, + "loss": 0.9274, + "step": 21204 + }, + { + "epoch": 0.02, + "learning_rate": 4.99450575279734e-05, + "loss": 0.9335, + "step": 21205 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945052323174734e-05, + "loss": 0.8489, + "step": 21206 + }, + { + "epoch": 0.02, + "learning_rate": 4.994504711812982e-05, + "loss": 0.8045, + "step": 21207 + }, + { + "epoch": 0.02, + "learning_rate": 4.994504191283866e-05, + "loss": 0.9587, + "step": 21208 + }, + { + "epoch": 0.02, + "learning_rate": 4.994503670730125e-05, + "loss": 0.8599, + "step": 21209 + }, + { + "epoch": 0.02, + "learning_rate": 4.99450315015176e-05, + "loss": 0.7227, + "step": 21210 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945026295487696e-05, + "loss": 0.8593, + "step": 21211 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945021089211544e-05, + "loss": 0.3204, + "step": 21212 + }, + { + "epoch": 0.02, + "learning_rate": 4.994501588268915e-05, + "loss": 0.9194, + "step": 21213 + }, + { + "epoch": 0.02, + "learning_rate": 4.994501067592051e-05, + "loss": 1.0652, + "step": 21214 + }, + { + "epoch": 0.02, + "learning_rate": 4.994500546890562e-05, + "loss": 1.0129, + "step": 21215 + }, + { + "epoch": 0.02, + "learning_rate": 4.994500026164449e-05, + "loss": 0.8645, + "step": 21216 + }, + { + "epoch": 0.02, + "learning_rate": 4.99449950541371e-05, + "loss": 0.9588, + "step": 21217 + }, + { + "epoch": 0.02, + "learning_rate": 4.994498984638347e-05, + "loss": 1.009, + "step": 21218 + }, + { + "epoch": 0.02, + "learning_rate": 4.99449846383836e-05, + "loss": 0.9509, + "step": 21219 + }, + { + "epoch": 0.02, + "learning_rate": 4.994497943013748e-05, + "loss": 0.9426, + "step": 21220 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944974221645116e-05, + "loss": 0.8943, + "step": 21221 + }, + { + "epoch": 0.02, + "learning_rate": 4.99449690129065e-05, + "loss": 0.9066, + "step": 21222 + }, + { + "epoch": 0.02, + "learning_rate": 4.994496380392164e-05, + "loss": 1.6434, + "step": 21223 + }, + { + "epoch": 0.02, + "learning_rate": 4.994495859469053e-05, + "loss": 0.8702, + "step": 21224 + }, + { + "epoch": 0.02, + "learning_rate": 4.994495338521318e-05, + "loss": 0.918, + "step": 21225 + }, + { + "epoch": 0.02, + "learning_rate": 4.994494817548958e-05, + "loss": 0.8908, + "step": 21226 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944942965519736e-05, + "loss": 0.8662, + "step": 21227 + }, + { + "epoch": 0.02, + "learning_rate": 4.994493775530365e-05, + "loss": 0.9552, + "step": 21228 + }, + { + "epoch": 0.02, + "learning_rate": 4.994493254484131e-05, + "loss": 1.226, + "step": 21229 + }, + { + "epoch": 0.02, + "learning_rate": 4.994492733413273e-05, + "loss": 1.2979, + "step": 21230 + }, + { + "epoch": 0.02, + "learning_rate": 4.99449221231779e-05, + "loss": 1.03, + "step": 21231 + }, + { + "epoch": 0.02, + "learning_rate": 4.994491691197683e-05, + "loss": 1.143, + "step": 21232 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944911700529506e-05, + "loss": 0.8046, + "step": 21233 + }, + { + "epoch": 0.02, + "learning_rate": 4.994490648883594e-05, + "loss": 1.08, + "step": 21234 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944901276896134e-05, + "loss": 0.9966, + "step": 21235 + }, + { + "epoch": 0.02, + "learning_rate": 4.994489606471008e-05, + "loss": 0.5478, + "step": 21236 + }, + { + "epoch": 0.02, + "learning_rate": 4.994489085227778e-05, + "loss": 0.4291, + "step": 21237 + }, + { + "epoch": 0.02, + "learning_rate": 4.994488563959922e-05, + "loss": 1.1664, + "step": 21238 + }, + { + "epoch": 0.02, + "learning_rate": 4.994488042667443e-05, + "loss": 0.7748, + "step": 21239 + }, + { + "epoch": 0.02, + "learning_rate": 4.99448752135034e-05, + "loss": 1.1067, + "step": 21240 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944870000086114e-05, + "loss": 1.3567, + "step": 21241 + }, + { + "epoch": 0.02, + "learning_rate": 4.994486478642259e-05, + "loss": 0.7858, + "step": 21242 + }, + { + "epoch": 0.02, + "learning_rate": 4.994485957251281e-05, + "loss": 0.9862, + "step": 21243 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944854358356795e-05, + "loss": 0.9686, + "step": 21244 + }, + { + "epoch": 0.02, + "learning_rate": 4.994484914395453e-05, + "loss": 1.1012, + "step": 21245 + }, + { + "epoch": 0.02, + "learning_rate": 4.994484392930602e-05, + "loss": 0.9071, + "step": 21246 + }, + { + "epoch": 0.02, + "learning_rate": 4.994483871441127e-05, + "loss": 0.9202, + "step": 21247 + }, + { + "epoch": 0.02, + "learning_rate": 4.994483349927027e-05, + "loss": 0.7442, + "step": 21248 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944828283883024e-05, + "loss": 0.7375, + "step": 21249 + }, + { + "epoch": 0.02, + "learning_rate": 4.994482306824954e-05, + "loss": 0.6829, + "step": 21250 + }, + { + "epoch": 0.02, + "learning_rate": 4.99448178523698e-05, + "loss": 0.8778, + "step": 21251 + }, + { + "epoch": 0.02, + "learning_rate": 4.994481263624382e-05, + "loss": 0.9076, + "step": 21252 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944807419871605e-05, + "loss": 0.782, + "step": 21253 + }, + { + "epoch": 0.02, + "learning_rate": 4.994480220325314e-05, + "loss": 0.7479, + "step": 21254 + }, + { + "epoch": 0.02, + "learning_rate": 4.994479698638842e-05, + "loss": 0.7892, + "step": 21255 + }, + { + "epoch": 0.02, + "learning_rate": 4.994479176927746e-05, + "loss": 0.8916, + "step": 21256 + }, + { + "epoch": 0.02, + "learning_rate": 4.994478655192026e-05, + "loss": 0.8197, + "step": 21257 + }, + { + "epoch": 0.02, + "learning_rate": 4.994478133431682e-05, + "loss": 0.5922, + "step": 21258 + }, + { + "epoch": 0.02, + "learning_rate": 4.994477611646713e-05, + "loss": 0.6607, + "step": 21259 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944770898371195e-05, + "loss": 0.5339, + "step": 21260 + }, + { + "epoch": 0.02, + "learning_rate": 4.994476568002902e-05, + "loss": 0.6611, + "step": 21261 + }, + { + "epoch": 0.02, + "learning_rate": 4.99447604614406e-05, + "loss": 0.6938, + "step": 21262 + }, + { + "epoch": 0.02, + "learning_rate": 4.994475524260593e-05, + "loss": 0.7736, + "step": 21263 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944750023525025e-05, + "loss": 0.9005, + "step": 21264 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944744804197864e-05, + "loss": 0.6891, + "step": 21265 + }, + { + "epoch": 0.02, + "learning_rate": 4.994473958462447e-05, + "loss": 0.6536, + "step": 21266 + }, + { + "epoch": 0.02, + "learning_rate": 4.994473436480482e-05, + "loss": 0.4722, + "step": 21267 + }, + { + "epoch": 0.02, + "learning_rate": 4.994472914473893e-05, + "loss": 0.7118, + "step": 21268 + }, + { + "epoch": 0.02, + "learning_rate": 4.994472392442681e-05, + "loss": 1.021, + "step": 21269 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944718703868434e-05, + "loss": 0.9973, + "step": 21270 + }, + { + "epoch": 0.02, + "learning_rate": 4.994471348306381e-05, + "loss": 1.0374, + "step": 21271 + }, + { + "epoch": 0.02, + "learning_rate": 4.994470826201295e-05, + "loss": 1.1298, + "step": 21272 + }, + { + "epoch": 0.02, + "learning_rate": 4.994470304071585e-05, + "loss": 0.8766, + "step": 21273 + }, + { + "epoch": 0.02, + "learning_rate": 4.994469781917249e-05, + "loss": 0.9596, + "step": 21274 + }, + { + "epoch": 0.02, + "learning_rate": 4.99446925973829e-05, + "loss": 1.1949, + "step": 21275 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944687375347066e-05, + "loss": 0.8763, + "step": 21276 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944682153064987e-05, + "loss": 0.7254, + "step": 21277 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944676930536665e-05, + "loss": 0.3957, + "step": 21278 + }, + { + "epoch": 0.02, + "learning_rate": 4.99446717077621e-05, + "loss": 0.2739, + "step": 21279 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944666484741286e-05, + "loss": 0.1371, + "step": 21280 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944661261474236e-05, + "loss": 0.1692, + "step": 21281 + }, + { + "epoch": 0.02, + "learning_rate": 4.994465603796094e-05, + "loss": 0.1512, + "step": 21282 + }, + { + "epoch": 0.02, + "learning_rate": 4.99446508142014e-05, + "loss": 0.0791, + "step": 21283 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944645590195616e-05, + "loss": 0.0448, + "step": 21284 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944640365943596e-05, + "loss": 0.07, + "step": 21285 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944635141445326e-05, + "loss": 1.1381, + "step": 21286 + }, + { + "epoch": 0.02, + "learning_rate": 4.994462991670081e-05, + "loss": 0.8569, + "step": 21287 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944624691710065e-05, + "loss": 1.1895, + "step": 21288 + }, + { + "epoch": 0.02, + "learning_rate": 4.994461946647307e-05, + "loss": 1.3355, + "step": 21289 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944614240989826e-05, + "loss": 1.318, + "step": 21290 + }, + { + "epoch": 0.02, + "learning_rate": 4.994460901526034e-05, + "loss": 0.9115, + "step": 21291 + }, + { + "epoch": 0.02, + "learning_rate": 4.994460378928462e-05, + "loss": 1.0316, + "step": 21292 + }, + { + "epoch": 0.02, + "learning_rate": 4.994459856306265e-05, + "loss": 1.1097, + "step": 21293 + }, + { + "epoch": 0.02, + "learning_rate": 4.994459333659444e-05, + "loss": 1.0897, + "step": 21294 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944588109879984e-05, + "loss": 1.1458, + "step": 21295 + }, + { + "epoch": 0.02, + "learning_rate": 4.994458288291929e-05, + "loss": 0.9417, + "step": 21296 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944577655712355e-05, + "loss": 1.0377, + "step": 21297 + }, + { + "epoch": 0.02, + "learning_rate": 4.994457242825917e-05, + "loss": 1.9576, + "step": 21298 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944567200559756e-05, + "loss": 1.3086, + "step": 21299 + }, + { + "epoch": 0.02, + "learning_rate": 4.994456197261409e-05, + "loss": 1.1091, + "step": 21300 + }, + { + "epoch": 0.02, + "learning_rate": 4.994455674442218e-05, + "loss": 1.067, + "step": 21301 + }, + { + "epoch": 0.02, + "learning_rate": 4.994455151598403e-05, + "loss": 1.6588, + "step": 21302 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944546287299646e-05, + "loss": 1.4143, + "step": 21303 + }, + { + "epoch": 0.02, + "learning_rate": 4.994454105836901e-05, + "loss": 1.1955, + "step": 21304 + }, + { + "epoch": 0.02, + "learning_rate": 4.994453582919213e-05, + "loss": 1.0721, + "step": 21305 + }, + { + "epoch": 0.02, + "learning_rate": 4.994453059976901e-05, + "loss": 0.9138, + "step": 21306 + }, + { + "epoch": 0.02, + "learning_rate": 4.994452537009966e-05, + "loss": 0.7385, + "step": 21307 + }, + { + "epoch": 0.02, + "learning_rate": 4.994452014018406e-05, + "loss": 0.9718, + "step": 21308 + }, + { + "epoch": 0.02, + "learning_rate": 4.994451491002221e-05, + "loss": 0.5418, + "step": 21309 + }, + { + "epoch": 0.02, + "learning_rate": 4.994450967961413e-05, + "loss": 0.8399, + "step": 21310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944504448959805e-05, + "loss": 0.9506, + "step": 21311 + }, + { + "epoch": 0.02, + "learning_rate": 4.994449921805924e-05, + "loss": 0.9537, + "step": 21312 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944493986912426e-05, + "loss": 0.9808, + "step": 21313 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944488755519375e-05, + "loss": 1.3362, + "step": 21314 + }, + { + "epoch": 0.02, + "learning_rate": 4.994448352388008e-05, + "loss": 1.1291, + "step": 21315 + }, + { + "epoch": 0.02, + "learning_rate": 4.994447829199455e-05, + "loss": 1.224, + "step": 21316 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944473059862775e-05, + "loss": 1.323, + "step": 21317 + }, + { + "epoch": 0.02, + "learning_rate": 4.994446782748476e-05, + "loss": 1.2361, + "step": 21318 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944462594860504e-05, + "loss": 1.1337, + "step": 21319 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944457361990004e-05, + "loss": 1.1519, + "step": 21320 + }, + { + "epoch": 0.02, + "learning_rate": 4.994445212887326e-05, + "loss": 0.569, + "step": 21321 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944446895510284e-05, + "loss": 1.0727, + "step": 21322 + }, + { + "epoch": 0.02, + "learning_rate": 4.994444166190106e-05, + "loss": 1.3189, + "step": 21323 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944436428045594e-05, + "loss": 0.9177, + "step": 21324 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944431193943894e-05, + "loss": 0.7438, + "step": 21325 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944425959595946e-05, + "loss": 0.4636, + "step": 21326 + }, + { + "epoch": 0.02, + "learning_rate": 4.994442072500176e-05, + "loss": 1.1284, + "step": 21327 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944415490161334e-05, + "loss": 1.3429, + "step": 21328 + }, + { + "epoch": 0.02, + "learning_rate": 4.994441025507467e-05, + "loss": 1.1576, + "step": 21329 + }, + { + "epoch": 0.02, + "learning_rate": 4.994440501974176e-05, + "loss": 1.081, + "step": 21330 + }, + { + "epoch": 0.02, + "learning_rate": 4.994439978416261e-05, + "loss": 1.3083, + "step": 21331 + }, + { + "epoch": 0.02, + "learning_rate": 4.994439454833722e-05, + "loss": 1.1263, + "step": 21332 + }, + { + "epoch": 0.02, + "learning_rate": 4.994438931226559e-05, + "loss": 1.2432, + "step": 21333 + }, + { + "epoch": 0.02, + "learning_rate": 4.994438407594772e-05, + "loss": 0.8692, + "step": 21334 + }, + { + "epoch": 0.02, + "learning_rate": 4.994437883938361e-05, + "loss": 0.9011, + "step": 21335 + }, + { + "epoch": 0.02, + "learning_rate": 4.994437360257326e-05, + "loss": 0.619, + "step": 21336 + }, + { + "epoch": 0.02, + "learning_rate": 4.994436836551667e-05, + "loss": 1.0444, + "step": 21337 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944363128213833e-05, + "loss": 1.13, + "step": 21338 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944357890664764e-05, + "loss": 1.2477, + "step": 21339 + }, + { + "epoch": 0.02, + "learning_rate": 4.994435265286945e-05, + "loss": 1.5688, + "step": 21340 + }, + { + "epoch": 0.02, + "learning_rate": 4.99443474148279e-05, + "loss": 1.1908, + "step": 21341 + }, + { + "epoch": 0.02, + "learning_rate": 4.99443421765401e-05, + "loss": 1.0464, + "step": 21342 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944336938006075e-05, + "loss": 0.8116, + "step": 21343 + }, + { + "epoch": 0.02, + "learning_rate": 4.99443316992258e-05, + "loss": 1.2522, + "step": 21344 + }, + { + "epoch": 0.02, + "learning_rate": 4.994432646019929e-05, + "loss": 1.6597, + "step": 21345 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944321220926535e-05, + "loss": 0.7934, + "step": 21346 + }, + { + "epoch": 0.02, + "learning_rate": 4.994431598140754e-05, + "loss": 0.7231, + "step": 21347 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944310741642306e-05, + "loss": 0.8085, + "step": 21348 + }, + { + "epoch": 0.02, + "learning_rate": 4.994430550163084e-05, + "loss": 1.2041, + "step": 21349 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944300261373126e-05, + "loss": 0.9433, + "step": 21350 + }, + { + "epoch": 0.02, + "learning_rate": 4.994429502086917e-05, + "loss": 0.2579, + "step": 21351 + }, + { + "epoch": 0.02, + "learning_rate": 4.994428978011898e-05, + "loss": 1.0741, + "step": 21352 + }, + { + "epoch": 0.02, + "learning_rate": 4.994428453912256e-05, + "loss": 0.961, + "step": 21353 + }, + { + "epoch": 0.02, + "learning_rate": 4.994427929787989e-05, + "loss": 0.3312, + "step": 21354 + }, + { + "epoch": 0.02, + "learning_rate": 4.994427405639098e-05, + "loss": 1.118, + "step": 21355 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944268814655826e-05, + "loss": 1.3152, + "step": 21356 + }, + { + "epoch": 0.02, + "learning_rate": 4.994426357267444e-05, + "loss": 1.1632, + "step": 21357 + }, + { + "epoch": 0.02, + "learning_rate": 4.994425833044681e-05, + "loss": 0.8464, + "step": 21358 + }, + { + "epoch": 0.02, + "learning_rate": 4.994425308797295e-05, + "loss": 1.1619, + "step": 21359 + }, + { + "epoch": 0.02, + "learning_rate": 4.994424784525285e-05, + "loss": 0.9892, + "step": 21360 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944242602286494e-05, + "loss": 1.1089, + "step": 21361 + }, + { + "epoch": 0.02, + "learning_rate": 4.994423735907391e-05, + "loss": 1.0009, + "step": 21362 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944232115615094e-05, + "loss": 0.9223, + "step": 21363 + }, + { + "epoch": 0.02, + "learning_rate": 4.994422687191003e-05, + "loss": 1.1184, + "step": 21364 + }, + { + "epoch": 0.02, + "learning_rate": 4.994422162795873e-05, + "loss": 1.1963, + "step": 21365 + }, + { + "epoch": 0.02, + "learning_rate": 4.994421638376119e-05, + "loss": 1.1248, + "step": 21366 + }, + { + "epoch": 0.02, + "learning_rate": 4.994421113931741e-05, + "loss": 1.0048, + "step": 21367 + }, + { + "epoch": 0.02, + "learning_rate": 4.994420589462739e-05, + "loss": 0.6783, + "step": 21368 + }, + { + "epoch": 0.02, + "learning_rate": 4.994420064969114e-05, + "loss": 1.1281, + "step": 21369 + }, + { + "epoch": 0.02, + "learning_rate": 4.994419540450865e-05, + "loss": 1.1271, + "step": 21370 + }, + { + "epoch": 0.02, + "learning_rate": 4.994419015907992e-05, + "loss": 0.8529, + "step": 21371 + }, + { + "epoch": 0.02, + "learning_rate": 4.994418491340494e-05, + "loss": 1.0933, + "step": 21372 + }, + { + "epoch": 0.02, + "learning_rate": 4.994417966748374e-05, + "loss": 1.3416, + "step": 21373 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944174421316286e-05, + "loss": 0.6393, + "step": 21374 + }, + { + "epoch": 0.02, + "learning_rate": 4.99441691749026e-05, + "loss": 0.9645, + "step": 21375 + }, + { + "epoch": 0.02, + "learning_rate": 4.994416392824267e-05, + "loss": 1.0948, + "step": 21376 + }, + { + "epoch": 0.02, + "learning_rate": 4.994415868133651e-05, + "loss": 1.3385, + "step": 21377 + }, + { + "epoch": 0.02, + "learning_rate": 4.994415343418411e-05, + "loss": 1.0865, + "step": 21378 + }, + { + "epoch": 0.02, + "learning_rate": 4.994414818678547e-05, + "loss": 1.1815, + "step": 21379 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944142939140594e-05, + "loss": 1.3544, + "step": 21380 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944137691249476e-05, + "loss": 1.1664, + "step": 21381 + }, + { + "epoch": 0.02, + "learning_rate": 4.994413244311212e-05, + "loss": 1.2454, + "step": 21382 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944127194728527e-05, + "loss": 1.1638, + "step": 21383 + }, + { + "epoch": 0.02, + "learning_rate": 4.99441219460987e-05, + "loss": 1.2228, + "step": 21384 + }, + { + "epoch": 0.02, + "learning_rate": 4.994411669722263e-05, + "loss": 0.6998, + "step": 21385 + }, + { + "epoch": 0.02, + "learning_rate": 4.994411144810033e-05, + "loss": 0.9502, + "step": 21386 + }, + { + "epoch": 0.02, + "learning_rate": 4.994410619873179e-05, + "loss": 1.0736, + "step": 21387 + }, + { + "epoch": 0.02, + "learning_rate": 4.994410094911701e-05, + "loss": 1.1984, + "step": 21388 + }, + { + "epoch": 0.02, + "learning_rate": 4.994409569925599e-05, + "loss": 1.1979, + "step": 21389 + }, + { + "epoch": 0.02, + "learning_rate": 4.994409044914873e-05, + "loss": 0.9893, + "step": 21390 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944085198795245e-05, + "loss": 1.0458, + "step": 21391 + }, + { + "epoch": 0.02, + "learning_rate": 4.994407994819551e-05, + "loss": 0.8051, + "step": 21392 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944074697349544e-05, + "loss": 0.941, + "step": 21393 + }, + { + "epoch": 0.02, + "learning_rate": 4.994406944625733e-05, + "loss": 1.0936, + "step": 21394 + }, + { + "epoch": 0.02, + "learning_rate": 4.994406419491889e-05, + "loss": 1.3327, + "step": 21395 + }, + { + "epoch": 0.02, + "learning_rate": 4.994405894333421e-05, + "loss": 1.1432, + "step": 21396 + }, + { + "epoch": 0.02, + "learning_rate": 4.994405369150329e-05, + "loss": 1.0211, + "step": 21397 + }, + { + "epoch": 0.02, + "learning_rate": 4.994404843942614e-05, + "loss": 1.0848, + "step": 21398 + }, + { + "epoch": 0.02, + "learning_rate": 4.994404318710275e-05, + "loss": 1.3489, + "step": 21399 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944037934533114e-05, + "loss": 1.2683, + "step": 21400 + }, + { + "epoch": 0.02, + "learning_rate": 4.994403268171725e-05, + "loss": 1.0208, + "step": 21401 + }, + { + "epoch": 0.02, + "learning_rate": 4.994402742865515e-05, + "loss": 1.1155, + "step": 21402 + }, + { + "epoch": 0.02, + "learning_rate": 4.9944022175346806e-05, + "loss": 1.0715, + "step": 21403 + }, + { + "epoch": 0.02, + "learning_rate": 4.994401692179223e-05, + "loss": 1.2818, + "step": 21404 + }, + { + "epoch": 0.02, + "learning_rate": 4.994401166799142e-05, + "loss": 1.1193, + "step": 21405 + }, + { + "epoch": 0.02, + "learning_rate": 4.994400641394437e-05, + "loss": 1.0586, + "step": 21406 + }, + { + "epoch": 0.02, + "learning_rate": 4.994400115965108e-05, + "loss": 0.8666, + "step": 21407 + }, + { + "epoch": 0.02, + "learning_rate": 4.994399590511155e-05, + "loss": 1.2661, + "step": 21408 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943990650325796e-05, + "loss": 0.9074, + "step": 21409 + }, + { + "epoch": 0.02, + "learning_rate": 4.99439853952938e-05, + "loss": 0.8745, + "step": 21410 + }, + { + "epoch": 0.02, + "learning_rate": 4.994398014001557e-05, + "loss": 1.0076, + "step": 21411 + }, + { + "epoch": 0.02, + "learning_rate": 4.994397488449109e-05, + "loss": 1.0941, + "step": 21412 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943969628720387e-05, + "loss": 1.1989, + "step": 21413 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943964372703445e-05, + "loss": 1.2982, + "step": 21414 + }, + { + "epoch": 0.02, + "learning_rate": 4.994395911644027e-05, + "loss": 1.0213, + "step": 21415 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943953859930846e-05, + "loss": 1.0046, + "step": 21416 + }, + { + "epoch": 0.02, + "learning_rate": 4.99439486031752e-05, + "loss": 1.0804, + "step": 21417 + }, + { + "epoch": 0.02, + "learning_rate": 4.994394334617331e-05, + "loss": 1.077, + "step": 21418 + }, + { + "epoch": 0.02, + "learning_rate": 4.994393808892519e-05, + "loss": 1.0331, + "step": 21419 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943932831430826e-05, + "loss": 1.1398, + "step": 21420 + }, + { + "epoch": 0.02, + "learning_rate": 4.994392757369023e-05, + "loss": 0.9519, + "step": 21421 + }, + { + "epoch": 0.02, + "learning_rate": 4.99439223157034e-05, + "loss": 0.9076, + "step": 21422 + }, + { + "epoch": 0.02, + "learning_rate": 4.994391705747033e-05, + "loss": 1.0135, + "step": 21423 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943911798991025e-05, + "loss": 1.2365, + "step": 21424 + }, + { + "epoch": 0.02, + "learning_rate": 4.994390654026549e-05, + "loss": 1.1807, + "step": 21425 + }, + { + "epoch": 0.02, + "learning_rate": 4.994390128129372e-05, + "loss": 0.921, + "step": 21426 + }, + { + "epoch": 0.02, + "learning_rate": 4.99438960220757e-05, + "loss": 1.1299, + "step": 21427 + }, + { + "epoch": 0.02, + "learning_rate": 4.994389076261146e-05, + "loss": 0.8181, + "step": 21428 + }, + { + "epoch": 0.02, + "learning_rate": 4.994388550290098e-05, + "loss": 1.1476, + "step": 21429 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943880242944255e-05, + "loss": 1.2777, + "step": 21430 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943874982741304e-05, + "loss": 1.1631, + "step": 21431 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943869722292116e-05, + "loss": 1.1281, + "step": 21432 + }, + { + "epoch": 0.02, + "learning_rate": 4.994386446159669e-05, + "loss": 1.5449, + "step": 21433 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943859200655027e-05, + "loss": 1.1031, + "step": 21434 + }, + { + "epoch": 0.02, + "learning_rate": 4.994385393946714e-05, + "loss": 1.1158, + "step": 21435 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943848678033014e-05, + "loss": 1.04, + "step": 21436 + }, + { + "epoch": 0.02, + "learning_rate": 4.994384341635264e-05, + "loss": 0.9918, + "step": 21437 + }, + { + "epoch": 0.02, + "learning_rate": 4.994383815442605e-05, + "loss": 1.0878, + "step": 21438 + }, + { + "epoch": 0.02, + "learning_rate": 4.994383289225321e-05, + "loss": 1.1274, + "step": 21439 + }, + { + "epoch": 0.02, + "learning_rate": 4.994382762983414e-05, + "loss": 1.1882, + "step": 21440 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943822367168835e-05, + "loss": 1.0224, + "step": 21441 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943817104257295e-05, + "loss": 1.0642, + "step": 21442 + }, + { + "epoch": 0.02, + "learning_rate": 4.994381184109952e-05, + "loss": 1.0736, + "step": 21443 + }, + { + "epoch": 0.02, + "learning_rate": 4.994380657769551e-05, + "loss": 1.015, + "step": 21444 + }, + { + "epoch": 0.02, + "learning_rate": 4.994380131404527e-05, + "loss": 1.1421, + "step": 21445 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943796050148786e-05, + "loss": 0.9571, + "step": 21446 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943790786006074e-05, + "loss": 1.1894, + "step": 21447 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943785521617126e-05, + "loss": 1.0957, + "step": 21448 + }, + { + "epoch": 0.02, + "learning_rate": 4.994378025698194e-05, + "loss": 0.5327, + "step": 21449 + }, + { + "epoch": 0.02, + "learning_rate": 4.994377499210053e-05, + "loss": 1.0091, + "step": 21450 + }, + { + "epoch": 0.02, + "learning_rate": 4.994376972697288e-05, + "loss": 1.0431, + "step": 21451 + }, + { + "epoch": 0.02, + "learning_rate": 4.994376446159899e-05, + "loss": 1.2426, + "step": 21452 + }, + { + "epoch": 0.02, + "learning_rate": 4.994375919597887e-05, + "loss": 1.1802, + "step": 21453 + }, + { + "epoch": 0.02, + "learning_rate": 4.994375393011251e-05, + "loss": 0.9929, + "step": 21454 + }, + { + "epoch": 0.02, + "learning_rate": 4.994374866399992e-05, + "loss": 1.2309, + "step": 21455 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943743397641106e-05, + "loss": 1.2299, + "step": 21456 + }, + { + "epoch": 0.02, + "learning_rate": 4.994373813103604e-05, + "loss": 1.1763, + "step": 21457 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943732864184756e-05, + "loss": 1.1846, + "step": 21458 + }, + { + "epoch": 0.02, + "learning_rate": 4.994372759708723e-05, + "loss": 1.2441, + "step": 21459 + }, + { + "epoch": 0.02, + "learning_rate": 4.994372232974347e-05, + "loss": 1.2999, + "step": 21460 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943717062153476e-05, + "loss": 0.8639, + "step": 21461 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943711794317246e-05, + "loss": 0.8381, + "step": 21462 + }, + { + "epoch": 0.02, + "learning_rate": 4.994370652623479e-05, + "loss": 1.1953, + "step": 21463 + }, + { + "epoch": 0.02, + "learning_rate": 4.994370125790609e-05, + "loss": 1.1012, + "step": 21464 + }, + { + "epoch": 0.02, + "learning_rate": 4.994369598933117e-05, + "loss": 1.0871, + "step": 21465 + }, + { + "epoch": 0.02, + "learning_rate": 4.994369072051e-05, + "loss": 0.8221, + "step": 21466 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943685451442614e-05, + "loss": 1.1163, + "step": 21467 + }, + { + "epoch": 0.02, + "learning_rate": 4.994368018212898e-05, + "loss": 1.0373, + "step": 21468 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943674912569116e-05, + "loss": 1.0757, + "step": 21469 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943669642763027e-05, + "loss": 0.9568, + "step": 21470 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943664372710695e-05, + "loss": 1.0369, + "step": 21471 + }, + { + "epoch": 0.02, + "learning_rate": 4.994365910241213e-05, + "loss": 1.0439, + "step": 21472 + }, + { + "epoch": 0.02, + "learning_rate": 4.994365383186733e-05, + "loss": 1.3323, + "step": 21473 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943648561076303e-05, + "loss": 2.0937, + "step": 21474 + }, + { + "epoch": 0.02, + "learning_rate": 4.994364329003904e-05, + "loss": 1.8081, + "step": 21475 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943638018755544e-05, + "loss": 1.2279, + "step": 21476 + }, + { + "epoch": 0.02, + "learning_rate": 4.994363274722582e-05, + "loss": 1.3869, + "step": 21477 + }, + { + "epoch": 0.02, + "learning_rate": 4.994362747544986e-05, + "loss": 1.3084, + "step": 21478 + }, + { + "epoch": 0.02, + "learning_rate": 4.994362220342766e-05, + "loss": 1.1648, + "step": 21479 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943616931159234e-05, + "loss": 1.0312, + "step": 21480 + }, + { + "epoch": 0.02, + "learning_rate": 4.994361165864457e-05, + "loss": 0.9607, + "step": 21481 + }, + { + "epoch": 0.02, + "learning_rate": 4.994360638588368e-05, + "loss": 1.1924, + "step": 21482 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943601112876556e-05, + "loss": 1.3385, + "step": 21483 + }, + { + "epoch": 0.02, + "learning_rate": 4.99435958396232e-05, + "loss": 1.1178, + "step": 21484 + }, + { + "epoch": 0.02, + "learning_rate": 4.994359056612361e-05, + "loss": 1.0333, + "step": 21485 + }, + { + "epoch": 0.02, + "learning_rate": 4.994358529237778e-05, + "loss": 0.9613, + "step": 21486 + }, + { + "epoch": 0.02, + "learning_rate": 4.994358001838573e-05, + "loss": 0.8821, + "step": 21487 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943574744147436e-05, + "loss": 0.778, + "step": 21488 + }, + { + "epoch": 0.02, + "learning_rate": 4.994356946966291e-05, + "loss": 0.8237, + "step": 21489 + }, + { + "epoch": 0.02, + "learning_rate": 4.994356419493216e-05, + "loss": 1.2051, + "step": 21490 + }, + { + "epoch": 0.02, + "learning_rate": 4.994355891995517e-05, + "loss": 1.2017, + "step": 21491 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943553644731954e-05, + "loss": 1.2945, + "step": 21492 + }, + { + "epoch": 0.02, + "learning_rate": 4.994354836926251e-05, + "loss": 1.2878, + "step": 21493 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943543093546825e-05, + "loss": 1.1788, + "step": 21494 + }, + { + "epoch": 0.02, + "learning_rate": 4.994353781758491e-05, + "loss": 0.9545, + "step": 21495 + }, + { + "epoch": 0.02, + "learning_rate": 4.994353254137676e-05, + "loss": 1.2799, + "step": 21496 + }, + { + "epoch": 0.02, + "learning_rate": 4.994352726492238e-05, + "loss": 0.9301, + "step": 21497 + }, + { + "epoch": 0.02, + "learning_rate": 4.994352198822177e-05, + "loss": 0.8946, + "step": 21498 + }, + { + "epoch": 0.02, + "learning_rate": 4.994351671127493e-05, + "loss": 0.99, + "step": 21499 + }, + { + "epoch": 0.02, + "learning_rate": 4.994351143408185e-05, + "loss": 1.1911, + "step": 21500 + }, + { + "epoch": 0.02, + "eval_loss": 1.0204905271530151, + "eval_runtime": 82.6423, + "eval_samples_per_second": 16.759, + "eval_steps_per_second": 4.199, + "step": 21500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943506156642545e-05, + "loss": 0.8441, + "step": 21501 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943500878957e-05, + "loss": 0.92, + "step": 21502 + }, + { + "epoch": 0.02, + "learning_rate": 4.994349560102524e-05, + "loss": 1.5358, + "step": 21503 + }, + { + "epoch": 0.02, + "learning_rate": 4.994349032284723e-05, + "loss": 1.3599, + "step": 21504 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943485044423e-05, + "loss": 1.368, + "step": 21505 + }, + { + "epoch": 0.02, + "learning_rate": 4.994347976575253e-05, + "loss": 0.9406, + "step": 21506 + }, + { + "epoch": 0.02, + "learning_rate": 4.994347448683584e-05, + "loss": 1.2045, + "step": 21507 + }, + { + "epoch": 0.02, + "learning_rate": 4.994346920767291e-05, + "loss": 1.2324, + "step": 21508 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943463928263745e-05, + "loss": 1.2825, + "step": 21509 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943458648608356e-05, + "loss": 1.3985, + "step": 21510 + }, + { + "epoch": 0.02, + "learning_rate": 4.994345336870674e-05, + "loss": 1.1605, + "step": 21511 + }, + { + "epoch": 0.02, + "learning_rate": 4.994344808855888e-05, + "loss": 0.8541, + "step": 21512 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943442808164796e-05, + "loss": 1.2308, + "step": 21513 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943437527524484e-05, + "loss": 1.1285, + "step": 21514 + }, + { + "epoch": 0.02, + "learning_rate": 4.994343224663794e-05, + "loss": 1.236, + "step": 21515 + }, + { + "epoch": 0.02, + "learning_rate": 4.994342696550516e-05, + "loss": 0.999, + "step": 21516 + }, + { + "epoch": 0.02, + "learning_rate": 4.994342168412615e-05, + "loss": 0.7226, + "step": 21517 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943416402500906e-05, + "loss": 0.5034, + "step": 21518 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943411120629435e-05, + "loss": 0.744, + "step": 21519 + }, + { + "epoch": 0.02, + "learning_rate": 4.994340583851173e-05, + "loss": 0.6641, + "step": 21520 + }, + { + "epoch": 0.02, + "learning_rate": 4.99434005561478e-05, + "loss": 1.3566, + "step": 21521 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943395273537635e-05, + "loss": 1.2232, + "step": 21522 + }, + { + "epoch": 0.02, + "learning_rate": 4.994338999068124e-05, + "loss": 1.0332, + "step": 21523 + }, + { + "epoch": 0.02, + "learning_rate": 4.994338470757862e-05, + "loss": 1.7143, + "step": 21524 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943379424229766e-05, + "loss": 1.3351, + "step": 21525 + }, + { + "epoch": 0.02, + "learning_rate": 4.994337414063467e-05, + "loss": 1.1161, + "step": 21526 + }, + { + "epoch": 0.02, + "learning_rate": 4.994336885679336e-05, + "loss": 1.1575, + "step": 21527 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943363572705815e-05, + "loss": 1.1081, + "step": 21528 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943358288372034e-05, + "loss": 0.7882, + "step": 21529 + }, + { + "epoch": 0.02, + "learning_rate": 4.994335300379203e-05, + "loss": 1.0401, + "step": 21530 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943347718965796e-05, + "loss": 1.0647, + "step": 21531 + }, + { + "epoch": 0.02, + "learning_rate": 4.994334243389333e-05, + "loss": 1.048, + "step": 21532 + }, + { + "epoch": 0.02, + "learning_rate": 4.994333714857462e-05, + "loss": 1.0985, + "step": 21533 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943331863009694e-05, + "loss": 0.6156, + "step": 21534 + }, + { + "epoch": 0.02, + "learning_rate": 4.994332657719854e-05, + "loss": 1.0441, + "step": 21535 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943321291141146e-05, + "loss": 1.1048, + "step": 21536 + }, + { + "epoch": 0.02, + "learning_rate": 4.994331600483753e-05, + "loss": 0.5624, + "step": 21537 + }, + { + "epoch": 0.02, + "learning_rate": 4.994331071828768e-05, + "loss": 0.113, + "step": 21538 + }, + { + "epoch": 0.02, + "learning_rate": 4.994330543149161e-05, + "loss": 0.0646, + "step": 21539 + }, + { + "epoch": 0.02, + "learning_rate": 4.994330014444929e-05, + "loss": 0.0759, + "step": 21540 + }, + { + "epoch": 0.02, + "learning_rate": 4.994329485716076e-05, + "loss": 0.1033, + "step": 21541 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943289569625994e-05, + "loss": 0.153, + "step": 21542 + }, + { + "epoch": 0.02, + "learning_rate": 4.994328428184499e-05, + "loss": 0.4491, + "step": 21543 + }, + { + "epoch": 0.02, + "learning_rate": 4.994327899381777e-05, + "loss": 0.947, + "step": 21544 + }, + { + "epoch": 0.02, + "learning_rate": 4.994327370554431e-05, + "loss": 1.0054, + "step": 21545 + }, + { + "epoch": 0.02, + "learning_rate": 4.994326841702462e-05, + "loss": 0.9873, + "step": 21546 + }, + { + "epoch": 0.02, + "learning_rate": 4.994326312825871e-05, + "loss": 1.0444, + "step": 21547 + }, + { + "epoch": 0.02, + "learning_rate": 4.994325783924657e-05, + "loss": 1.0811, + "step": 21548 + }, + { + "epoch": 0.02, + "learning_rate": 4.99432525499882e-05, + "loss": 1.2559, + "step": 21549 + }, + { + "epoch": 0.02, + "learning_rate": 4.994324726048359e-05, + "loss": 1.1211, + "step": 21550 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943241970732757e-05, + "loss": 1.2957, + "step": 21551 + }, + { + "epoch": 0.02, + "learning_rate": 4.994323668073569e-05, + "loss": 1.2087, + "step": 21552 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943231390492405e-05, + "loss": 0.7734, + "step": 21553 + }, + { + "epoch": 0.02, + "learning_rate": 4.994322610000289e-05, + "loss": 1.0999, + "step": 21554 + }, + { + "epoch": 0.02, + "learning_rate": 4.994322080926714e-05, + "loss": 0.8471, + "step": 21555 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943215518285157e-05, + "loss": 1.0168, + "step": 21556 + }, + { + "epoch": 0.02, + "learning_rate": 4.994321022705696e-05, + "loss": 0.9685, + "step": 21557 + }, + { + "epoch": 0.02, + "learning_rate": 4.994320493558252e-05, + "loss": 1.2167, + "step": 21558 + }, + { + "epoch": 0.02, + "learning_rate": 4.994319964386185e-05, + "loss": 0.9677, + "step": 21559 + }, + { + "epoch": 0.02, + "learning_rate": 4.994319435189496e-05, + "loss": 1.1441, + "step": 21560 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943189059681836e-05, + "loss": 0.9623, + "step": 21561 + }, + { + "epoch": 0.02, + "learning_rate": 4.994318376722249e-05, + "loss": 1.0047, + "step": 21562 + }, + { + "epoch": 0.02, + "learning_rate": 4.994317847451691e-05, + "loss": 1.1001, + "step": 21563 + }, + { + "epoch": 0.02, + "learning_rate": 4.99431731815651e-05, + "loss": 1.2923, + "step": 21564 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943167888367074e-05, + "loss": 1.0494, + "step": 21565 + }, + { + "epoch": 0.02, + "learning_rate": 4.99431625949228e-05, + "loss": 0.7959, + "step": 21566 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943157301232315e-05, + "loss": 0.9435, + "step": 21567 + }, + { + "epoch": 0.02, + "learning_rate": 4.994315200729559e-05, + "loss": 1.0393, + "step": 21568 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943146713112646e-05, + "loss": 0.9602, + "step": 21569 + }, + { + "epoch": 0.02, + "learning_rate": 4.994314141868346e-05, + "loss": 1.1554, + "step": 21570 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943136124008054e-05, + "loss": 1.2093, + "step": 21571 + }, + { + "epoch": 0.02, + "learning_rate": 4.994313082908643e-05, + "loss": 1.3478, + "step": 21572 + }, + { + "epoch": 0.02, + "learning_rate": 4.994312553391856e-05, + "loss": 0.9962, + "step": 21573 + }, + { + "epoch": 0.02, + "learning_rate": 4.994312023850447e-05, + "loss": 1.2062, + "step": 21574 + }, + { + "epoch": 0.02, + "learning_rate": 4.994311494284415e-05, + "loss": 1.3346, + "step": 21575 + }, + { + "epoch": 0.02, + "learning_rate": 4.99431096469376e-05, + "loss": 1.4078, + "step": 21576 + }, + { + "epoch": 0.02, + "learning_rate": 4.994310435078483e-05, + "loss": 1.3449, + "step": 21577 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943099054385834e-05, + "loss": 1.2704, + "step": 21578 + }, + { + "epoch": 0.02, + "learning_rate": 4.99430937577406e-05, + "loss": 1.1559, + "step": 21579 + }, + { + "epoch": 0.02, + "learning_rate": 4.994308846084914e-05, + "loss": 1.3784, + "step": 21580 + }, + { + "epoch": 0.02, + "learning_rate": 4.994308316371146e-05, + "loss": 1.2439, + "step": 21581 + }, + { + "epoch": 0.02, + "learning_rate": 4.994307786632755e-05, + "loss": 1.356, + "step": 21582 + }, + { + "epoch": 0.02, + "learning_rate": 4.994307256869741e-05, + "loss": 1.3247, + "step": 21583 + }, + { + "epoch": 0.02, + "learning_rate": 4.994306727082104e-05, + "loss": 1.3048, + "step": 21584 + }, + { + "epoch": 0.02, + "learning_rate": 4.994306197269844e-05, + "loss": 1.1362, + "step": 21585 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943056674329623e-05, + "loss": 1.1682, + "step": 21586 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943051375714575e-05, + "loss": 1.0061, + "step": 21587 + }, + { + "epoch": 0.02, + "learning_rate": 4.99430460768533e-05, + "loss": 1.3011, + "step": 21588 + }, + { + "epoch": 0.02, + "learning_rate": 4.994304077774579e-05, + "loss": 1.6349, + "step": 21589 + }, + { + "epoch": 0.02, + "learning_rate": 4.994303547839206e-05, + "loss": 0.9376, + "step": 21590 + }, + { + "epoch": 0.02, + "learning_rate": 4.99430301787921e-05, + "loss": 1.2217, + "step": 21591 + }, + { + "epoch": 0.02, + "learning_rate": 4.994302487894592e-05, + "loss": 1.2279, + "step": 21592 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943019578853505e-05, + "loss": 1.4965, + "step": 21593 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943014278514866e-05, + "loss": 0.8856, + "step": 21594 + }, + { + "epoch": 0.02, + "learning_rate": 4.994300897793e-05, + "loss": 1.0858, + "step": 21595 + }, + { + "epoch": 0.02, + "learning_rate": 4.994300367709891e-05, + "loss": 1.3529, + "step": 21596 + }, + { + "epoch": 0.02, + "learning_rate": 4.994299837602159e-05, + "loss": 1.1038, + "step": 21597 + }, + { + "epoch": 0.02, + "learning_rate": 4.994299307469804e-05, + "loss": 1.4503, + "step": 21598 + }, + { + "epoch": 0.02, + "learning_rate": 4.994298777312827e-05, + "loss": 1.052, + "step": 21599 + }, + { + "epoch": 0.02, + "learning_rate": 4.994298247131226e-05, + "loss": 1.2115, + "step": 21600 + }, + { + "epoch": 0.02, + "learning_rate": 4.994297716925004e-05, + "loss": 1.1085, + "step": 21601 + }, + { + "epoch": 0.02, + "learning_rate": 4.994297186694158e-05, + "loss": 0.9663, + "step": 21602 + }, + { + "epoch": 0.02, + "learning_rate": 4.994296656438691e-05, + "loss": 1.2905, + "step": 21603 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942961261586e-05, + "loss": 0.8859, + "step": 21604 + }, + { + "epoch": 0.02, + "learning_rate": 4.994295595853886e-05, + "loss": 0.8585, + "step": 21605 + }, + { + "epoch": 0.02, + "learning_rate": 4.99429506552455e-05, + "loss": 1.0194, + "step": 21606 + }, + { + "epoch": 0.02, + "learning_rate": 4.994294535170592e-05, + "loss": 1.0628, + "step": 21607 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942940047920105e-05, + "loss": 1.1757, + "step": 21608 + }, + { + "epoch": 0.02, + "learning_rate": 4.994293474388807e-05, + "loss": 0.7947, + "step": 21609 + }, + { + "epoch": 0.02, + "learning_rate": 4.994292943960981e-05, + "loss": 0.7889, + "step": 21610 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942924135085314e-05, + "loss": 1.1843, + "step": 21611 + }, + { + "epoch": 0.02, + "learning_rate": 4.99429188303146e-05, + "loss": 1.0943, + "step": 21612 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942913525297655e-05, + "loss": 1.1487, + "step": 21613 + }, + { + "epoch": 0.02, + "learning_rate": 4.994290822003449e-05, + "loss": 1.2974, + "step": 21614 + }, + { + "epoch": 0.02, + "learning_rate": 4.99429029145251e-05, + "loss": 1.2003, + "step": 21615 + }, + { + "epoch": 0.02, + "learning_rate": 4.994289760876948e-05, + "loss": 1.1206, + "step": 21616 + }, + { + "epoch": 0.02, + "learning_rate": 4.994289230276763e-05, + "loss": 1.2136, + "step": 21617 + }, + { + "epoch": 0.02, + "learning_rate": 4.994288699651956e-05, + "loss": 1.2015, + "step": 21618 + }, + { + "epoch": 0.02, + "learning_rate": 4.994288169002526e-05, + "loss": 1.1269, + "step": 21619 + }, + { + "epoch": 0.02, + "learning_rate": 4.994287638328474e-05, + "loss": 1.1119, + "step": 21620 + }, + { + "epoch": 0.02, + "learning_rate": 4.994287107629799e-05, + "loss": 1.1216, + "step": 21621 + }, + { + "epoch": 0.02, + "learning_rate": 4.994286576906502e-05, + "loss": 1.0699, + "step": 21622 + }, + { + "epoch": 0.02, + "learning_rate": 4.994286046158582e-05, + "loss": 1.1917, + "step": 21623 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942855153860393e-05, + "loss": 0.9727, + "step": 21624 + }, + { + "epoch": 0.02, + "learning_rate": 4.994284984588875e-05, + "loss": 0.7442, + "step": 21625 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942844537670866e-05, + "loss": 0.9184, + "step": 21626 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942839229206776e-05, + "loss": 1.0461, + "step": 21627 + }, + { + "epoch": 0.02, + "learning_rate": 4.994283392049644e-05, + "loss": 1.2155, + "step": 21628 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942828611539894e-05, + "loss": 1.134, + "step": 21629 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942823302337116e-05, + "loss": 1.0879, + "step": 21630 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942817992888116e-05, + "loss": 1.0368, + "step": 21631 + }, + { + "epoch": 0.02, + "learning_rate": 4.994281268319289e-05, + "loss": 1.164, + "step": 21632 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942807373251436e-05, + "loss": 1.1019, + "step": 21633 + }, + { + "epoch": 0.02, + "learning_rate": 4.994280206306376e-05, + "loss": 1.0738, + "step": 21634 + }, + { + "epoch": 0.02, + "learning_rate": 4.994279675262986e-05, + "loss": 1.8306, + "step": 21635 + }, + { + "epoch": 0.02, + "learning_rate": 4.994279144194974e-05, + "loss": 0.9836, + "step": 21636 + }, + { + "epoch": 0.02, + "learning_rate": 4.994278613102339e-05, + "loss": 0.9996, + "step": 21637 + }, + { + "epoch": 0.02, + "learning_rate": 4.994278081985081e-05, + "loss": 1.356, + "step": 21638 + }, + { + "epoch": 0.02, + "learning_rate": 4.994277550843202e-05, + "loss": 0.9421, + "step": 21639 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942770196766984e-05, + "loss": 0.975, + "step": 21640 + }, + { + "epoch": 0.02, + "learning_rate": 4.994276488485574e-05, + "loss": 1.3124, + "step": 21641 + }, + { + "epoch": 0.02, + "learning_rate": 4.994275957269827e-05, + "loss": 1.0569, + "step": 21642 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942754260294566e-05, + "loss": 1.079, + "step": 21643 + }, + { + "epoch": 0.02, + "learning_rate": 4.994274894764465e-05, + "loss": 1.3087, + "step": 21644 + }, + { + "epoch": 0.02, + "learning_rate": 4.99427436347485e-05, + "loss": 1.3121, + "step": 21645 + }, + { + "epoch": 0.02, + "learning_rate": 4.994273832160613e-05, + "loss": 1.0195, + "step": 21646 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942733008217537e-05, + "loss": 1.1651, + "step": 21647 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942727694582717e-05, + "loss": 0.9677, + "step": 21648 + }, + { + "epoch": 0.02, + "learning_rate": 4.994272238070168e-05, + "loss": 0.334, + "step": 21649 + }, + { + "epoch": 0.02, + "learning_rate": 4.994271706657441e-05, + "loss": 0.6542, + "step": 21650 + }, + { + "epoch": 0.02, + "learning_rate": 4.994271175220092e-05, + "loss": 0.851, + "step": 21651 + }, + { + "epoch": 0.02, + "learning_rate": 4.994270643758121e-05, + "loss": 1.1128, + "step": 21652 + }, + { + "epoch": 0.02, + "learning_rate": 4.994270112271526e-05, + "loss": 1.0196, + "step": 21653 + }, + { + "epoch": 0.02, + "learning_rate": 4.99426958076031e-05, + "loss": 1.1413, + "step": 21654 + }, + { + "epoch": 0.02, + "learning_rate": 4.994269049224471e-05, + "loss": 1.0522, + "step": 21655 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942685176640106e-05, + "loss": 1.2215, + "step": 21656 + }, + { + "epoch": 0.02, + "learning_rate": 4.994267986078928e-05, + "loss": 1.7623, + "step": 21657 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942674544692215e-05, + "loss": 0.7826, + "step": 21658 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942669228348936e-05, + "loss": 1.0889, + "step": 21659 + }, + { + "epoch": 0.02, + "learning_rate": 4.994266391175943e-05, + "loss": 1.3004, + "step": 21660 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942658594923705e-05, + "loss": 1.1324, + "step": 21661 + }, + { + "epoch": 0.02, + "learning_rate": 4.994265327784176e-05, + "loss": 1.0685, + "step": 21662 + }, + { + "epoch": 0.02, + "learning_rate": 4.994264796051358e-05, + "loss": 1.2731, + "step": 21663 + }, + { + "epoch": 0.02, + "learning_rate": 4.994264264293918e-05, + "loss": 1.0818, + "step": 21664 + }, + { + "epoch": 0.02, + "learning_rate": 4.994263732511856e-05, + "loss": 1.0772, + "step": 21665 + }, + { + "epoch": 0.02, + "learning_rate": 4.994263200705172e-05, + "loss": 1.3476, + "step": 21666 + }, + { + "epoch": 0.02, + "learning_rate": 4.994262668873865e-05, + "loss": 1.0928, + "step": 21667 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942621370179354e-05, + "loss": 0.9148, + "step": 21668 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942616051373845e-05, + "loss": 1.0421, + "step": 21669 + }, + { + "epoch": 0.02, + "learning_rate": 4.994261073232211e-05, + "loss": 1.1773, + "step": 21670 + }, + { + "epoch": 0.02, + "learning_rate": 4.994260541302415e-05, + "loss": 1.0953, + "step": 21671 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942600093479964e-05, + "loss": 0.9524, + "step": 21672 + }, + { + "epoch": 0.02, + "learning_rate": 4.994259477368955e-05, + "loss": 1.3443, + "step": 21673 + }, + { + "epoch": 0.02, + "learning_rate": 4.994258945365293e-05, + "loss": 0.9674, + "step": 21674 + }, + { + "epoch": 0.02, + "learning_rate": 4.994258413337008e-05, + "loss": 1.0573, + "step": 21675 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942578812841004e-05, + "loss": 1.2206, + "step": 21676 + }, + { + "epoch": 0.02, + "learning_rate": 4.994257349206571e-05, + "loss": 1.0628, + "step": 21677 + }, + { + "epoch": 0.02, + "learning_rate": 4.994256817104419e-05, + "loss": 0.6475, + "step": 21678 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942562849776454e-05, + "loss": 0.4589, + "step": 21679 + }, + { + "epoch": 0.02, + "learning_rate": 4.994255752826249e-05, + "loss": 0.7265, + "step": 21680 + }, + { + "epoch": 0.02, + "learning_rate": 4.99425522065023e-05, + "loss": 0.5564, + "step": 21681 + }, + { + "epoch": 0.02, + "learning_rate": 4.994254688449589e-05, + "loss": 1.2558, + "step": 21682 + }, + { + "epoch": 0.02, + "learning_rate": 4.994254156224326e-05, + "loss": 0.8174, + "step": 21683 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942536239744405e-05, + "loss": 0.7358, + "step": 21684 + }, + { + "epoch": 0.02, + "learning_rate": 4.994253091699933e-05, + "loss": 1.1193, + "step": 21685 + }, + { + "epoch": 0.02, + "learning_rate": 4.994252559400804e-05, + "loss": 1.0256, + "step": 21686 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942520270770514e-05, + "loss": 1.0101, + "step": 21687 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942514947286774e-05, + "loss": 1.1176, + "step": 21688 + }, + { + "epoch": 0.02, + "learning_rate": 4.994250962355681e-05, + "loss": 1.1421, + "step": 21689 + }, + { + "epoch": 0.02, + "learning_rate": 4.994250429958063e-05, + "loss": 1.1085, + "step": 21690 + }, + { + "epoch": 0.02, + "learning_rate": 4.994249897535822e-05, + "loss": 1.0204, + "step": 21691 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942493650889586e-05, + "loss": 1.0841, + "step": 21692 + }, + { + "epoch": 0.02, + "learning_rate": 4.994248832617474e-05, + "loss": 1.118, + "step": 21693 + }, + { + "epoch": 0.02, + "learning_rate": 4.994248300121366e-05, + "loss": 0.8442, + "step": 21694 + }, + { + "epoch": 0.02, + "learning_rate": 4.994247767600637e-05, + "loss": 1.0817, + "step": 21695 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942472350552854e-05, + "loss": 1.0163, + "step": 21696 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942467024853114e-05, + "loss": 1.0365, + "step": 21697 + }, + { + "epoch": 0.02, + "learning_rate": 4.994246169890716e-05, + "loss": 1.2203, + "step": 21698 + }, + { + "epoch": 0.02, + "learning_rate": 4.994245637271497e-05, + "loss": 1.3005, + "step": 21699 + }, + { + "epoch": 0.02, + "learning_rate": 4.994245104627657e-05, + "loss": 1.1164, + "step": 21700 + }, + { + "epoch": 0.02, + "learning_rate": 4.994244571959195e-05, + "loss": 1.271, + "step": 21701 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942440392661104e-05, + "loss": 0.9653, + "step": 21702 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942435065484036e-05, + "loss": 1.1133, + "step": 21703 + }, + { + "epoch": 0.02, + "learning_rate": 4.994242973806075e-05, + "loss": 1.2871, + "step": 21704 + }, + { + "epoch": 0.02, + "learning_rate": 4.994242441039124e-05, + "loss": 1.3675, + "step": 21705 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942419082475513e-05, + "loss": 1.1945, + "step": 21706 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942413754313563e-05, + "loss": 1.0927, + "step": 21707 + }, + { + "epoch": 0.02, + "learning_rate": 4.99424084259054e-05, + "loss": 1.2139, + "step": 21708 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942403097251e-05, + "loss": 1.1224, + "step": 21709 + }, + { + "epoch": 0.02, + "learning_rate": 4.994239776835039e-05, + "loss": 1.9707, + "step": 21710 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942392439203554e-05, + "loss": 1.7993, + "step": 21711 + }, + { + "epoch": 0.02, + "learning_rate": 4.994238710981049e-05, + "loss": 1.3211, + "step": 21712 + }, + { + "epoch": 0.02, + "learning_rate": 4.994238178017122e-05, + "loss": 1.331, + "step": 21713 + }, + { + "epoch": 0.02, + "learning_rate": 4.994237645028572e-05, + "loss": 1.0083, + "step": 21714 + }, + { + "epoch": 0.02, + "learning_rate": 4.994237112015401e-05, + "loss": 1.0146, + "step": 21715 + }, + { + "epoch": 0.02, + "learning_rate": 4.994236578977607e-05, + "loss": 1.0937, + "step": 21716 + }, + { + "epoch": 0.02, + "learning_rate": 4.994236045915192e-05, + "loss": 1.0813, + "step": 21717 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942355128281536e-05, + "loss": 0.9742, + "step": 21718 + }, + { + "epoch": 0.02, + "learning_rate": 4.994234979716493e-05, + "loss": 0.9272, + "step": 21719 + }, + { + "epoch": 0.02, + "learning_rate": 4.994234446580212e-05, + "loss": 1.0401, + "step": 21720 + }, + { + "epoch": 0.02, + "learning_rate": 4.994233913419307e-05, + "loss": 1.0828, + "step": 21721 + }, + { + "epoch": 0.02, + "learning_rate": 4.994233380233781e-05, + "loss": 1.1581, + "step": 21722 + }, + { + "epoch": 0.02, + "learning_rate": 4.994232847023633e-05, + "loss": 1.1312, + "step": 21723 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942323137888637e-05, + "loss": 1.1615, + "step": 21724 + }, + { + "epoch": 0.02, + "learning_rate": 4.994231780529471e-05, + "loss": 1.0344, + "step": 21725 + }, + { + "epoch": 0.02, + "learning_rate": 4.994231247245458e-05, + "loss": 1.169, + "step": 21726 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942307139368216e-05, + "loss": 1.1597, + "step": 21727 + }, + { + "epoch": 0.02, + "learning_rate": 4.994230180603563e-05, + "loss": 0.9474, + "step": 21728 + }, + { + "epoch": 0.02, + "learning_rate": 4.994229647245683e-05, + "loss": 0.9169, + "step": 21729 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942291138631816e-05, + "loss": 1.0681, + "step": 21730 + }, + { + "epoch": 0.02, + "learning_rate": 4.994228580456057e-05, + "loss": 1.2147, + "step": 21731 + }, + { + "epoch": 0.02, + "learning_rate": 4.994228047024311e-05, + "loss": 1.0963, + "step": 21732 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942275135679434e-05, + "loss": 0.9662, + "step": 21733 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942269800869536e-05, + "loss": 1.252, + "step": 21734 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942264465813416e-05, + "loss": 0.84, + "step": 21735 + }, + { + "epoch": 0.02, + "learning_rate": 4.994225913051108e-05, + "loss": 0.9815, + "step": 21736 + }, + { + "epoch": 0.02, + "learning_rate": 4.994225379496252e-05, + "loss": 0.9686, + "step": 21737 + }, + { + "epoch": 0.02, + "learning_rate": 4.994224845916774e-05, + "loss": 1.1927, + "step": 21738 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942243123126744e-05, + "loss": 1.1757, + "step": 21739 + }, + { + "epoch": 0.02, + "learning_rate": 4.994223778683953e-05, + "loss": 0.9197, + "step": 21740 + }, + { + "epoch": 0.02, + "learning_rate": 4.99422324503061e-05, + "loss": 1.1095, + "step": 21741 + }, + { + "epoch": 0.02, + "learning_rate": 4.994222711352644e-05, + "loss": 1.3057, + "step": 21742 + }, + { + "epoch": 0.02, + "learning_rate": 4.994222177650057e-05, + "loss": 1.1465, + "step": 21743 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942216439228475e-05, + "loss": 1.0029, + "step": 21744 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942211101710166e-05, + "loss": 1.141, + "step": 21745 + }, + { + "epoch": 0.02, + "learning_rate": 4.994220576394564e-05, + "loss": 1.1373, + "step": 21746 + }, + { + "epoch": 0.02, + "learning_rate": 4.994220042593488e-05, + "loss": 1.1457, + "step": 21747 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942195087677916e-05, + "loss": 1.0692, + "step": 21748 + }, + { + "epoch": 0.02, + "learning_rate": 4.994218974917473e-05, + "loss": 1.2658, + "step": 21749 + }, + { + "epoch": 0.02, + "learning_rate": 4.994218441042532e-05, + "loss": 1.1392, + "step": 21750 + }, + { + "epoch": 0.02, + "learning_rate": 4.994217907142969e-05, + "loss": 0.8615, + "step": 21751 + }, + { + "epoch": 0.02, + "learning_rate": 4.994217373218786e-05, + "loss": 0.7384, + "step": 21752 + }, + { + "epoch": 0.02, + "learning_rate": 4.994216839269979e-05, + "loss": 0.6546, + "step": 21753 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942163052965515e-05, + "loss": 0.7958, + "step": 21754 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942157712985016e-05, + "loss": 1.1952, + "step": 21755 + }, + { + "epoch": 0.02, + "learning_rate": 4.99421523727583e-05, + "loss": 1.3958, + "step": 21756 + }, + { + "epoch": 0.02, + "learning_rate": 4.994214703228536e-05, + "loss": 1.0041, + "step": 21757 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942141691566206e-05, + "loss": 1.0627, + "step": 21758 + }, + { + "epoch": 0.02, + "learning_rate": 4.994213635060083e-05, + "loss": 1.2193, + "step": 21759 + }, + { + "epoch": 0.02, + "learning_rate": 4.994213100938924e-05, + "loss": 3.3144, + "step": 21760 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942125667931435e-05, + "loss": 1.3413, + "step": 21761 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942120326227407e-05, + "loss": 1.0732, + "step": 21762 + }, + { + "epoch": 0.02, + "learning_rate": 4.994211498427716e-05, + "loss": 1.3135, + "step": 21763 + }, + { + "epoch": 0.02, + "learning_rate": 4.99421096420807e-05, + "loss": 1.0917, + "step": 21764 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942104299638015e-05, + "loss": 1.5893, + "step": 21765 + }, + { + "epoch": 0.02, + "learning_rate": 4.994209895694912e-05, + "loss": 1.214, + "step": 21766 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942093614014e-05, + "loss": 1.0506, + "step": 21767 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942088270832664e-05, + "loss": 1.128, + "step": 21768 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942082927405107e-05, + "loss": 1.293, + "step": 21769 + }, + { + "epoch": 0.02, + "learning_rate": 4.994207758373134e-05, + "loss": 1.1546, + "step": 21770 + }, + { + "epoch": 0.02, + "learning_rate": 4.994207223981135e-05, + "loss": 1.1068, + "step": 21771 + }, + { + "epoch": 0.02, + "learning_rate": 4.994206689564515e-05, + "loss": 0.9333, + "step": 21772 + }, + { + "epoch": 0.02, + "learning_rate": 4.994206155123272e-05, + "loss": 1.0025, + "step": 21773 + }, + { + "epoch": 0.02, + "learning_rate": 4.994205620657408e-05, + "loss": 1.0137, + "step": 21774 + }, + { + "epoch": 0.02, + "learning_rate": 4.994205086166922e-05, + "loss": 1.2749, + "step": 21775 + }, + { + "epoch": 0.02, + "learning_rate": 4.994204551651815e-05, + "loss": 1.4387, + "step": 21776 + }, + { + "epoch": 0.02, + "learning_rate": 4.994204017112085e-05, + "loss": 1.3789, + "step": 21777 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942034825477345e-05, + "loss": 0.2956, + "step": 21778 + }, + { + "epoch": 0.02, + "learning_rate": 4.994202947958762e-05, + "loss": 0.6192, + "step": 21779 + }, + { + "epoch": 0.02, + "learning_rate": 4.994202413345167e-05, + "loss": 0.6336, + "step": 21780 + }, + { + "epoch": 0.02, + "learning_rate": 4.994201878706951e-05, + "loss": 0.3367, + "step": 21781 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942013440441126e-05, + "loss": 0.2924, + "step": 21782 + }, + { + "epoch": 0.02, + "learning_rate": 4.994200809356653e-05, + "loss": 0.2864, + "step": 21783 + }, + { + "epoch": 0.02, + "learning_rate": 4.994200274644572e-05, + "loss": 0.2752, + "step": 21784 + }, + { + "epoch": 0.02, + "learning_rate": 4.994199739907869e-05, + "loss": 0.8606, + "step": 21785 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941992051465446e-05, + "loss": 1.2449, + "step": 21786 + }, + { + "epoch": 0.02, + "learning_rate": 4.994198670360598e-05, + "loss": 1.3306, + "step": 21787 + }, + { + "epoch": 0.02, + "learning_rate": 4.99419813555003e-05, + "loss": 1.2214, + "step": 21788 + }, + { + "epoch": 0.02, + "learning_rate": 4.99419760071484e-05, + "loss": 1.2383, + "step": 21789 + }, + { + "epoch": 0.02, + "learning_rate": 4.994197065855028e-05, + "loss": 1.1973, + "step": 21790 + }, + { + "epoch": 0.02, + "learning_rate": 4.994196530970596e-05, + "loss": 0.9796, + "step": 21791 + }, + { + "epoch": 0.02, + "learning_rate": 4.994195996061541e-05, + "loss": 1.0425, + "step": 21792 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941954611278645e-05, + "loss": 0.9307, + "step": 21793 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941949261695665e-05, + "loss": 0.971, + "step": 21794 + }, + { + "epoch": 0.02, + "learning_rate": 4.994194391186647e-05, + "loss": 1.1617, + "step": 21795 + }, + { + "epoch": 0.02, + "learning_rate": 4.994193856179106e-05, + "loss": 1.0951, + "step": 21796 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941933211469426e-05, + "loss": 1.3445, + "step": 21797 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941927860901585e-05, + "loss": 0.943, + "step": 21798 + }, + { + "epoch": 0.02, + "learning_rate": 4.994192251008752e-05, + "loss": 1.1043, + "step": 21799 + }, + { + "epoch": 0.02, + "learning_rate": 4.994191715902724e-05, + "loss": 1.3865, + "step": 21800 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941911807720746e-05, + "loss": 1.0918, + "step": 21801 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941906456168035e-05, + "loss": 1.1059, + "step": 21802 + }, + { + "epoch": 0.02, + "learning_rate": 4.994190110436911e-05, + "loss": 0.9877, + "step": 21803 + }, + { + "epoch": 0.02, + "learning_rate": 4.994189575232396e-05, + "loss": 1.2638, + "step": 21804 + }, + { + "epoch": 0.02, + "learning_rate": 4.994189040003261e-05, + "loss": 1.3432, + "step": 21805 + }, + { + "epoch": 0.02, + "learning_rate": 4.994188504749504e-05, + "loss": 3.0931, + "step": 21806 + }, + { + "epoch": 0.02, + "learning_rate": 4.994187969471125e-05, + "loss": 1.3332, + "step": 21807 + }, + { + "epoch": 0.02, + "learning_rate": 4.994187434168124e-05, + "loss": 1.0825, + "step": 21808 + }, + { + "epoch": 0.02, + "learning_rate": 4.994186898840502e-05, + "loss": 0.9153, + "step": 21809 + }, + { + "epoch": 0.02, + "learning_rate": 4.994186363488258e-05, + "loss": 1.1746, + "step": 21810 + }, + { + "epoch": 0.02, + "learning_rate": 4.994185828111393e-05, + "loss": 0.9023, + "step": 21811 + }, + { + "epoch": 0.02, + "learning_rate": 4.994185292709906e-05, + "loss": 1.0312, + "step": 21812 + }, + { + "epoch": 0.02, + "learning_rate": 4.994184757283798e-05, + "loss": 1.0223, + "step": 21813 + }, + { + "epoch": 0.02, + "learning_rate": 4.994184221833068e-05, + "loss": 1.2973, + "step": 21814 + }, + { + "epoch": 0.02, + "learning_rate": 4.994183686357717e-05, + "loss": 1.2954, + "step": 21815 + }, + { + "epoch": 0.02, + "learning_rate": 4.994183150857744e-05, + "loss": 0.8376, + "step": 21816 + }, + { + "epoch": 0.02, + "learning_rate": 4.99418261533315e-05, + "loss": 1.1644, + "step": 21817 + }, + { + "epoch": 0.02, + "learning_rate": 4.994182079783933e-05, + "loss": 1.2632, + "step": 21818 + }, + { + "epoch": 0.02, + "learning_rate": 4.994181544210096e-05, + "loss": 0.9657, + "step": 21819 + }, + { + "epoch": 0.02, + "learning_rate": 4.994181008611637e-05, + "loss": 0.8824, + "step": 21820 + }, + { + "epoch": 0.02, + "learning_rate": 4.994180472988557e-05, + "loss": 1.0467, + "step": 21821 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941799373408546e-05, + "loss": 0.9279, + "step": 21822 + }, + { + "epoch": 0.02, + "learning_rate": 4.99417940166853e-05, + "loss": 0.3203, + "step": 21823 + }, + { + "epoch": 0.02, + "learning_rate": 4.994178865971586e-05, + "loss": 0.4087, + "step": 21824 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941783302500196e-05, + "loss": 0.4972, + "step": 21825 + }, + { + "epoch": 0.02, + "learning_rate": 4.994177794503831e-05, + "loss": 0.459, + "step": 21826 + }, + { + "epoch": 0.02, + "learning_rate": 4.994177258733022e-05, + "loss": 0.4399, + "step": 21827 + }, + { + "epoch": 0.02, + "learning_rate": 4.994176722937591e-05, + "loss": 0.446, + "step": 21828 + }, + { + "epoch": 0.02, + "learning_rate": 4.994176187117539e-05, + "loss": 1.1619, + "step": 21829 + }, + { + "epoch": 0.02, + "learning_rate": 4.994175651272866e-05, + "loss": 1.3143, + "step": 21830 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941751154035695e-05, + "loss": 0.9979, + "step": 21831 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941745795096525e-05, + "loss": 1.2747, + "step": 21832 + }, + { + "epoch": 0.02, + "learning_rate": 4.994174043591115e-05, + "loss": 1.079, + "step": 21833 + }, + { + "epoch": 0.02, + "learning_rate": 4.994173507647956e-05, + "loss": 1.125, + "step": 21834 + }, + { + "epoch": 0.02, + "learning_rate": 4.994172971680174e-05, + "loss": 0.9689, + "step": 21835 + }, + { + "epoch": 0.02, + "learning_rate": 4.994172435687772e-05, + "loss": 1.1055, + "step": 21836 + }, + { + "epoch": 0.02, + "learning_rate": 4.994171899670748e-05, + "loss": 1.2078, + "step": 21837 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941713636291034e-05, + "loss": 1.015, + "step": 21838 + }, + { + "epoch": 0.02, + "learning_rate": 4.994170827562836e-05, + "loss": 1.3581, + "step": 21839 + }, + { + "epoch": 0.02, + "learning_rate": 4.994170291471948e-05, + "loss": 0.8426, + "step": 21840 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941697553564385e-05, + "loss": 0.6546, + "step": 21841 + }, + { + "epoch": 0.02, + "learning_rate": 4.994169219216308e-05, + "loss": 0.5401, + "step": 21842 + }, + { + "epoch": 0.02, + "learning_rate": 4.994168683051555e-05, + "loss": 0.8876, + "step": 21843 + }, + { + "epoch": 0.02, + "learning_rate": 4.994168146862182e-05, + "loss": 1.1866, + "step": 21844 + }, + { + "epoch": 0.02, + "learning_rate": 4.994167610648187e-05, + "loss": 1.1873, + "step": 21845 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941670744095704e-05, + "loss": 1.0371, + "step": 21846 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941665381463335e-05, + "loss": 1.1136, + "step": 21847 + }, + { + "epoch": 0.02, + "learning_rate": 4.994166001858474e-05, + "loss": 1.0121, + "step": 21848 + }, + { + "epoch": 0.02, + "learning_rate": 4.994165465545994e-05, + "loss": 0.5295, + "step": 21849 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941649292088914e-05, + "loss": 1.014, + "step": 21850 + }, + { + "epoch": 0.02, + "learning_rate": 4.994164392847168e-05, + "loss": 1.0339, + "step": 21851 + }, + { + "epoch": 0.02, + "learning_rate": 4.994163856460824e-05, + "loss": 0.4855, + "step": 21852 + }, + { + "epoch": 0.02, + "learning_rate": 4.994163320049858e-05, + "loss": 0.713, + "step": 21853 + }, + { + "epoch": 0.02, + "learning_rate": 4.994162783614271e-05, + "loss": 0.8797, + "step": 21854 + }, + { + "epoch": 0.02, + "learning_rate": 4.994162247154063e-05, + "loss": 0.766, + "step": 21855 + }, + { + "epoch": 0.02, + "learning_rate": 4.994161710669233e-05, + "loss": 0.8868, + "step": 21856 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941611741597816e-05, + "loss": 0.8826, + "step": 21857 + }, + { + "epoch": 0.02, + "learning_rate": 4.994160637625709e-05, + "loss": 0.8637, + "step": 21858 + }, + { + "epoch": 0.02, + "learning_rate": 4.994160101067016e-05, + "loss": 1.089, + "step": 21859 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941595644837004e-05, + "loss": 1.1623, + "step": 21860 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941590278757644e-05, + "loss": 1.3446, + "step": 21861 + }, + { + "epoch": 0.02, + "learning_rate": 4.994158491243207e-05, + "loss": 1.1669, + "step": 21862 + }, + { + "epoch": 0.02, + "learning_rate": 4.994157954586027e-05, + "loss": 1.1461, + "step": 21863 + }, + { + "epoch": 0.02, + "learning_rate": 4.994157417904227e-05, + "loss": 0.9994, + "step": 21864 + }, + { + "epoch": 0.02, + "learning_rate": 4.994156881197806e-05, + "loss": 1.2095, + "step": 21865 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941563444667634e-05, + "loss": 0.9657, + "step": 21866 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941558077110994e-05, + "loss": 1.1049, + "step": 21867 + }, + { + "epoch": 0.02, + "learning_rate": 4.994155270930814e-05, + "loss": 0.6798, + "step": 21868 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941547341259076e-05, + "loss": 1.0595, + "step": 21869 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941541972963804e-05, + "loss": 1.3335, + "step": 21870 + }, + { + "epoch": 0.02, + "learning_rate": 4.994153660442231e-05, + "loss": 1.1598, + "step": 21871 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941531235634606e-05, + "loss": 0.9445, + "step": 21872 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941525866600694e-05, + "loss": 1.1211, + "step": 21873 + }, + { + "epoch": 0.02, + "learning_rate": 4.994152049732057e-05, + "loss": 1.1046, + "step": 21874 + }, + { + "epoch": 0.02, + "learning_rate": 4.994151512779423e-05, + "loss": 0.8923, + "step": 21875 + }, + { + "epoch": 0.02, + "learning_rate": 4.994150975802168e-05, + "loss": 1.1848, + "step": 21876 + }, + { + "epoch": 0.02, + "learning_rate": 4.994150438800291e-05, + "loss": 1.2439, + "step": 21877 + }, + { + "epoch": 0.02, + "learning_rate": 4.994149901773794e-05, + "loss": 1.2503, + "step": 21878 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941493647226756e-05, + "loss": 0.9794, + "step": 21879 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941488276469355e-05, + "loss": 0.9108, + "step": 21880 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941482905465746e-05, + "loss": 1.26, + "step": 21881 + }, + { + "epoch": 0.02, + "learning_rate": 4.994147753421593e-05, + "loss": 0.8044, + "step": 21882 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941472162719894e-05, + "loss": 0.8771, + "step": 21883 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941466790977645e-05, + "loss": 0.3538, + "step": 21884 + }, + { + "epoch": 0.02, + "learning_rate": 4.994146141898919e-05, + "loss": 1.0575, + "step": 21885 + }, + { + "epoch": 0.02, + "learning_rate": 4.994145604675452e-05, + "loss": 1.0018, + "step": 21886 + }, + { + "epoch": 0.02, + "learning_rate": 4.994145067427364e-05, + "loss": 1.2002, + "step": 21887 + }, + { + "epoch": 0.02, + "learning_rate": 4.994144530154655e-05, + "loss": 0.9861, + "step": 21888 + }, + { + "epoch": 0.02, + "learning_rate": 4.994143992857324e-05, + "loss": 1.026, + "step": 21889 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941434555353725e-05, + "loss": 1.01, + "step": 21890 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941429181888e-05, + "loss": 0.9993, + "step": 21891 + }, + { + "epoch": 0.02, + "learning_rate": 4.994142380817606e-05, + "loss": 1.0555, + "step": 21892 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941418434217913e-05, + "loss": 1.1267, + "step": 21893 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941413060013556e-05, + "loss": 0.9104, + "step": 21894 + }, + { + "epoch": 0.02, + "learning_rate": 4.994140768556298e-05, + "loss": 1.0472, + "step": 21895 + }, + { + "epoch": 0.02, + "learning_rate": 4.99414023108662e-05, + "loss": 0.7859, + "step": 21896 + }, + { + "epoch": 0.02, + "learning_rate": 4.994139693592321e-05, + "loss": 1.181, + "step": 21897 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941391560734007e-05, + "loss": 1.0757, + "step": 21898 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941386185298585e-05, + "loss": 1.3281, + "step": 21899 + }, + { + "epoch": 0.02, + "learning_rate": 4.994138080961697e-05, + "loss": 1.1034, + "step": 21900 + }, + { + "epoch": 0.02, + "learning_rate": 4.994137543368913e-05, + "loss": 1.2334, + "step": 21901 + }, + { + "epoch": 0.02, + "learning_rate": 4.994137005751508e-05, + "loss": 1.0356, + "step": 21902 + }, + { + "epoch": 0.02, + "learning_rate": 4.994136468109482e-05, + "loss": 1.1099, + "step": 21903 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941359304428355e-05, + "loss": 1.2952, + "step": 21904 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941353927515674e-05, + "loss": 1.0641, + "step": 21905 + }, + { + "epoch": 0.02, + "learning_rate": 4.994134855035678e-05, + "loss": 1.2827, + "step": 21906 + }, + { + "epoch": 0.02, + "learning_rate": 4.994134317295168e-05, + "loss": 0.9622, + "step": 21907 + }, + { + "epoch": 0.02, + "learning_rate": 4.994133779530037e-05, + "loss": 1.0262, + "step": 21908 + }, + { + "epoch": 0.02, + "learning_rate": 4.994133241740285e-05, + "loss": 1.0099, + "step": 21909 + }, + { + "epoch": 0.02, + "learning_rate": 4.994132703925912e-05, + "loss": 1.1109, + "step": 21910 + }, + { + "epoch": 0.02, + "learning_rate": 4.994132166086918e-05, + "loss": 0.4983, + "step": 21911 + }, + { + "epoch": 0.02, + "learning_rate": 4.994131628223303e-05, + "loss": 0.2494, + "step": 21912 + }, + { + "epoch": 0.02, + "learning_rate": 4.994131090335067e-05, + "loss": 0.6109, + "step": 21913 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941305524222094e-05, + "loss": 1.1543, + "step": 21914 + }, + { + "epoch": 0.02, + "learning_rate": 4.994130014484731e-05, + "loss": 0.9951, + "step": 21915 + }, + { + "epoch": 0.02, + "learning_rate": 4.994129476522632e-05, + "loss": 1.0126, + "step": 21916 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941289385359116e-05, + "loss": 1.0468, + "step": 21917 + }, + { + "epoch": 0.02, + "learning_rate": 4.99412840052457e-05, + "loss": 0.7462, + "step": 21918 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941278624886084e-05, + "loss": 0.974, + "step": 21919 + }, + { + "epoch": 0.02, + "learning_rate": 4.994127324428025e-05, + "loss": 1.1896, + "step": 21920 + }, + { + "epoch": 0.02, + "learning_rate": 4.994126786342821e-05, + "loss": 1.1428, + "step": 21921 + }, + { + "epoch": 0.02, + "learning_rate": 4.994126248232996e-05, + "loss": 1.4326, + "step": 21922 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941257100985495e-05, + "loss": 1.4802, + "step": 21923 + }, + { + "epoch": 0.02, + "learning_rate": 4.994125171939483e-05, + "loss": 1.6295, + "step": 21924 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941246337557945e-05, + "loss": 1.1482, + "step": 21925 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941240955474854e-05, + "loss": 1.051, + "step": 21926 + }, + { + "epoch": 0.02, + "learning_rate": 4.994123557314556e-05, + "loss": 1.212, + "step": 21927 + }, + { + "epoch": 0.02, + "learning_rate": 4.994123019057005e-05, + "loss": 1.0036, + "step": 21928 + }, + { + "epoch": 0.02, + "learning_rate": 4.994122480774833e-05, + "loss": 0.9438, + "step": 21929 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941219424680406e-05, + "loss": 0.8709, + "step": 21930 + }, + { + "epoch": 0.02, + "learning_rate": 4.994121404136627e-05, + "loss": 1.2161, + "step": 21931 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941208657805926e-05, + "loss": 1.2513, + "step": 21932 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941203273999374e-05, + "loss": 0.848, + "step": 21933 + }, + { + "epoch": 0.02, + "learning_rate": 4.994119788994661e-05, + "loss": 1.0146, + "step": 21934 + }, + { + "epoch": 0.02, + "learning_rate": 4.994119250564764e-05, + "loss": 1.2515, + "step": 21935 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941187121102455e-05, + "loss": 1.1471, + "step": 21936 + }, + { + "epoch": 0.02, + "learning_rate": 4.994118173631107e-05, + "loss": 1.1343, + "step": 21937 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941176351273464e-05, + "loss": 0.9777, + "step": 21938 + }, + { + "epoch": 0.02, + "learning_rate": 4.994117096598966e-05, + "loss": 0.7114, + "step": 21939 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941165580459644e-05, + "loss": 0.5791, + "step": 21940 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941160194683414e-05, + "loss": 0.5484, + "step": 21941 + }, + { + "epoch": 0.02, + "learning_rate": 4.994115480866098e-05, + "loss": 0.7862, + "step": 21942 + }, + { + "epoch": 0.02, + "learning_rate": 4.994114942239234e-05, + "loss": 1.1782, + "step": 21943 + }, + { + "epoch": 0.02, + "learning_rate": 4.994114403587749e-05, + "loss": 1.2639, + "step": 21944 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941138649116434e-05, + "loss": 1.5894, + "step": 21945 + }, + { + "epoch": 0.02, + "learning_rate": 4.994113326210916e-05, + "loss": 1.1496, + "step": 21946 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941127874855685e-05, + "loss": 1.5786, + "step": 21947 + }, + { + "epoch": 0.02, + "learning_rate": 4.994112248735601e-05, + "loss": 1.2341, + "step": 21948 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941117099610114e-05, + "loss": 1.2032, + "step": 21949 + }, + { + "epoch": 0.02, + "learning_rate": 4.994111171161801e-05, + "loss": 0.8861, + "step": 21950 + }, + { + "epoch": 0.02, + "learning_rate": 4.99411063233797e-05, + "loss": 1.0823, + "step": 21951 + }, + { + "epoch": 0.02, + "learning_rate": 4.994110093489518e-05, + "loss": 1.1882, + "step": 21952 + }, + { + "epoch": 0.02, + "learning_rate": 4.994109554616446e-05, + "loss": 1.2747, + "step": 21953 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941090157187524e-05, + "loss": 0.6245, + "step": 21954 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941084767964385e-05, + "loss": 0.988, + "step": 21955 + }, + { + "epoch": 0.02, + "learning_rate": 4.994107937849504e-05, + "loss": 0.9734, + "step": 21956 + }, + { + "epoch": 0.02, + "learning_rate": 4.994107398877948e-05, + "loss": 1.241, + "step": 21957 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941068598817716e-05, + "loss": 1.2708, + "step": 21958 + }, + { + "epoch": 0.02, + "learning_rate": 4.994106320860974e-05, + "loss": 1.6863, + "step": 21959 + }, + { + "epoch": 0.02, + "learning_rate": 4.994105781815556e-05, + "loss": 1.129, + "step": 21960 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941052427455174e-05, + "loss": 1.1986, + "step": 21961 + }, + { + "epoch": 0.02, + "learning_rate": 4.994104703650858e-05, + "loss": 1.1472, + "step": 21962 + }, + { + "epoch": 0.02, + "learning_rate": 4.994104164531578e-05, + "loss": 1.1469, + "step": 21963 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941036253876774e-05, + "loss": 1.0672, + "step": 21964 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941030862191554e-05, + "loss": 0.9028, + "step": 21965 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941025470260126e-05, + "loss": 1.8202, + "step": 21966 + }, + { + "epoch": 0.02, + "learning_rate": 4.994102007808249e-05, + "loss": 0.8994, + "step": 21967 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941014685658656e-05, + "loss": 1.0473, + "step": 21968 + }, + { + "epoch": 0.02, + "learning_rate": 4.994100929298861e-05, + "loss": 1.0011, + "step": 21969 + }, + { + "epoch": 0.02, + "learning_rate": 4.994100390007236e-05, + "loss": 1.1317, + "step": 21970 + }, + { + "epoch": 0.02, + "learning_rate": 4.99409985069099e-05, + "loss": 1.1085, + "step": 21971 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940993113501225e-05, + "loss": 1.0306, + "step": 21972 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940987719846355e-05, + "loss": 1.2431, + "step": 21973 + }, + { + "epoch": 0.02, + "learning_rate": 4.994098232594527e-05, + "loss": 1.1118, + "step": 21974 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940976931797977e-05, + "loss": 1.1545, + "step": 21975 + }, + { + "epoch": 0.02, + "learning_rate": 4.994097153740449e-05, + "loss": 1.1777, + "step": 21976 + }, + { + "epoch": 0.02, + "learning_rate": 4.994096614276478e-05, + "loss": 1.3271, + "step": 21977 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940960747878876e-05, + "loss": 1.6184, + "step": 21978 + }, + { + "epoch": 0.02, + "learning_rate": 4.994095535274676e-05, + "loss": 0.8815, + "step": 21979 + }, + { + "epoch": 0.02, + "learning_rate": 4.994094995736844e-05, + "loss": 1.5912, + "step": 21980 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940944561743904e-05, + "loss": 0.8405, + "step": 21981 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940939165873176e-05, + "loss": 0.9565, + "step": 21982 + }, + { + "epoch": 0.02, + "learning_rate": 4.994093376975623e-05, + "loss": 0.996, + "step": 21983 + }, + { + "epoch": 0.02, + "learning_rate": 4.994092837339308e-05, + "loss": 1.1973, + "step": 21984 + }, + { + "epoch": 0.02, + "learning_rate": 4.994092297678373e-05, + "loss": 1.0706, + "step": 21985 + }, + { + "epoch": 0.02, + "learning_rate": 4.994091757992817e-05, + "loss": 1.0045, + "step": 21986 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940912182826405e-05, + "loss": 0.976, + "step": 21987 + }, + { + "epoch": 0.02, + "learning_rate": 4.994090678547843e-05, + "loss": 0.9117, + "step": 21988 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940901387884255e-05, + "loss": 0.6756, + "step": 21989 + }, + { + "epoch": 0.02, + "learning_rate": 4.994089599004387e-05, + "loss": 1.0898, + "step": 21990 + }, + { + "epoch": 0.02, + "learning_rate": 4.994089059195728e-05, + "loss": 1.1355, + "step": 21991 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940885193624475e-05, + "loss": 1.2356, + "step": 21992 + }, + { + "epoch": 0.02, + "learning_rate": 4.994087979504548e-05, + "loss": 1.1732, + "step": 21993 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940874396220265e-05, + "loss": 1.0352, + "step": 21994 + }, + { + "epoch": 0.02, + "learning_rate": 4.994086899714885e-05, + "loss": 1.0433, + "step": 21995 + }, + { + "epoch": 0.02, + "learning_rate": 4.994086359783123e-05, + "loss": 0.6878, + "step": 21996 + }, + { + "epoch": 0.02, + "learning_rate": 4.99408581982674e-05, + "loss": 1.1046, + "step": 21997 + }, + { + "epoch": 0.02, + "learning_rate": 4.994085279845737e-05, + "loss": 0.9943, + "step": 21998 + }, + { + "epoch": 0.02, + "learning_rate": 4.994084739840114e-05, + "loss": 1.2319, + "step": 21999 + }, + { + "epoch": 0.02, + "learning_rate": 4.99408419980987e-05, + "loss": 0.9888, + "step": 22000 + }, + { + "epoch": 0.02, + "eval_loss": 1.0208954811096191, + "eval_runtime": 86.4671, + "eval_samples_per_second": 16.018, + "eval_steps_per_second": 4.013, + "step": 22000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940836597550045e-05, + "loss": 0.6165, + "step": 22001 + }, + { + "epoch": 0.02, + "learning_rate": 4.994083119675519e-05, + "loss": 0.6036, + "step": 22002 + }, + { + "epoch": 0.02, + "learning_rate": 4.994082579571413e-05, + "loss": 0.5895, + "step": 22003 + }, + { + "epoch": 0.02, + "learning_rate": 4.994082039442687e-05, + "loss": 0.5954, + "step": 22004 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940814992893396e-05, + "loss": 0.6935, + "step": 22005 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940809591113725e-05, + "loss": 1.3611, + "step": 22006 + }, + { + "epoch": 0.02, + "learning_rate": 4.994080418908784e-05, + "loss": 1.0446, + "step": 22007 + }, + { + "epoch": 0.02, + "learning_rate": 4.994079878681576e-05, + "loss": 1.0151, + "step": 22008 + }, + { + "epoch": 0.02, + "learning_rate": 4.994079338429747e-05, + "loss": 1.2993, + "step": 22009 + }, + { + "epoch": 0.02, + "learning_rate": 4.994078798153297e-05, + "loss": 2.0238, + "step": 22010 + }, + { + "epoch": 0.02, + "learning_rate": 4.994078257852227e-05, + "loss": 1.2142, + "step": 22011 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940777175265366e-05, + "loss": 1.1003, + "step": 22012 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940771771762254e-05, + "loss": 1.0433, + "step": 22013 + }, + { + "epoch": 0.02, + "learning_rate": 4.994076636801294e-05, + "loss": 1.1874, + "step": 22014 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940760964017425e-05, + "loss": 1.2904, + "step": 22015 + }, + { + "epoch": 0.02, + "learning_rate": 4.99407555597757e-05, + "loss": 1.1201, + "step": 22016 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940750155287775e-05, + "loss": 1.093, + "step": 22017 + }, + { + "epoch": 0.02, + "learning_rate": 4.994074475055364e-05, + "loss": 1.0782, + "step": 22018 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940739345573296e-05, + "loss": 0.866, + "step": 22019 + }, + { + "epoch": 0.02, + "learning_rate": 4.994073394034676e-05, + "loss": 1.0851, + "step": 22020 + }, + { + "epoch": 0.02, + "learning_rate": 4.994072853487401e-05, + "loss": 1.0589, + "step": 22021 + }, + { + "epoch": 0.02, + "learning_rate": 4.994072312915506e-05, + "loss": 0.606, + "step": 22022 + }, + { + "epoch": 0.02, + "learning_rate": 4.99407177231899e-05, + "loss": 0.7022, + "step": 22023 + }, + { + "epoch": 0.02, + "learning_rate": 4.994071231697855e-05, + "loss": 0.9314, + "step": 22024 + }, + { + "epoch": 0.02, + "learning_rate": 4.994070691052098e-05, + "loss": 1.0438, + "step": 22025 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940701503817213e-05, + "loss": 1.178, + "step": 22026 + }, + { + "epoch": 0.02, + "learning_rate": 4.994069609686724e-05, + "loss": 1.0563, + "step": 22027 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940690689671065e-05, + "loss": 0.9947, + "step": 22028 + }, + { + "epoch": 0.02, + "learning_rate": 4.994068528222868e-05, + "loss": 1.1696, + "step": 22029 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940679874540095e-05, + "loss": 0.881, + "step": 22030 + }, + { + "epoch": 0.02, + "learning_rate": 4.994067446660531e-05, + "loss": 1.0745, + "step": 22031 + }, + { + "epoch": 0.02, + "learning_rate": 4.994066905842432e-05, + "loss": 1.1293, + "step": 22032 + }, + { + "epoch": 0.02, + "learning_rate": 4.994066364999712e-05, + "loss": 1.0681, + "step": 22033 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940658241323725e-05, + "loss": 0.759, + "step": 22034 + }, + { + "epoch": 0.02, + "learning_rate": 4.994065283240412e-05, + "loss": 0.7087, + "step": 22035 + }, + { + "epoch": 0.02, + "learning_rate": 4.994064742323832e-05, + "loss": 1.2128, + "step": 22036 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940642013826304e-05, + "loss": 1.5023, + "step": 22037 + }, + { + "epoch": 0.02, + "learning_rate": 4.994063660416809e-05, + "loss": 1.2635, + "step": 22038 + }, + { + "epoch": 0.02, + "learning_rate": 4.994063119426368e-05, + "loss": 1.0739, + "step": 22039 + }, + { + "epoch": 0.02, + "learning_rate": 4.994062578411305e-05, + "loss": 1.1619, + "step": 22040 + }, + { + "epoch": 0.02, + "learning_rate": 4.994062037371623e-05, + "loss": 0.9069, + "step": 22041 + }, + { + "epoch": 0.02, + "learning_rate": 4.99406149630732e-05, + "loss": 1.0926, + "step": 22042 + }, + { + "epoch": 0.02, + "learning_rate": 4.994060955218397e-05, + "loss": 1.3818, + "step": 22043 + }, + { + "epoch": 0.02, + "learning_rate": 4.994060414104854e-05, + "loss": 1.2179, + "step": 22044 + }, + { + "epoch": 0.02, + "learning_rate": 4.99405987296669e-05, + "loss": 1.0342, + "step": 22045 + }, + { + "epoch": 0.02, + "learning_rate": 4.994059331803906e-05, + "loss": 0.8937, + "step": 22046 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940587906165014e-05, + "loss": 1.1544, + "step": 22047 + }, + { + "epoch": 0.02, + "learning_rate": 4.994058249404477e-05, + "loss": 0.8583, + "step": 22048 + }, + { + "epoch": 0.02, + "learning_rate": 4.994057708167833e-05, + "loss": 1.1914, + "step": 22049 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940571669065675e-05, + "loss": 0.9587, + "step": 22050 + }, + { + "epoch": 0.02, + "learning_rate": 4.994056625620682e-05, + "loss": 0.9812, + "step": 22051 + }, + { + "epoch": 0.02, + "learning_rate": 4.994056084310176e-05, + "loss": 1.1614, + "step": 22052 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940555429750505e-05, + "loss": 1.1222, + "step": 22053 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940550016153044e-05, + "loss": 1.1019, + "step": 22054 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940544602309375e-05, + "loss": 1.0073, + "step": 22055 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940539188219504e-05, + "loss": 0.9672, + "step": 22056 + }, + { + "epoch": 0.02, + "learning_rate": 4.994053377388344e-05, + "loss": 0.9748, + "step": 22057 + }, + { + "epoch": 0.02, + "learning_rate": 4.994052835930116e-05, + "loss": 1.0404, + "step": 22058 + }, + { + "epoch": 0.02, + "learning_rate": 4.994052294447269e-05, + "loss": 1.1456, + "step": 22059 + }, + { + "epoch": 0.02, + "learning_rate": 4.994051752939801e-05, + "loss": 1.0114, + "step": 22060 + }, + { + "epoch": 0.02, + "learning_rate": 4.994051211407713e-05, + "loss": 0.9307, + "step": 22061 + }, + { + "epoch": 0.02, + "learning_rate": 4.994050669851005e-05, + "loss": 1.1907, + "step": 22062 + }, + { + "epoch": 0.02, + "learning_rate": 4.994050128269676e-05, + "loss": 1.0894, + "step": 22063 + }, + { + "epoch": 0.02, + "learning_rate": 4.994049586663728e-05, + "loss": 1.2495, + "step": 22064 + }, + { + "epoch": 0.02, + "learning_rate": 4.994049045033159e-05, + "loss": 1.2588, + "step": 22065 + }, + { + "epoch": 0.02, + "learning_rate": 4.994048503377971e-05, + "loss": 0.9996, + "step": 22066 + }, + { + "epoch": 0.02, + "learning_rate": 4.994047961698161e-05, + "loss": 1.1738, + "step": 22067 + }, + { + "epoch": 0.02, + "learning_rate": 4.994047419993731e-05, + "loss": 1.1268, + "step": 22068 + }, + { + "epoch": 0.02, + "learning_rate": 4.994046878264682e-05, + "loss": 1.1319, + "step": 22069 + }, + { + "epoch": 0.02, + "learning_rate": 4.994046336511012e-05, + "loss": 1.2897, + "step": 22070 + }, + { + "epoch": 0.02, + "learning_rate": 4.994045794732722e-05, + "loss": 0.938, + "step": 22071 + }, + { + "epoch": 0.02, + "learning_rate": 4.994045252929812e-05, + "loss": 0.9807, + "step": 22072 + }, + { + "epoch": 0.02, + "learning_rate": 4.994044711102281e-05, + "loss": 1.1295, + "step": 22073 + }, + { + "epoch": 0.02, + "learning_rate": 4.994044169250132e-05, + "loss": 1.2488, + "step": 22074 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940436273733604e-05, + "loss": 1.2837, + "step": 22075 + }, + { + "epoch": 0.02, + "learning_rate": 4.99404308547197e-05, + "loss": 1.34, + "step": 22076 + }, + { + "epoch": 0.02, + "learning_rate": 4.994042543545959e-05, + "loss": 1.2402, + "step": 22077 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940420015953285e-05, + "loss": 0.9383, + "step": 22078 + }, + { + "epoch": 0.02, + "learning_rate": 4.994041459620077e-05, + "loss": 1.3563, + "step": 22079 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940409176202055e-05, + "loss": 1.0821, + "step": 22080 + }, + { + "epoch": 0.02, + "learning_rate": 4.994040375595714e-05, + "loss": 0.886, + "step": 22081 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940398335466025e-05, + "loss": 1.2144, + "step": 22082 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940392914728713e-05, + "loss": 1.1659, + "step": 22083 + }, + { + "epoch": 0.02, + "learning_rate": 4.994038749374519e-05, + "loss": 1.0736, + "step": 22084 + }, + { + "epoch": 0.02, + "learning_rate": 4.994038207251548e-05, + "loss": 0.1461, + "step": 22085 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940376651039554e-05, + "loss": 0.0973, + "step": 22086 + }, + { + "epoch": 0.02, + "learning_rate": 4.994037122931744e-05, + "loss": 0.0882, + "step": 22087 + }, + { + "epoch": 0.02, + "learning_rate": 4.994036580734911e-05, + "loss": 0.07, + "step": 22088 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940360385134596e-05, + "loss": 0.0834, + "step": 22089 + }, + { + "epoch": 0.02, + "learning_rate": 4.994035496267387e-05, + "loss": 0.0662, + "step": 22090 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940349539966944e-05, + "loss": 0.0606, + "step": 22091 + }, + { + "epoch": 0.02, + "learning_rate": 4.994034411701383e-05, + "loss": 0.0522, + "step": 22092 + }, + { + "epoch": 0.02, + "learning_rate": 4.99403386938145e-05, + "loss": 0.0544, + "step": 22093 + }, + { + "epoch": 0.02, + "learning_rate": 4.994033327036898e-05, + "loss": 0.0373, + "step": 22094 + }, + { + "epoch": 0.02, + "learning_rate": 4.994032784667725e-05, + "loss": 0.0541, + "step": 22095 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940322422739325e-05, + "loss": 0.0627, + "step": 22096 + }, + { + "epoch": 0.02, + "learning_rate": 4.99403169985552e-05, + "loss": 0.0694, + "step": 22097 + }, + { + "epoch": 0.02, + "learning_rate": 4.994031157412487e-05, + "loss": 0.0533, + "step": 22098 + }, + { + "epoch": 0.02, + "learning_rate": 4.994030614944835e-05, + "loss": 0.8092, + "step": 22099 + }, + { + "epoch": 0.02, + "learning_rate": 4.994030072452562e-05, + "loss": 1.3318, + "step": 22100 + }, + { + "epoch": 0.02, + "learning_rate": 4.994029529935669e-05, + "loss": 1.1477, + "step": 22101 + }, + { + "epoch": 0.02, + "learning_rate": 4.994028987394157e-05, + "loss": 1.4543, + "step": 22102 + }, + { + "epoch": 0.02, + "learning_rate": 4.994028444828024e-05, + "loss": 0.8911, + "step": 22103 + }, + { + "epoch": 0.02, + "learning_rate": 4.994027902237271e-05, + "loss": 0.8009, + "step": 22104 + }, + { + "epoch": 0.02, + "learning_rate": 4.994027359621898e-05, + "loss": 0.8756, + "step": 22105 + }, + { + "epoch": 0.02, + "learning_rate": 4.994026816981906e-05, + "loss": 0.9474, + "step": 22106 + }, + { + "epoch": 0.02, + "learning_rate": 4.994026274317293e-05, + "loss": 0.645, + "step": 22107 + }, + { + "epoch": 0.02, + "learning_rate": 4.99402573162806e-05, + "loss": 0.7555, + "step": 22108 + }, + { + "epoch": 0.02, + "learning_rate": 4.994025188914208e-05, + "loss": 0.121, + "step": 22109 + }, + { + "epoch": 0.02, + "learning_rate": 4.994024646175735e-05, + "loss": 0.7596, + "step": 22110 + }, + { + "epoch": 0.02, + "learning_rate": 4.994024103412642e-05, + "loss": 0.9172, + "step": 22111 + }, + { + "epoch": 0.02, + "learning_rate": 4.99402356062493e-05, + "loss": 1.1311, + "step": 22112 + }, + { + "epoch": 0.02, + "learning_rate": 4.994023017812598e-05, + "loss": 1.383, + "step": 22113 + }, + { + "epoch": 0.02, + "learning_rate": 4.994022474975645e-05, + "loss": 1.4239, + "step": 22114 + }, + { + "epoch": 0.02, + "learning_rate": 4.994021932114073e-05, + "loss": 1.2888, + "step": 22115 + }, + { + "epoch": 0.02, + "learning_rate": 4.994021389227881e-05, + "loss": 1.3813, + "step": 22116 + }, + { + "epoch": 0.02, + "learning_rate": 4.994020846317068e-05, + "loss": 1.1023, + "step": 22117 + }, + { + "epoch": 0.02, + "learning_rate": 4.994020303381636e-05, + "loss": 1.1101, + "step": 22118 + }, + { + "epoch": 0.02, + "learning_rate": 4.994019760421585e-05, + "loss": 1.0153, + "step": 22119 + }, + { + "epoch": 0.02, + "learning_rate": 4.994019217436913e-05, + "loss": 1.2262, + "step": 22120 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940186744276204e-05, + "loss": 1.1241, + "step": 22121 + }, + { + "epoch": 0.02, + "learning_rate": 4.994018131393708e-05, + "loss": 0.8321, + "step": 22122 + }, + { + "epoch": 0.02, + "learning_rate": 4.994017588335177e-05, + "loss": 0.818, + "step": 22123 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940170452520255e-05, + "loss": 1.2988, + "step": 22124 + }, + { + "epoch": 0.02, + "learning_rate": 4.994016502144254e-05, + "loss": 1.2263, + "step": 22125 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940159590118626e-05, + "loss": 0.7546, + "step": 22126 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940154158548516e-05, + "loss": 1.0711, + "step": 22127 + }, + { + "epoch": 0.02, + "learning_rate": 4.99401487267322e-05, + "loss": 1.0441, + "step": 22128 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940143294669695e-05, + "loss": 0.8568, + "step": 22129 + }, + { + "epoch": 0.02, + "learning_rate": 4.994013786236099e-05, + "loss": 1.1937, + "step": 22130 + }, + { + "epoch": 0.02, + "learning_rate": 4.994013242980609e-05, + "loss": 1.2498, + "step": 22131 + }, + { + "epoch": 0.02, + "learning_rate": 4.994012699700498e-05, + "loss": 1.0209, + "step": 22132 + }, + { + "epoch": 0.02, + "learning_rate": 4.994012156395768e-05, + "loss": 1.3841, + "step": 22133 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940116130664173e-05, + "loss": 1.1133, + "step": 22134 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940110697124474e-05, + "loss": 1.126, + "step": 22135 + }, + { + "epoch": 0.02, + "learning_rate": 4.994010526333858e-05, + "loss": 0.8245, + "step": 22136 + }, + { + "epoch": 0.02, + "learning_rate": 4.994009982930648e-05, + "loss": 0.8978, + "step": 22137 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940094395028184e-05, + "loss": 1.4016, + "step": 22138 + }, + { + "epoch": 0.02, + "learning_rate": 4.99400889605037e-05, + "loss": 0.8745, + "step": 22139 + }, + { + "epoch": 0.02, + "learning_rate": 4.994008352573301e-05, + "loss": 1.1652, + "step": 22140 + }, + { + "epoch": 0.02, + "learning_rate": 4.994007809071612e-05, + "loss": 1.1048, + "step": 22141 + }, + { + "epoch": 0.02, + "learning_rate": 4.994007265545303e-05, + "loss": 1.181, + "step": 22142 + }, + { + "epoch": 0.02, + "learning_rate": 4.994006721994374e-05, + "loss": 1.021, + "step": 22143 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940061784188266e-05, + "loss": 1.3591, + "step": 22144 + }, + { + "epoch": 0.02, + "learning_rate": 4.994005634818658e-05, + "loss": 1.1281, + "step": 22145 + }, + { + "epoch": 0.02, + "learning_rate": 4.994005091193871e-05, + "loss": 1.172, + "step": 22146 + }, + { + "epoch": 0.02, + "learning_rate": 4.994004547544463e-05, + "loss": 1.4742, + "step": 22147 + }, + { + "epoch": 0.02, + "learning_rate": 4.994004003870435e-05, + "loss": 1.2497, + "step": 22148 + }, + { + "epoch": 0.02, + "learning_rate": 4.994003460171788e-05, + "loss": 1.1395, + "step": 22149 + }, + { + "epoch": 0.02, + "learning_rate": 4.994002916448521e-05, + "loss": 0.9154, + "step": 22150 + }, + { + "epoch": 0.02, + "learning_rate": 4.994002372700635e-05, + "loss": 1.1961, + "step": 22151 + }, + { + "epoch": 0.02, + "learning_rate": 4.994001828928129e-05, + "loss": 1.1869, + "step": 22152 + }, + { + "epoch": 0.02, + "learning_rate": 4.994001285131002e-05, + "loss": 1.1057, + "step": 22153 + }, + { + "epoch": 0.02, + "learning_rate": 4.994000741309257e-05, + "loss": 1.1132, + "step": 22154 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940001974628915e-05, + "loss": 1.2987, + "step": 22155 + }, + { + "epoch": 0.02, + "learning_rate": 4.993999653591906e-05, + "loss": 1.3052, + "step": 22156 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939991096963014e-05, + "loss": 1.0056, + "step": 22157 + }, + { + "epoch": 0.02, + "learning_rate": 4.993998565776077e-05, + "loss": 0.7543, + "step": 22158 + }, + { + "epoch": 0.02, + "learning_rate": 4.993998021831232e-05, + "loss": 1.1718, + "step": 22159 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939974778617685e-05, + "loss": 0.9731, + "step": 22160 + }, + { + "epoch": 0.02, + "learning_rate": 4.993996933867684e-05, + "loss": 1.1318, + "step": 22161 + }, + { + "epoch": 0.02, + "learning_rate": 4.993996389848981e-05, + "loss": 0.9144, + "step": 22162 + }, + { + "epoch": 0.02, + "learning_rate": 4.993995845805658e-05, + "loss": 1.0712, + "step": 22163 + }, + { + "epoch": 0.02, + "learning_rate": 4.993995301737715e-05, + "loss": 1.0625, + "step": 22164 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939947576451526e-05, + "loss": 1.1013, + "step": 22165 + }, + { + "epoch": 0.02, + "learning_rate": 4.993994213527971e-05, + "loss": 1.2061, + "step": 22166 + }, + { + "epoch": 0.02, + "learning_rate": 4.993993669386169e-05, + "loss": 0.9907, + "step": 22167 + }, + { + "epoch": 0.02, + "learning_rate": 4.993993125219748e-05, + "loss": 0.7643, + "step": 22168 + }, + { + "epoch": 0.02, + "learning_rate": 4.993992581028707e-05, + "loss": 1.9165, + "step": 22169 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939920368130454e-05, + "loss": 1.573, + "step": 22170 + }, + { + "epoch": 0.02, + "learning_rate": 4.993991492572765e-05, + "loss": 1.0276, + "step": 22171 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939909483078655e-05, + "loss": 1.1533, + "step": 22172 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939904040183464e-05, + "loss": 1.3832, + "step": 22173 + }, + { + "epoch": 0.02, + "learning_rate": 4.993989859704207e-05, + "loss": 0.8612, + "step": 22174 + }, + { + "epoch": 0.02, + "learning_rate": 4.993989315365448e-05, + "loss": 0.8356, + "step": 22175 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939887710020696e-05, + "loss": 0.271, + "step": 22176 + }, + { + "epoch": 0.02, + "learning_rate": 4.993988226614072e-05, + "loss": 0.2349, + "step": 22177 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939876822014535e-05, + "loss": 0.2051, + "step": 22178 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939871377642165e-05, + "loss": 0.7093, + "step": 22179 + }, + { + "epoch": 0.02, + "learning_rate": 4.99398659330236e-05, + "loss": 1.2577, + "step": 22180 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939860488158826e-05, + "loss": 1.1181, + "step": 22181 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939855043047864e-05, + "loss": 1.1564, + "step": 22182 + }, + { + "epoch": 0.02, + "learning_rate": 4.993984959769071e-05, + "loss": 1.0597, + "step": 22183 + }, + { + "epoch": 0.02, + "learning_rate": 4.993984415208737e-05, + "loss": 0.4073, + "step": 22184 + }, + { + "epoch": 0.02, + "learning_rate": 4.993983870623782e-05, + "loss": 1.2348, + "step": 22185 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939833260142076e-05, + "loss": 0.9688, + "step": 22186 + }, + { + "epoch": 0.02, + "learning_rate": 4.993982781380013e-05, + "loss": 1.0327, + "step": 22187 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939822367212004e-05, + "loss": 1.3823, + "step": 22188 + }, + { + "epoch": 0.02, + "learning_rate": 4.993981692037767e-05, + "loss": 1.1151, + "step": 22189 + }, + { + "epoch": 0.02, + "learning_rate": 4.993981147329715e-05, + "loss": 1.0981, + "step": 22190 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939806025970425e-05, + "loss": 1.1135, + "step": 22191 + }, + { + "epoch": 0.02, + "learning_rate": 4.993980057839751e-05, + "loss": 1.1927, + "step": 22192 + }, + { + "epoch": 0.02, + "learning_rate": 4.99397951305784e-05, + "loss": 1.103, + "step": 22193 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939789682513097e-05, + "loss": 0.885, + "step": 22194 + }, + { + "epoch": 0.02, + "learning_rate": 4.993978423420159e-05, + "loss": 1.1748, + "step": 22195 + }, + { + "epoch": 0.02, + "learning_rate": 4.99397787856439e-05, + "loss": 1.0383, + "step": 22196 + }, + { + "epoch": 0.02, + "learning_rate": 4.993977333684001e-05, + "loss": 1.1571, + "step": 22197 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939767887789925e-05, + "loss": 1.0387, + "step": 22198 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939762438493644e-05, + "loss": 1.1472, + "step": 22199 + }, + { + "epoch": 0.02, + "learning_rate": 4.993975698895117e-05, + "loss": 0.8504, + "step": 22200 + }, + { + "epoch": 0.02, + "learning_rate": 4.99397515391625e-05, + "loss": 1.0299, + "step": 22201 + }, + { + "epoch": 0.02, + "learning_rate": 4.993974608912764e-05, + "loss": 1.1455, + "step": 22202 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939740638846576e-05, + "loss": 1.2034, + "step": 22203 + }, + { + "epoch": 0.02, + "learning_rate": 4.993973518831932e-05, + "loss": 1.2032, + "step": 22204 + }, + { + "epoch": 0.02, + "learning_rate": 4.993972973754587e-05, + "loss": 1.1968, + "step": 22205 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939724286526226e-05, + "loss": 1.2317, + "step": 22206 + }, + { + "epoch": 0.02, + "learning_rate": 4.993971883526039e-05, + "loss": 1.1543, + "step": 22207 + }, + { + "epoch": 0.02, + "learning_rate": 4.993971338374836e-05, + "loss": 1.0025, + "step": 22208 + }, + { + "epoch": 0.02, + "learning_rate": 4.993970793199013e-05, + "loss": 1.1406, + "step": 22209 + }, + { + "epoch": 0.02, + "learning_rate": 4.993970247998571e-05, + "loss": 1.2305, + "step": 22210 + }, + { + "epoch": 0.02, + "learning_rate": 4.99396970277351e-05, + "loss": 0.8869, + "step": 22211 + }, + { + "epoch": 0.02, + "learning_rate": 4.993969157523829e-05, + "loss": 1.0279, + "step": 22212 + }, + { + "epoch": 0.02, + "learning_rate": 4.993968612249529e-05, + "loss": 1.0019, + "step": 22213 + }, + { + "epoch": 0.02, + "learning_rate": 4.993968066950609e-05, + "loss": 1.452, + "step": 22214 + }, + { + "epoch": 0.02, + "learning_rate": 4.99396752162707e-05, + "loss": 1.0182, + "step": 22215 + }, + { + "epoch": 0.02, + "learning_rate": 4.993966976278911e-05, + "loss": 1.1806, + "step": 22216 + }, + { + "epoch": 0.02, + "learning_rate": 4.993966430906133e-05, + "loss": 1.1939, + "step": 22217 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939658855087354e-05, + "loss": 1.0205, + "step": 22218 + }, + { + "epoch": 0.02, + "learning_rate": 4.993965340086719e-05, + "loss": 1.2034, + "step": 22219 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939647946400836e-05, + "loss": 1.1132, + "step": 22220 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939642491688274e-05, + "loss": 1.0666, + "step": 22221 + }, + { + "epoch": 0.02, + "learning_rate": 4.993963703672953e-05, + "loss": 0.7728, + "step": 22222 + }, + { + "epoch": 0.02, + "learning_rate": 4.993963158152459e-05, + "loss": 0.6948, + "step": 22223 + }, + { + "epoch": 0.02, + "learning_rate": 4.993962612607346e-05, + "loss": 1.4592, + "step": 22224 + }, + { + "epoch": 0.02, + "learning_rate": 4.993962067037613e-05, + "loss": 1.2735, + "step": 22225 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939615214432604e-05, + "loss": 0.5582, + "step": 22226 + }, + { + "epoch": 0.02, + "learning_rate": 4.993960975824289e-05, + "loss": 1.1151, + "step": 22227 + }, + { + "epoch": 0.02, + "learning_rate": 4.993960430180698e-05, + "loss": 1.0791, + "step": 22228 + }, + { + "epoch": 0.02, + "learning_rate": 4.993959884512488e-05, + "loss": 1.6951, + "step": 22229 + }, + { + "epoch": 0.02, + "learning_rate": 4.993959338819658e-05, + "loss": 1.4961, + "step": 22230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939587931022095e-05, + "loss": 1.2747, + "step": 22231 + }, + { + "epoch": 0.02, + "learning_rate": 4.993958247360141e-05, + "loss": 0.8967, + "step": 22232 + }, + { + "epoch": 0.02, + "learning_rate": 4.993957701593454e-05, + "loss": 1.3909, + "step": 22233 + }, + { + "epoch": 0.02, + "learning_rate": 4.993957155802147e-05, + "loss": 1.1862, + "step": 22234 + }, + { + "epoch": 0.02, + "learning_rate": 4.993956609986221e-05, + "loss": 1.2344, + "step": 22235 + }, + { + "epoch": 0.02, + "learning_rate": 4.993956064145676e-05, + "loss": 1.3602, + "step": 22236 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939555182805115e-05, + "loss": 0.623, + "step": 22237 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939549723907275e-05, + "loss": 0.2655, + "step": 22238 + }, + { + "epoch": 0.02, + "learning_rate": 4.993954426476325e-05, + "loss": 0.1426, + "step": 22239 + }, + { + "epoch": 0.02, + "learning_rate": 4.993953880537302e-05, + "loss": 0.5222, + "step": 22240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99395333457366e-05, + "loss": 1.0755, + "step": 22241 + }, + { + "epoch": 0.02, + "learning_rate": 4.993952788585399e-05, + "loss": 1.2242, + "step": 22242 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939522425725195e-05, + "loss": 1.1512, + "step": 22243 + }, + { + "epoch": 0.02, + "learning_rate": 4.99395169653502e-05, + "loss": 0.5723, + "step": 22244 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939511504729005e-05, + "loss": 0.9184, + "step": 22245 + }, + { + "epoch": 0.02, + "learning_rate": 4.993950604386163e-05, + "loss": 1.023, + "step": 22246 + }, + { + "epoch": 0.02, + "learning_rate": 4.993950058274806e-05, + "loss": 1.2333, + "step": 22247 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939495121388297e-05, + "loss": 1.0079, + "step": 22248 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939489659782343e-05, + "loss": 1.4063, + "step": 22249 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939484197930195e-05, + "loss": 1.1359, + "step": 22250 + }, + { + "epoch": 0.02, + "learning_rate": 4.993947873583186e-05, + "loss": 1.1665, + "step": 22251 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939473273487326e-05, + "loss": 0.9153, + "step": 22252 + }, + { + "epoch": 0.02, + "learning_rate": 4.99394678108966e-05, + "loss": 0.9765, + "step": 22253 + }, + { + "epoch": 0.02, + "learning_rate": 4.993946234805968e-05, + "loss": 1.1693, + "step": 22254 + }, + { + "epoch": 0.02, + "learning_rate": 4.993945688497658e-05, + "loss": 1.3216, + "step": 22255 + }, + { + "epoch": 0.02, + "learning_rate": 4.993945142164728e-05, + "loss": 1.1673, + "step": 22256 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939445958071785e-05, + "loss": 0.9825, + "step": 22257 + }, + { + "epoch": 0.02, + "learning_rate": 4.99394404942501e-05, + "loss": 1.0525, + "step": 22258 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939435030182224e-05, + "loss": 1.0972, + "step": 22259 + }, + { + "epoch": 0.02, + "learning_rate": 4.993942956586816e-05, + "loss": 1.0889, + "step": 22260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939424101307896e-05, + "loss": 1.204, + "step": 22261 + }, + { + "epoch": 0.02, + "learning_rate": 4.993941863650145e-05, + "loss": 1.2045, + "step": 22262 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939413171448814e-05, + "loss": 1.2288, + "step": 22263 + }, + { + "epoch": 0.02, + "learning_rate": 4.993940770614998e-05, + "loss": 1.1883, + "step": 22264 + }, + { + "epoch": 0.02, + "learning_rate": 4.993940224060496e-05, + "loss": 0.8922, + "step": 22265 + }, + { + "epoch": 0.02, + "learning_rate": 4.993939677481374e-05, + "loss": 0.9595, + "step": 22266 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939391308776334e-05, + "loss": 1.1863, + "step": 22267 + }, + { + "epoch": 0.02, + "learning_rate": 4.993938584249274e-05, + "loss": 1.5447, + "step": 22268 + }, + { + "epoch": 0.02, + "learning_rate": 4.993938037596295e-05, + "loss": 1.5893, + "step": 22269 + }, + { + "epoch": 0.02, + "learning_rate": 4.993937490918697e-05, + "loss": 0.8702, + "step": 22270 + }, + { + "epoch": 0.02, + "learning_rate": 4.99393694421648e-05, + "loss": 1.189, + "step": 22271 + }, + { + "epoch": 0.02, + "learning_rate": 4.993936397489644e-05, + "loss": 1.1282, + "step": 22272 + }, + { + "epoch": 0.02, + "learning_rate": 4.993935850738189e-05, + "loss": 1.0422, + "step": 22273 + }, + { + "epoch": 0.02, + "learning_rate": 4.993935303962115e-05, + "loss": 1.0953, + "step": 22274 + }, + { + "epoch": 0.02, + "learning_rate": 4.993934757161421e-05, + "loss": 1.1224, + "step": 22275 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939342103361086e-05, + "loss": 1.0236, + "step": 22276 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939336634861775e-05, + "loss": 0.837, + "step": 22277 + }, + { + "epoch": 0.02, + "learning_rate": 4.993933116611626e-05, + "loss": 1.1181, + "step": 22278 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939325697124575e-05, + "loss": 0.9411, + "step": 22279 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939320227886685e-05, + "loss": 0.9417, + "step": 22280 + }, + { + "epoch": 0.02, + "learning_rate": 4.993931475840261e-05, + "loss": 1.0896, + "step": 22281 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939309288672334e-05, + "loss": 1.225, + "step": 22282 + }, + { + "epoch": 0.02, + "learning_rate": 4.993930381869588e-05, + "loss": 1.1255, + "step": 22283 + }, + { + "epoch": 0.02, + "learning_rate": 4.993929834847323e-05, + "loss": 0.9771, + "step": 22284 + }, + { + "epoch": 0.02, + "learning_rate": 4.99392928780044e-05, + "loss": 0.567, + "step": 22285 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939287407289365e-05, + "loss": 0.6939, + "step": 22286 + }, + { + "epoch": 0.02, + "learning_rate": 4.993928193632814e-05, + "loss": 1.2104, + "step": 22287 + }, + { + "epoch": 0.02, + "learning_rate": 4.993927646512073e-05, + "loss": 0.8033, + "step": 22288 + }, + { + "epoch": 0.02, + "learning_rate": 4.993927099366714e-05, + "loss": 1.0041, + "step": 22289 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939265521967346e-05, + "loss": 1.1004, + "step": 22290 + }, + { + "epoch": 0.02, + "learning_rate": 4.993926005002137e-05, + "loss": 1.037, + "step": 22291 + }, + { + "epoch": 0.02, + "learning_rate": 4.99392545778292e-05, + "loss": 0.9785, + "step": 22292 + }, + { + "epoch": 0.02, + "learning_rate": 4.993924910539084e-05, + "loss": 0.9523, + "step": 22293 + }, + { + "epoch": 0.02, + "learning_rate": 4.993924363270629e-05, + "loss": 1.2054, + "step": 22294 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939238159775556e-05, + "loss": 1.2382, + "step": 22295 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939232686598624e-05, + "loss": 1.3422, + "step": 22296 + }, + { + "epoch": 0.02, + "learning_rate": 4.993922721317551e-05, + "loss": 1.1719, + "step": 22297 + }, + { + "epoch": 0.02, + "learning_rate": 4.99392217395062e-05, + "loss": 1.2362, + "step": 22298 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939216265590706e-05, + "loss": 1.3537, + "step": 22299 + }, + { + "epoch": 0.02, + "learning_rate": 4.993921079142903e-05, + "loss": 0.9478, + "step": 22300 + }, + { + "epoch": 0.02, + "learning_rate": 4.993920531702114e-05, + "loss": 1.0678, + "step": 22301 + }, + { + "epoch": 0.02, + "learning_rate": 4.993919984236708e-05, + "loss": 0.8433, + "step": 22302 + }, + { + "epoch": 0.02, + "learning_rate": 4.993919436746682e-05, + "loss": 0.9548, + "step": 22303 + }, + { + "epoch": 0.02, + "learning_rate": 4.993918889232039e-05, + "loss": 0.9891, + "step": 22304 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939183416927755e-05, + "loss": 1.6143, + "step": 22305 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939177941288925e-05, + "loss": 1.2971, + "step": 22306 + }, + { + "epoch": 0.02, + "learning_rate": 4.993917246540392e-05, + "loss": 0.8234, + "step": 22307 + }, + { + "epoch": 0.02, + "learning_rate": 4.993916698927272e-05, + "loss": 0.8769, + "step": 22308 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939161512895334e-05, + "loss": 1.0315, + "step": 22309 + }, + { + "epoch": 0.02, + "learning_rate": 4.993915603627176e-05, + "loss": 0.9439, + "step": 22310 + }, + { + "epoch": 0.02, + "learning_rate": 4.993915055940199e-05, + "loss": 0.9394, + "step": 22311 + }, + { + "epoch": 0.02, + "learning_rate": 4.993914508228604e-05, + "loss": 1.2126, + "step": 22312 + }, + { + "epoch": 0.02, + "learning_rate": 4.993913960492389e-05, + "loss": 1.1805, + "step": 22313 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939134127315554e-05, + "loss": 1.1278, + "step": 22314 + }, + { + "epoch": 0.02, + "learning_rate": 4.993912864946104e-05, + "loss": 1.0248, + "step": 22315 + }, + { + "epoch": 0.02, + "learning_rate": 4.993912317136033e-05, + "loss": 0.7719, + "step": 22316 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939117693013434e-05, + "loss": 0.6507, + "step": 22317 + }, + { + "epoch": 0.02, + "learning_rate": 4.993911221442035e-05, + "loss": 0.8241, + "step": 22318 + }, + { + "epoch": 0.02, + "learning_rate": 4.993910673558107e-05, + "loss": 1.1863, + "step": 22319 + }, + { + "epoch": 0.02, + "learning_rate": 4.993910125649561e-05, + "loss": 1.3555, + "step": 22320 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939095777163955e-05, + "loss": 1.0136, + "step": 22321 + }, + { + "epoch": 0.02, + "learning_rate": 4.993909029758612e-05, + "loss": 1.168, + "step": 22322 + }, + { + "epoch": 0.02, + "learning_rate": 4.993908481776209e-05, + "loss": 1.2779, + "step": 22323 + }, + { + "epoch": 0.02, + "learning_rate": 4.993907933769188e-05, + "loss": 1.253, + "step": 22324 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939073857375474e-05, + "loss": 0.9574, + "step": 22325 + }, + { + "epoch": 0.02, + "learning_rate": 4.993906837681288e-05, + "loss": 1.0986, + "step": 22326 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939062896004096e-05, + "loss": 0.9036, + "step": 22327 + }, + { + "epoch": 0.02, + "learning_rate": 4.993905741494913e-05, + "loss": 0.7313, + "step": 22328 + }, + { + "epoch": 0.02, + "learning_rate": 4.993905193364797e-05, + "loss": 1.087, + "step": 22329 + }, + { + "epoch": 0.02, + "learning_rate": 4.993904645210063e-05, + "loss": 1.033, + "step": 22330 + }, + { + "epoch": 0.02, + "learning_rate": 4.99390409703071e-05, + "loss": 1.0377, + "step": 22331 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939035488267385e-05, + "loss": 1.1691, + "step": 22332 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939030005981475e-05, + "loss": 0.8152, + "step": 22333 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939024523449376e-05, + "loss": 0.8757, + "step": 22334 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939019040671096e-05, + "loss": 0.9749, + "step": 22335 + }, + { + "epoch": 0.02, + "learning_rate": 4.993901355764663e-05, + "loss": 1.4952, + "step": 22336 + }, + { + "epoch": 0.02, + "learning_rate": 4.993900807437597e-05, + "loss": 1.6821, + "step": 22337 + }, + { + "epoch": 0.02, + "learning_rate": 4.993900259085913e-05, + "loss": 1.5565, + "step": 22338 + }, + { + "epoch": 0.02, + "learning_rate": 4.99389971070961e-05, + "loss": 1.1564, + "step": 22339 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938991623086875e-05, + "loss": 1.1183, + "step": 22340 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938986138831464e-05, + "loss": 0.6658, + "step": 22341 + }, + { + "epoch": 0.02, + "learning_rate": 4.993898065432988e-05, + "loss": 0.3195, + "step": 22342 + }, + { + "epoch": 0.02, + "learning_rate": 4.99389751695821e-05, + "loss": 1.0331, + "step": 22343 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938969684588134e-05, + "loss": 1.1784, + "step": 22344 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938964199347977e-05, + "loss": 1.1786, + "step": 22345 + }, + { + "epoch": 0.02, + "learning_rate": 4.993895871386163e-05, + "loss": 1.2127, + "step": 22346 + }, + { + "epoch": 0.02, + "learning_rate": 4.99389532281291e-05, + "loss": 1.0877, + "step": 22347 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938947742150386e-05, + "loss": 1.0797, + "step": 22348 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938942255925495e-05, + "loss": 1.1066, + "step": 22349 + }, + { + "epoch": 0.02, + "learning_rate": 4.99389367694544e-05, + "loss": 1.0485, + "step": 22350 + }, + { + "epoch": 0.02, + "learning_rate": 4.993893128273712e-05, + "loss": 1.1021, + "step": 22351 + }, + { + "epoch": 0.02, + "learning_rate": 4.993892579577366e-05, + "loss": 1.1608, + "step": 22352 + }, + { + "epoch": 0.02, + "learning_rate": 4.993892030856402e-05, + "loss": 0.7678, + "step": 22353 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938914821108185e-05, + "loss": 0.8466, + "step": 22354 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938909333406156e-05, + "loss": 0.9763, + "step": 22355 + }, + { + "epoch": 0.02, + "learning_rate": 4.993890384545795e-05, + "loss": 1.0229, + "step": 22356 + }, + { + "epoch": 0.02, + "learning_rate": 4.993889835726356e-05, + "loss": 1.1285, + "step": 22357 + }, + { + "epoch": 0.02, + "learning_rate": 4.993889286882298e-05, + "loss": 1.1387, + "step": 22358 + }, + { + "epoch": 0.02, + "learning_rate": 4.993888738013621e-05, + "loss": 1.1313, + "step": 22359 + }, + { + "epoch": 0.02, + "learning_rate": 4.993888189120326e-05, + "loss": 0.8278, + "step": 22360 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938876402024115e-05, + "loss": 1.1724, + "step": 22361 + }, + { + "epoch": 0.02, + "learning_rate": 4.993887091259879e-05, + "loss": 1.5463, + "step": 22362 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938865422927286e-05, + "loss": 1.1391, + "step": 22363 + }, + { + "epoch": 0.02, + "learning_rate": 4.993885993300959e-05, + "loss": 1.194, + "step": 22364 + }, + { + "epoch": 0.02, + "learning_rate": 4.993885444284571e-05, + "loss": 1.1724, + "step": 22365 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938848952435635e-05, + "loss": 0.8531, + "step": 22366 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938843461779386e-05, + "loss": 0.996, + "step": 22367 + }, + { + "epoch": 0.02, + "learning_rate": 4.993883797087694e-05, + "loss": 1.2535, + "step": 22368 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938832479728315e-05, + "loss": 1.2504, + "step": 22369 + }, + { + "epoch": 0.02, + "learning_rate": 4.99388269883335e-05, + "loss": 0.9097, + "step": 22370 + }, + { + "epoch": 0.02, + "learning_rate": 4.99388214966925e-05, + "loss": 0.8968, + "step": 22371 + }, + { + "epoch": 0.02, + "learning_rate": 4.993881600480532e-05, + "loss": 0.8397, + "step": 22372 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938810512671954e-05, + "loss": 0.7432, + "step": 22373 + }, + { + "epoch": 0.02, + "learning_rate": 4.99388050202924e-05, + "loss": 0.9595, + "step": 22374 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938799527666656e-05, + "loss": 1.0751, + "step": 22375 + }, + { + "epoch": 0.02, + "learning_rate": 4.993879403479473e-05, + "loss": 1.315, + "step": 22376 + }, + { + "epoch": 0.02, + "learning_rate": 4.993878854167663e-05, + "loss": 1.271, + "step": 22377 + }, + { + "epoch": 0.02, + "learning_rate": 4.993878304831233e-05, + "loss": 1.112, + "step": 22378 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938777554701854e-05, + "loss": 0.9374, + "step": 22379 + }, + { + "epoch": 0.02, + "learning_rate": 4.993877206084518e-05, + "loss": 1.1241, + "step": 22380 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938766566742336e-05, + "loss": 1.0031, + "step": 22381 + }, + { + "epoch": 0.02, + "learning_rate": 4.99387610723933e-05, + "loss": 1.2828, + "step": 22382 + }, + { + "epoch": 0.02, + "learning_rate": 4.993875557779808e-05, + "loss": 1.1168, + "step": 22383 + }, + { + "epoch": 0.02, + "learning_rate": 4.993875008295667e-05, + "loss": 1.0985, + "step": 22384 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938744587869086e-05, + "loss": 1.2524, + "step": 22385 + }, + { + "epoch": 0.02, + "learning_rate": 4.993873909253531e-05, + "loss": 0.3048, + "step": 22386 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938733596955354e-05, + "loss": 0.3393, + "step": 22387 + }, + { + "epoch": 0.02, + "learning_rate": 4.993872810112921e-05, + "loss": 0.9326, + "step": 22388 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938722605056875e-05, + "loss": 1.8575, + "step": 22389 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938717108738366e-05, + "loss": 1.0155, + "step": 22390 + }, + { + "epoch": 0.02, + "learning_rate": 4.993871161217367e-05, + "loss": 0.8362, + "step": 22391 + }, + { + "epoch": 0.02, + "learning_rate": 4.993870611536279e-05, + "loss": 1.1972, + "step": 22392 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938700618305723e-05, + "loss": 0.9672, + "step": 22393 + }, + { + "epoch": 0.02, + "learning_rate": 4.993869512100247e-05, + "loss": 0.7031, + "step": 22394 + }, + { + "epoch": 0.02, + "learning_rate": 4.993868962345303e-05, + "loss": 0.1834, + "step": 22395 + }, + { + "epoch": 0.02, + "learning_rate": 4.993868412565741e-05, + "loss": 0.1404, + "step": 22396 + }, + { + "epoch": 0.02, + "learning_rate": 4.993867862761561e-05, + "loss": 0.2524, + "step": 22397 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938673129327616e-05, + "loss": 0.8606, + "step": 22398 + }, + { + "epoch": 0.02, + "learning_rate": 4.993866763079345e-05, + "loss": 1.1858, + "step": 22399 + }, + { + "epoch": 0.02, + "learning_rate": 4.993866213201309e-05, + "loss": 0.7368, + "step": 22400 + }, + { + "epoch": 0.02, + "learning_rate": 4.993865663298656e-05, + "loss": 0.599, + "step": 22401 + }, + { + "epoch": 0.02, + "learning_rate": 4.993865113371383e-05, + "loss": 0.6903, + "step": 22402 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938645634194926e-05, + "loss": 0.6282, + "step": 22403 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938640134429834e-05, + "loss": 0.6495, + "step": 22404 + }, + { + "epoch": 0.02, + "learning_rate": 4.993863463441856e-05, + "loss": 0.4325, + "step": 22405 + }, + { + "epoch": 0.02, + "learning_rate": 4.99386291341611e-05, + "loss": 0.6941, + "step": 22406 + }, + { + "epoch": 0.02, + "learning_rate": 4.993862363365745e-05, + "loss": 0.4224, + "step": 22407 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938618132907634e-05, + "loss": 0.6211, + "step": 22408 + }, + { + "epoch": 0.02, + "learning_rate": 4.993861263191162e-05, + "loss": 0.7563, + "step": 22409 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938607130669424e-05, + "loss": 0.7944, + "step": 22410 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938601629181046e-05, + "loss": 0.5879, + "step": 22411 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938596127446494e-05, + "loss": 0.535, + "step": 22412 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938590625465746e-05, + "loss": 0.3786, + "step": 22413 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938585123238824e-05, + "loss": 0.3783, + "step": 22414 + }, + { + "epoch": 0.02, + "learning_rate": 4.993857962076571e-05, + "loss": 0.2556, + "step": 22415 + }, + { + "epoch": 0.02, + "learning_rate": 4.993857411804642e-05, + "loss": 0.6238, + "step": 22416 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938568615080946e-05, + "loss": 0.9536, + "step": 22417 + }, + { + "epoch": 0.02, + "learning_rate": 4.993856311186928e-05, + "loss": 1.3165, + "step": 22418 + }, + { + "epoch": 0.02, + "learning_rate": 4.993855760841144e-05, + "loss": 1.2405, + "step": 22419 + }, + { + "epoch": 0.02, + "learning_rate": 4.993855210470741e-05, + "loss": 1.5025, + "step": 22420 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938546600757205e-05, + "loss": 0.9054, + "step": 22421 + }, + { + "epoch": 0.02, + "learning_rate": 4.993854109656082e-05, + "loss": 0.8825, + "step": 22422 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938535592118245e-05, + "loss": 0.9591, + "step": 22423 + }, + { + "epoch": 0.02, + "learning_rate": 4.993853008742949e-05, + "loss": 1.1853, + "step": 22424 + }, + { + "epoch": 0.02, + "learning_rate": 4.993852458249455e-05, + "loss": 0.9967, + "step": 22425 + }, + { + "epoch": 0.02, + "learning_rate": 4.993851907731343e-05, + "loss": 1.0152, + "step": 22426 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938513571886124e-05, + "loss": 1.058, + "step": 22427 + }, + { + "epoch": 0.02, + "learning_rate": 4.993850806621264e-05, + "loss": 1.1433, + "step": 22428 + }, + { + "epoch": 0.02, + "learning_rate": 4.993850256029297e-05, + "loss": 1.1891, + "step": 22429 + }, + { + "epoch": 0.02, + "learning_rate": 4.993849705412712e-05, + "loss": 1.3641, + "step": 22430 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938491547715083e-05, + "loss": 1.0699, + "step": 22431 + }, + { + "epoch": 0.02, + "learning_rate": 4.993848604105687e-05, + "loss": 1.2606, + "step": 22432 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938480534152477e-05, + "loss": 0.4229, + "step": 22433 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938475027001894e-05, + "loss": 0.3915, + "step": 22434 + }, + { + "epoch": 0.02, + "learning_rate": 4.993846951960513e-05, + "loss": 0.3044, + "step": 22435 + }, + { + "epoch": 0.02, + "learning_rate": 4.993846401196218e-05, + "loss": 0.2466, + "step": 22436 + }, + { + "epoch": 0.02, + "learning_rate": 4.993845850407306e-05, + "loss": 0.138, + "step": 22437 + }, + { + "epoch": 0.02, + "learning_rate": 4.993845299593775e-05, + "loss": 0.0789, + "step": 22438 + }, + { + "epoch": 0.02, + "learning_rate": 4.993844748755626e-05, + "loss": 0.058, + "step": 22439 + }, + { + "epoch": 0.02, + "learning_rate": 4.993844197892858e-05, + "loss": 0.0846, + "step": 22440 + }, + { + "epoch": 0.02, + "learning_rate": 4.993843647005474e-05, + "loss": 1.1509, + "step": 22441 + }, + { + "epoch": 0.02, + "learning_rate": 4.99384309609347e-05, + "loss": 1.1969, + "step": 22442 + }, + { + "epoch": 0.02, + "learning_rate": 4.993842545156848e-05, + "loss": 1.066, + "step": 22443 + }, + { + "epoch": 0.02, + "learning_rate": 4.993841994195608e-05, + "loss": 1.005, + "step": 22444 + }, + { + "epoch": 0.02, + "learning_rate": 4.99384144320975e-05, + "loss": 1.2684, + "step": 22445 + }, + { + "epoch": 0.02, + "learning_rate": 4.993840892199274e-05, + "loss": 1.1741, + "step": 22446 + }, + { + "epoch": 0.02, + "learning_rate": 4.99384034116418e-05, + "loss": 1.0013, + "step": 22447 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938397901044675e-05, + "loss": 1.0643, + "step": 22448 + }, + { + "epoch": 0.02, + "learning_rate": 4.993839239020137e-05, + "loss": 1.4342, + "step": 22449 + }, + { + "epoch": 0.02, + "learning_rate": 4.993838687911188e-05, + "loss": 1.126, + "step": 22450 + }, + { + "epoch": 0.02, + "learning_rate": 4.993838136777621e-05, + "loss": 0.7729, + "step": 22451 + }, + { + "epoch": 0.02, + "learning_rate": 4.993837585619436e-05, + "loss": 1.119, + "step": 22452 + }, + { + "epoch": 0.02, + "learning_rate": 4.993837034436633e-05, + "loss": 1.0293, + "step": 22453 + }, + { + "epoch": 0.02, + "learning_rate": 4.993836483229212e-05, + "loss": 1.2539, + "step": 22454 + }, + { + "epoch": 0.02, + "learning_rate": 4.993835931997173e-05, + "loss": 1.2863, + "step": 22455 + }, + { + "epoch": 0.02, + "learning_rate": 4.993835380740515e-05, + "loss": 0.8858, + "step": 22456 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938348294592395e-05, + "loss": 0.9769, + "step": 22457 + }, + { + "epoch": 0.02, + "learning_rate": 4.993834278153346e-05, + "loss": 1.0828, + "step": 22458 + }, + { + "epoch": 0.02, + "learning_rate": 4.993833726822834e-05, + "loss": 1.1159, + "step": 22459 + }, + { + "epoch": 0.02, + "learning_rate": 4.993833175467705e-05, + "loss": 1.2737, + "step": 22460 + }, + { + "epoch": 0.02, + "learning_rate": 4.993832624087956e-05, + "loss": 1.2609, + "step": 22461 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938320726835906e-05, + "loss": 1.0574, + "step": 22462 + }, + { + "epoch": 0.02, + "learning_rate": 4.993831521254607e-05, + "loss": 1.1267, + "step": 22463 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938309698010044e-05, + "loss": 0.4327, + "step": 22464 + }, + { + "epoch": 0.02, + "learning_rate": 4.993830418322784e-05, + "loss": 0.8185, + "step": 22465 + }, + { + "epoch": 0.02, + "learning_rate": 4.993829866819947e-05, + "loss": 1.2331, + "step": 22466 + }, + { + "epoch": 0.02, + "learning_rate": 4.993829315292491e-05, + "loss": 0.9731, + "step": 22467 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938287637404165e-05, + "loss": 1.0528, + "step": 22468 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938282121637245e-05, + "loss": 1.3864, + "step": 22469 + }, + { + "epoch": 0.02, + "learning_rate": 4.993827660562414e-05, + "loss": 0.9818, + "step": 22470 + }, + { + "epoch": 0.02, + "learning_rate": 4.993827108936486e-05, + "loss": 1.0552, + "step": 22471 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938265572859394e-05, + "loss": 0.9404, + "step": 22472 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938260056107754e-05, + "loss": 1.3186, + "step": 22473 + }, + { + "epoch": 0.02, + "learning_rate": 4.993825453910993e-05, + "loss": 1.7446, + "step": 22474 + }, + { + "epoch": 0.02, + "learning_rate": 4.993824902186593e-05, + "loss": 1.4857, + "step": 22475 + }, + { + "epoch": 0.02, + "learning_rate": 4.993824350437575e-05, + "loss": 1.1708, + "step": 22476 + }, + { + "epoch": 0.02, + "learning_rate": 4.993823798663938e-05, + "loss": 1.0398, + "step": 22477 + }, + { + "epoch": 0.02, + "learning_rate": 4.993823246865684e-05, + "loss": 1.0995, + "step": 22478 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938226950428125e-05, + "loss": 1.0324, + "step": 22479 + }, + { + "epoch": 0.02, + "learning_rate": 4.993822143195322e-05, + "loss": 1.3237, + "step": 22480 + }, + { + "epoch": 0.02, + "learning_rate": 4.993821591323214e-05, + "loss": 1.2396, + "step": 22481 + }, + { + "epoch": 0.02, + "learning_rate": 4.993821039426488e-05, + "loss": 1.3239, + "step": 22482 + }, + { + "epoch": 0.02, + "learning_rate": 4.993820487505144e-05, + "loss": 1.2095, + "step": 22483 + }, + { + "epoch": 0.02, + "learning_rate": 4.993819935559182e-05, + "loss": 1.3323, + "step": 22484 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938193835886024e-05, + "loss": 0.9935, + "step": 22485 + }, + { + "epoch": 0.02, + "learning_rate": 4.993818831593404e-05, + "loss": 0.6312, + "step": 22486 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938182795735886e-05, + "loss": 1.2235, + "step": 22487 + }, + { + "epoch": 0.02, + "learning_rate": 4.993817727529155e-05, + "loss": 1.0446, + "step": 22488 + }, + { + "epoch": 0.02, + "learning_rate": 4.993817175460103e-05, + "loss": 1.1664, + "step": 22489 + }, + { + "epoch": 0.02, + "learning_rate": 4.993816623366434e-05, + "loss": 0.8425, + "step": 22490 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938160712481465e-05, + "loss": 1.0723, + "step": 22491 + }, + { + "epoch": 0.02, + "learning_rate": 4.993815519105241e-05, + "loss": 1.0579, + "step": 22492 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938149669377175e-05, + "loss": 1.1235, + "step": 22493 + }, + { + "epoch": 0.02, + "learning_rate": 4.993814414745577e-05, + "loss": 0.8712, + "step": 22494 + }, + { + "epoch": 0.02, + "learning_rate": 4.993813862528818e-05, + "loss": 1.0406, + "step": 22495 + }, + { + "epoch": 0.02, + "learning_rate": 4.993813310287441e-05, + "loss": 1.0186, + "step": 22496 + }, + { + "epoch": 0.02, + "learning_rate": 4.993812758021446e-05, + "loss": 1.2051, + "step": 22497 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938122057308334e-05, + "loss": 1.0523, + "step": 22498 + }, + { + "epoch": 0.02, + "learning_rate": 4.993811653415603e-05, + "loss": 1.1873, + "step": 22499 + }, + { + "epoch": 0.02, + "learning_rate": 4.993811101075755e-05, + "loss": 1.3007, + "step": 22500 + }, + { + "epoch": 0.02, + "eval_loss": 1.0252221822738647, + "eval_runtime": 85.1725, + "eval_samples_per_second": 16.261, + "eval_steps_per_second": 4.074, + "step": 22500 + }, + { + "epoch": 0.02, + "learning_rate": 4.993810548711288e-05, + "loss": 1.2051, + "step": 22501 + }, + { + "epoch": 0.02, + "learning_rate": 4.993809996322205e-05, + "loss": 1.1607, + "step": 22502 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938094439085026e-05, + "loss": 0.8914, + "step": 22503 + }, + { + "epoch": 0.02, + "learning_rate": 4.993808891470183e-05, + "loss": 1.1283, + "step": 22504 + }, + { + "epoch": 0.02, + "learning_rate": 4.993808339007246e-05, + "loss": 1.5721, + "step": 22505 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938077865196904e-05, + "loss": 1.2308, + "step": 22506 + }, + { + "epoch": 0.02, + "learning_rate": 4.993807234007517e-05, + "loss": 1.2162, + "step": 22507 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938066814707265e-05, + "loss": 1.1442, + "step": 22508 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938061289093174e-05, + "loss": 1.2297, + "step": 22509 + }, + { + "epoch": 0.02, + "learning_rate": 4.993805576323291e-05, + "loss": 1.6944, + "step": 22510 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938050237126466e-05, + "loss": 1.0412, + "step": 22511 + }, + { + "epoch": 0.02, + "learning_rate": 4.993804471077384e-05, + "loss": 1.1968, + "step": 22512 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938039184175045e-05, + "loss": 1.2116, + "step": 22513 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938033657330065e-05, + "loss": 1.2242, + "step": 22514 + }, + { + "epoch": 0.02, + "learning_rate": 4.993802813023891e-05, + "loss": 0.3192, + "step": 22515 + }, + { + "epoch": 0.02, + "learning_rate": 4.993802260290158e-05, + "loss": 0.1779, + "step": 22516 + }, + { + "epoch": 0.02, + "learning_rate": 4.993801707531807e-05, + "loss": 0.1543, + "step": 22517 + }, + { + "epoch": 0.02, + "learning_rate": 4.993801154748838e-05, + "loss": 0.1213, + "step": 22518 + }, + { + "epoch": 0.02, + "learning_rate": 4.993800601941251e-05, + "loss": 0.0841, + "step": 22519 + }, + { + "epoch": 0.02, + "learning_rate": 4.993800049109047e-05, + "loss": 0.0696, + "step": 22520 + }, + { + "epoch": 0.02, + "learning_rate": 4.993799496252225e-05, + "loss": 0.0594, + "step": 22521 + }, + { + "epoch": 0.02, + "learning_rate": 4.993798943370785e-05, + "loss": 0.0512, + "step": 22522 + }, + { + "epoch": 0.02, + "learning_rate": 4.993798390464728e-05, + "loss": 0.0433, + "step": 22523 + }, + { + "epoch": 0.02, + "learning_rate": 4.993797837534052e-05, + "loss": 0.0422, + "step": 22524 + }, + { + "epoch": 0.02, + "learning_rate": 4.99379728457876e-05, + "loss": 0.0397, + "step": 22525 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937967315988485e-05, + "loss": 0.0448, + "step": 22526 + }, + { + "epoch": 0.02, + "learning_rate": 4.99379617859432e-05, + "loss": 0.0389, + "step": 22527 + }, + { + "epoch": 0.02, + "learning_rate": 4.993795625565174e-05, + "loss": 0.0421, + "step": 22528 + }, + { + "epoch": 0.02, + "learning_rate": 4.99379507251141e-05, + "loss": 0.034, + "step": 22529 + }, + { + "epoch": 0.02, + "learning_rate": 4.993794519433029e-05, + "loss": 0.0353, + "step": 22530 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937939663300295e-05, + "loss": 0.0363, + "step": 22531 + }, + { + "epoch": 0.02, + "learning_rate": 4.993793413202412e-05, + "loss": 0.0348, + "step": 22532 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937928600501784e-05, + "loss": 0.8558, + "step": 22533 + }, + { + "epoch": 0.02, + "learning_rate": 4.993792306873326e-05, + "loss": 1.8739, + "step": 22534 + }, + { + "epoch": 0.02, + "learning_rate": 4.993791753671856e-05, + "loss": 1.8322, + "step": 22535 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937912004457685e-05, + "loss": 1.8161, + "step": 22536 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937906471950636e-05, + "loss": 1.4508, + "step": 22537 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937900939197405e-05, + "loss": 1.0158, + "step": 22538 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937895406198e-05, + "loss": 1.3069, + "step": 22539 + }, + { + "epoch": 0.02, + "learning_rate": 4.993788987295242e-05, + "loss": 1.2899, + "step": 22540 + }, + { + "epoch": 0.02, + "learning_rate": 4.993788433946066e-05, + "loss": 1.3676, + "step": 22541 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937878805722725e-05, + "loss": 1.4553, + "step": 22542 + }, + { + "epoch": 0.02, + "learning_rate": 4.993787327173861e-05, + "loss": 0.9864, + "step": 22543 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937867737508326e-05, + "loss": 1.1751, + "step": 22544 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937862203031864e-05, + "loss": 1.1465, + "step": 22545 + }, + { + "epoch": 0.02, + "learning_rate": 4.993785666830923e-05, + "loss": 1.0132, + "step": 22546 + }, + { + "epoch": 0.02, + "learning_rate": 4.993785113334041e-05, + "loss": 0.9624, + "step": 22547 + }, + { + "epoch": 0.02, + "learning_rate": 4.993784559812542e-05, + "loss": 0.458, + "step": 22548 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937840062664254e-05, + "loss": 0.5249, + "step": 22549 + }, + { + "epoch": 0.02, + "learning_rate": 4.993783452695691e-05, + "loss": 0.6154, + "step": 22550 + }, + { + "epoch": 0.02, + "learning_rate": 4.993782899100339e-05, + "loss": 1.3439, + "step": 22551 + }, + { + "epoch": 0.02, + "learning_rate": 4.99378234548037e-05, + "loss": 1.0807, + "step": 22552 + }, + { + "epoch": 0.02, + "learning_rate": 4.993781791835783e-05, + "loss": 1.0568, + "step": 22553 + }, + { + "epoch": 0.02, + "learning_rate": 4.993781238166578e-05, + "loss": 1.0271, + "step": 22554 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937806844727565e-05, + "loss": 1.301, + "step": 22555 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937801307543166e-05, + "loss": 1.2428, + "step": 22556 + }, + { + "epoch": 0.02, + "learning_rate": 4.993779577011259e-05, + "loss": 1.3241, + "step": 22557 + }, + { + "epoch": 0.02, + "learning_rate": 4.993779023243584e-05, + "loss": 1.1664, + "step": 22558 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937784694512926e-05, + "loss": 1.0907, + "step": 22559 + }, + { + "epoch": 0.02, + "learning_rate": 4.993777915634383e-05, + "loss": 1.214, + "step": 22560 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937773617928554e-05, + "loss": 1.1309, + "step": 22561 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937768079267106e-05, + "loss": 1.2536, + "step": 22562 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937762540359476e-05, + "loss": 0.9744, + "step": 22563 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937757001205684e-05, + "loss": 1.0108, + "step": 22564 + }, + { + "epoch": 0.02, + "learning_rate": 4.993775146180571e-05, + "loss": 0.9428, + "step": 22565 + }, + { + "epoch": 0.02, + "learning_rate": 4.993774592215956e-05, + "loss": 1.266, + "step": 22566 + }, + { + "epoch": 0.02, + "learning_rate": 4.993774038226724e-05, + "loss": 1.2137, + "step": 22567 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937734842128736e-05, + "loss": 0.9608, + "step": 22568 + }, + { + "epoch": 0.02, + "learning_rate": 4.993772930174406e-05, + "loss": 1.1149, + "step": 22569 + }, + { + "epoch": 0.02, + "learning_rate": 4.993772376111322e-05, + "loss": 1.1658, + "step": 22570 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937718220236194e-05, + "loss": 1.1381, + "step": 22571 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937712679112996e-05, + "loss": 1.1104, + "step": 22572 + }, + { + "epoch": 0.02, + "learning_rate": 4.993770713774363e-05, + "loss": 1.0456, + "step": 22573 + }, + { + "epoch": 0.02, + "learning_rate": 4.993770159612808e-05, + "loss": 0.9538, + "step": 22574 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937696054266355e-05, + "loss": 1.0949, + "step": 22575 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937690512158465e-05, + "loss": 0.9993, + "step": 22576 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937684969804394e-05, + "loss": 1.0598, + "step": 22577 + }, + { + "epoch": 0.02, + "learning_rate": 4.993767942720415e-05, + "loss": 0.911, + "step": 22578 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937673884357726e-05, + "loss": 1.1522, + "step": 22579 + }, + { + "epoch": 0.02, + "learning_rate": 4.993766834126514e-05, + "loss": 1.2352, + "step": 22580 + }, + { + "epoch": 0.02, + "learning_rate": 4.993766279792637e-05, + "loss": 0.928, + "step": 22581 + }, + { + "epoch": 0.02, + "learning_rate": 4.993765725434143e-05, + "loss": 1.0037, + "step": 22582 + }, + { + "epoch": 0.02, + "learning_rate": 4.993765171051031e-05, + "loss": 1.1558, + "step": 22583 + }, + { + "epoch": 0.02, + "learning_rate": 4.993764616643303e-05, + "loss": 0.8408, + "step": 22584 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937640622109566e-05, + "loss": 1.0436, + "step": 22585 + }, + { + "epoch": 0.02, + "learning_rate": 4.993763507753993e-05, + "loss": 1.0615, + "step": 22586 + }, + { + "epoch": 0.02, + "learning_rate": 4.993762953272412e-05, + "loss": 0.9861, + "step": 22587 + }, + { + "epoch": 0.02, + "learning_rate": 4.993762398766213e-05, + "loss": 1.1553, + "step": 22588 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937618442353975e-05, + "loss": 1.164, + "step": 22589 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937612896799644e-05, + "loss": 1.1346, + "step": 22590 + }, + { + "epoch": 0.02, + "learning_rate": 4.993760735099914e-05, + "loss": 1.0086, + "step": 22591 + }, + { + "epoch": 0.02, + "learning_rate": 4.993760180495246e-05, + "loss": 0.9379, + "step": 22592 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937596258659607e-05, + "loss": 1.0963, + "step": 22593 + }, + { + "epoch": 0.02, + "learning_rate": 4.993759071212058e-05, + "loss": 1.3239, + "step": 22594 + }, + { + "epoch": 0.02, + "learning_rate": 4.993758516533538e-05, + "loss": 1.1023, + "step": 22595 + }, + { + "epoch": 0.02, + "learning_rate": 4.993757961830401e-05, + "loss": 0.9658, + "step": 22596 + }, + { + "epoch": 0.02, + "learning_rate": 4.993757407102647e-05, + "loss": 0.9631, + "step": 22597 + }, + { + "epoch": 0.02, + "learning_rate": 4.993756852350274e-05, + "loss": 0.8693, + "step": 22598 + }, + { + "epoch": 0.02, + "learning_rate": 4.993756297573285e-05, + "loss": 1.0443, + "step": 22599 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937557427716785e-05, + "loss": 1.0653, + "step": 22600 + }, + { + "epoch": 0.02, + "learning_rate": 4.993755187945455e-05, + "loss": 0.8518, + "step": 22601 + }, + { + "epoch": 0.02, + "learning_rate": 4.993754633094614e-05, + "loss": 1.1191, + "step": 22602 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937540782191556e-05, + "loss": 1.3057, + "step": 22603 + }, + { + "epoch": 0.02, + "learning_rate": 4.99375352331908e-05, + "loss": 1.0854, + "step": 22604 + }, + { + "epoch": 0.02, + "learning_rate": 4.993752968394387e-05, + "loss": 1.1266, + "step": 22605 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937524134450766e-05, + "loss": 1.2959, + "step": 22606 + }, + { + "epoch": 0.02, + "learning_rate": 4.993751858471149e-05, + "loss": 1.373, + "step": 22607 + }, + { + "epoch": 0.02, + "learning_rate": 4.993751303472604e-05, + "loss": 1.1307, + "step": 22608 + }, + { + "epoch": 0.02, + "learning_rate": 4.993750748449442e-05, + "loss": 1.1271, + "step": 22609 + }, + { + "epoch": 0.02, + "learning_rate": 4.993750193401663e-05, + "loss": 1.2049, + "step": 22610 + }, + { + "epoch": 0.02, + "learning_rate": 4.993749638329266e-05, + "loss": 0.8657, + "step": 22611 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937490832322526e-05, + "loss": 0.8164, + "step": 22612 + }, + { + "epoch": 0.02, + "learning_rate": 4.993748528110621e-05, + "loss": 1.0827, + "step": 22613 + }, + { + "epoch": 0.02, + "learning_rate": 4.993747972964373e-05, + "loss": 1.0471, + "step": 22614 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937474177935076e-05, + "loss": 0.9298, + "step": 22615 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937468625980245e-05, + "loss": 1.0358, + "step": 22616 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937463073779246e-05, + "loss": 0.6894, + "step": 22617 + }, + { + "epoch": 0.02, + "learning_rate": 4.993745752133208e-05, + "loss": 1.1969, + "step": 22618 + }, + { + "epoch": 0.02, + "learning_rate": 4.993745196863873e-05, + "loss": 0.6646, + "step": 22619 + }, + { + "epoch": 0.02, + "learning_rate": 4.993744641569922e-05, + "loss": 0.9686, + "step": 22620 + }, + { + "epoch": 0.02, + "learning_rate": 4.993744086251353e-05, + "loss": 1.3989, + "step": 22621 + }, + { + "epoch": 0.02, + "learning_rate": 4.993743530908167e-05, + "loss": 0.5438, + "step": 22622 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937429755403634e-05, + "loss": 0.636, + "step": 22623 + }, + { + "epoch": 0.02, + "learning_rate": 4.993742420147943e-05, + "loss": 1.0628, + "step": 22624 + }, + { + "epoch": 0.02, + "learning_rate": 4.993741864730906e-05, + "loss": 1.2207, + "step": 22625 + }, + { + "epoch": 0.02, + "learning_rate": 4.993741309289251e-05, + "loss": 1.3564, + "step": 22626 + }, + { + "epoch": 0.02, + "learning_rate": 4.99374075382298e-05, + "loss": 1.1934, + "step": 22627 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937401983320904e-05, + "loss": 0.9505, + "step": 22628 + }, + { + "epoch": 0.02, + "learning_rate": 4.993739642816584e-05, + "loss": 1.0725, + "step": 22629 + }, + { + "epoch": 0.02, + "learning_rate": 4.993739087276461e-05, + "loss": 0.9544, + "step": 22630 + }, + { + "epoch": 0.02, + "learning_rate": 4.99373853171172e-05, + "loss": 1.0974, + "step": 22631 + }, + { + "epoch": 0.02, + "learning_rate": 4.993737976122362e-05, + "loss": 1.1036, + "step": 22632 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937374205083884e-05, + "loss": 1.3183, + "step": 22633 + }, + { + "epoch": 0.02, + "learning_rate": 4.993736864869796e-05, + "loss": 1.1828, + "step": 22634 + }, + { + "epoch": 0.02, + "learning_rate": 4.993736309206587e-05, + "loss": 1.1859, + "step": 22635 + }, + { + "epoch": 0.02, + "learning_rate": 4.993735753518761e-05, + "loss": 1.3911, + "step": 22636 + }, + { + "epoch": 0.02, + "learning_rate": 4.993735197806318e-05, + "loss": 1.2263, + "step": 22637 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937346420692574e-05, + "loss": 1.4497, + "step": 22638 + }, + { + "epoch": 0.02, + "learning_rate": 4.99373408630758e-05, + "loss": 1.1906, + "step": 22639 + }, + { + "epoch": 0.02, + "learning_rate": 4.993733530521286e-05, + "loss": 1.1292, + "step": 22640 + }, + { + "epoch": 0.02, + "learning_rate": 4.993732974710375e-05, + "loss": 1.119, + "step": 22641 + }, + { + "epoch": 0.02, + "learning_rate": 4.993732418874846e-05, + "loss": 0.6021, + "step": 22642 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937318630147e-05, + "loss": 1.2337, + "step": 22643 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937313071299374e-05, + "loss": 0.8921, + "step": 22644 + }, + { + "epoch": 0.02, + "learning_rate": 4.993730751220557e-05, + "loss": 1.1524, + "step": 22645 + }, + { + "epoch": 0.02, + "learning_rate": 4.99373019528656e-05, + "loss": 1.1733, + "step": 22646 + }, + { + "epoch": 0.02, + "learning_rate": 4.993729639327947e-05, + "loss": 0.8224, + "step": 22647 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937290833447155e-05, + "loss": 0.9192, + "step": 22648 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937285273368675e-05, + "loss": 1.0595, + "step": 22649 + }, + { + "epoch": 0.02, + "learning_rate": 4.993727971304402e-05, + "loss": 1.2095, + "step": 22650 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937274152473204e-05, + "loss": 1.2862, + "step": 22651 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937268591656206e-05, + "loss": 1.1416, + "step": 22652 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937263030593054e-05, + "loss": 1.0873, + "step": 22653 + }, + { + "epoch": 0.02, + "learning_rate": 4.993725746928371e-05, + "loss": 1.1832, + "step": 22654 + }, + { + "epoch": 0.02, + "learning_rate": 4.993725190772821e-05, + "loss": 1.2774, + "step": 22655 + }, + { + "epoch": 0.02, + "learning_rate": 4.993724634592654e-05, + "loss": 1.099, + "step": 22656 + }, + { + "epoch": 0.02, + "learning_rate": 4.99372407838787e-05, + "loss": 1.3481, + "step": 22657 + }, + { + "epoch": 0.02, + "learning_rate": 4.993723522158469e-05, + "loss": 1.4918, + "step": 22658 + }, + { + "epoch": 0.02, + "learning_rate": 4.993722965904451e-05, + "loss": 1.1506, + "step": 22659 + }, + { + "epoch": 0.02, + "learning_rate": 4.993722409625815e-05, + "loss": 1.1658, + "step": 22660 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937218533225634e-05, + "loss": 0.724, + "step": 22661 + }, + { + "epoch": 0.02, + "learning_rate": 4.993721296994695e-05, + "loss": 1.2022, + "step": 22662 + }, + { + "epoch": 0.02, + "learning_rate": 4.993720740642208e-05, + "loss": 1.2759, + "step": 22663 + }, + { + "epoch": 0.02, + "learning_rate": 4.993720184265105e-05, + "loss": 1.2355, + "step": 22664 + }, + { + "epoch": 0.02, + "learning_rate": 4.993719627863385e-05, + "loss": 1.2328, + "step": 22665 + }, + { + "epoch": 0.02, + "learning_rate": 4.993719071437048e-05, + "loss": 1.0191, + "step": 22666 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937185149860944e-05, + "loss": 1.076, + "step": 22667 + }, + { + "epoch": 0.02, + "learning_rate": 4.993717958510523e-05, + "loss": 1.0552, + "step": 22668 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937174020103354e-05, + "loss": 1.0761, + "step": 22669 + }, + { + "epoch": 0.02, + "learning_rate": 4.993716845485531e-05, + "loss": 1.0696, + "step": 22670 + }, + { + "epoch": 0.02, + "learning_rate": 4.993716288936109e-05, + "loss": 0.9892, + "step": 22671 + }, + { + "epoch": 0.02, + "learning_rate": 4.993715732362071e-05, + "loss": 1.1709, + "step": 22672 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937151757634155e-05, + "loss": 0.9486, + "step": 22673 + }, + { + "epoch": 0.02, + "learning_rate": 4.993714619140143e-05, + "loss": 1.5936, + "step": 22674 + }, + { + "epoch": 0.02, + "learning_rate": 4.993714062492254e-05, + "loss": 1.0812, + "step": 22675 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937135058197475e-05, + "loss": 1.2617, + "step": 22676 + }, + { + "epoch": 0.02, + "learning_rate": 4.993712949122624e-05, + "loss": 0.8049, + "step": 22677 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937123924008845e-05, + "loss": 1.1228, + "step": 22678 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937118356545274e-05, + "loss": 1.129, + "step": 22679 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937112788835536e-05, + "loss": 1.0454, + "step": 22680 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937107220879636e-05, + "loss": 0.8853, + "step": 22681 + }, + { + "epoch": 0.02, + "learning_rate": 4.993710165267757e-05, + "loss": 1.0691, + "step": 22682 + }, + { + "epoch": 0.02, + "learning_rate": 4.993709608422932e-05, + "loss": 1.3264, + "step": 22683 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937090515534914e-05, + "loss": 1.2246, + "step": 22684 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937084946594334e-05, + "loss": 0.8565, + "step": 22685 + }, + { + "epoch": 0.02, + "learning_rate": 4.993707937740758e-05, + "loss": 1.1212, + "step": 22686 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937073807974665e-05, + "loss": 0.9594, + "step": 22687 + }, + { + "epoch": 0.02, + "learning_rate": 4.993706823829558e-05, + "loss": 0.8823, + "step": 22688 + }, + { + "epoch": 0.02, + "learning_rate": 4.993706266837033e-05, + "loss": 1.4009, + "step": 22689 + }, + { + "epoch": 0.02, + "learning_rate": 4.993705709819891e-05, + "loss": 1.297, + "step": 22690 + }, + { + "epoch": 0.02, + "learning_rate": 4.993705152778132e-05, + "loss": 1.189, + "step": 22691 + }, + { + "epoch": 0.02, + "learning_rate": 4.993704595711756e-05, + "loss": 1.0223, + "step": 22692 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937040386207636e-05, + "loss": 1.1296, + "step": 22693 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937034815051544e-05, + "loss": 1.077, + "step": 22694 + }, + { + "epoch": 0.02, + "learning_rate": 4.993702924364928e-05, + "loss": 0.9934, + "step": 22695 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937023672000856e-05, + "loss": 1.1961, + "step": 22696 + }, + { + "epoch": 0.02, + "learning_rate": 4.993701810010626e-05, + "loss": 0.8879, + "step": 22697 + }, + { + "epoch": 0.02, + "learning_rate": 4.9937012527965495e-05, + "loss": 1.2375, + "step": 22698 + }, + { + "epoch": 0.02, + "learning_rate": 4.993700695557856e-05, + "loss": 1.0417, + "step": 22699 + }, + { + "epoch": 0.02, + "learning_rate": 4.993700138294546e-05, + "loss": 1.1838, + "step": 22700 + }, + { + "epoch": 0.02, + "learning_rate": 4.993699581006619e-05, + "loss": 0.7993, + "step": 22701 + }, + { + "epoch": 0.02, + "learning_rate": 4.993699023694076e-05, + "loss": 0.6067, + "step": 22702 + }, + { + "epoch": 0.02, + "learning_rate": 4.993698466356916e-05, + "loss": 0.8747, + "step": 22703 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936979089951384e-05, + "loss": 1.0813, + "step": 22704 + }, + { + "epoch": 0.02, + "learning_rate": 4.993697351608746e-05, + "loss": 1.3342, + "step": 22705 + }, + { + "epoch": 0.02, + "learning_rate": 4.993696794197735e-05, + "loss": 1.3443, + "step": 22706 + }, + { + "epoch": 0.02, + "learning_rate": 4.993696236762108e-05, + "loss": 1.2694, + "step": 22707 + }, + { + "epoch": 0.02, + "learning_rate": 4.993695679301864e-05, + "loss": 1.1238, + "step": 22708 + }, + { + "epoch": 0.02, + "learning_rate": 4.993695121817004e-05, + "loss": 0.9825, + "step": 22709 + }, + { + "epoch": 0.02, + "learning_rate": 4.993694564307526e-05, + "loss": 0.9064, + "step": 22710 + }, + { + "epoch": 0.02, + "learning_rate": 4.993694006773432e-05, + "loss": 1.0396, + "step": 22711 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936934492147216e-05, + "loss": 1.0247, + "step": 22712 + }, + { + "epoch": 0.02, + "learning_rate": 4.993692891631394e-05, + "loss": 1.232, + "step": 22713 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936923340234496e-05, + "loss": 1.4698, + "step": 22714 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936917763908894e-05, + "loss": 1.0355, + "step": 22715 + }, + { + "epoch": 0.02, + "learning_rate": 4.993691218733712e-05, + "loss": 1.1536, + "step": 22716 + }, + { + "epoch": 0.02, + "learning_rate": 4.993690661051917e-05, + "loss": 1.0708, + "step": 22717 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936901033455066e-05, + "loss": 1.2156, + "step": 22718 + }, + { + "epoch": 0.02, + "learning_rate": 4.993689545614479e-05, + "loss": 0.9829, + "step": 22719 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936889878588356e-05, + "loss": 1.1797, + "step": 22720 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936884300785746e-05, + "loss": 1.1594, + "step": 22721 + }, + { + "epoch": 0.02, + "learning_rate": 4.993687872273698e-05, + "loss": 1.2054, + "step": 22722 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936873144442034e-05, + "loss": 1.093, + "step": 22723 + }, + { + "epoch": 0.02, + "learning_rate": 4.993686756590093e-05, + "loss": 1.1236, + "step": 22724 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936861987113656e-05, + "loss": 1.7612, + "step": 22725 + }, + { + "epoch": 0.02, + "learning_rate": 4.993685640808022e-05, + "loss": 1.2637, + "step": 22726 + }, + { + "epoch": 0.02, + "learning_rate": 4.993685082880061e-05, + "loss": 1.1045, + "step": 22727 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936845249274846e-05, + "loss": 1.166, + "step": 22728 + }, + { + "epoch": 0.02, + "learning_rate": 4.993683966950291e-05, + "loss": 1.0644, + "step": 22729 + }, + { + "epoch": 0.02, + "learning_rate": 4.993683408948481e-05, + "loss": 1.6695, + "step": 22730 + }, + { + "epoch": 0.02, + "learning_rate": 4.993682850922054e-05, + "loss": 1.2037, + "step": 22731 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936822928710104e-05, + "loss": 0.8885, + "step": 22732 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936817347953504e-05, + "loss": 0.8305, + "step": 22733 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936811766950735e-05, + "loss": 0.71, + "step": 22734 + }, + { + "epoch": 0.02, + "learning_rate": 4.99368061857018e-05, + "loss": 1.0804, + "step": 22735 + }, + { + "epoch": 0.02, + "learning_rate": 4.993680060420671e-05, + "loss": 1.2445, + "step": 22736 + }, + { + "epoch": 0.02, + "learning_rate": 4.993679502246544e-05, + "loss": 1.0393, + "step": 22737 + }, + { + "epoch": 0.02, + "learning_rate": 4.993678944047801e-05, + "loss": 1.0069, + "step": 22738 + }, + { + "epoch": 0.02, + "learning_rate": 4.993678385824442e-05, + "loss": 1.1425, + "step": 22739 + }, + { + "epoch": 0.02, + "learning_rate": 4.993677827576466e-05, + "loss": 1.4789, + "step": 22740 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936772693038735e-05, + "loss": 1.2097, + "step": 22741 + }, + { + "epoch": 0.02, + "learning_rate": 4.993676711006665e-05, + "loss": 1.133, + "step": 22742 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936761526848394e-05, + "loss": 1.1156, + "step": 22743 + }, + { + "epoch": 0.02, + "learning_rate": 4.993675594338397e-05, + "loss": 1.1182, + "step": 22744 + }, + { + "epoch": 0.02, + "learning_rate": 4.993675035967339e-05, + "loss": 0.815, + "step": 22745 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936744775716635e-05, + "loss": 2.0168, + "step": 22746 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936739191513715e-05, + "loss": 1.0715, + "step": 22747 + }, + { + "epoch": 0.02, + "learning_rate": 4.993673360706463e-05, + "loss": 1.2664, + "step": 22748 + }, + { + "epoch": 0.02, + "learning_rate": 4.99367280223694e-05, + "loss": 0.9799, + "step": 22749 + }, + { + "epoch": 0.02, + "learning_rate": 4.993672243742798e-05, + "loss": 1.0327, + "step": 22750 + }, + { + "epoch": 0.02, + "learning_rate": 4.993671685224041e-05, + "loss": 1.2776, + "step": 22751 + }, + { + "epoch": 0.02, + "learning_rate": 4.993671126680667e-05, + "loss": 1.0275, + "step": 22752 + }, + { + "epoch": 0.02, + "learning_rate": 4.993670568112676e-05, + "loss": 1.2539, + "step": 22753 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936700095200695e-05, + "loss": 1.359, + "step": 22754 + }, + { + "epoch": 0.02, + "learning_rate": 4.993669450902846e-05, + "loss": 0.7562, + "step": 22755 + }, + { + "epoch": 0.02, + "learning_rate": 4.993668892261006e-05, + "loss": 0.8791, + "step": 22756 + }, + { + "epoch": 0.02, + "learning_rate": 4.99366833359455e-05, + "loss": 1.0477, + "step": 22757 + }, + { + "epoch": 0.02, + "learning_rate": 4.993667774903477e-05, + "loss": 0.5487, + "step": 22758 + }, + { + "epoch": 0.02, + "learning_rate": 4.993667216187787e-05, + "loss": 0.7209, + "step": 22759 + }, + { + "epoch": 0.02, + "learning_rate": 4.993666657447482e-05, + "loss": 0.6032, + "step": 22760 + }, + { + "epoch": 0.02, + "learning_rate": 4.99366609868256e-05, + "loss": 0.773, + "step": 22761 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936655398930213e-05, + "loss": 0.7667, + "step": 22762 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936649810788665e-05, + "loss": 0.8161, + "step": 22763 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936644222400954e-05, + "loss": 0.9563, + "step": 22764 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936638633767076e-05, + "loss": 1.283, + "step": 22765 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936633044887036e-05, + "loss": 0.9568, + "step": 22766 + }, + { + "epoch": 0.02, + "learning_rate": 4.993662745576083e-05, + "loss": 1.1037, + "step": 22767 + }, + { + "epoch": 0.02, + "learning_rate": 4.993662186638846e-05, + "loss": 0.899, + "step": 22768 + }, + { + "epoch": 0.02, + "learning_rate": 4.993661627676993e-05, + "loss": 1.0854, + "step": 22769 + }, + { + "epoch": 0.02, + "learning_rate": 4.993661068690523e-05, + "loss": 0.7627, + "step": 22770 + }, + { + "epoch": 0.02, + "learning_rate": 4.993660509679437e-05, + "loss": 1.1997, + "step": 22771 + }, + { + "epoch": 0.02, + "learning_rate": 4.993659950643735e-05, + "loss": 1.1211, + "step": 22772 + }, + { + "epoch": 0.02, + "learning_rate": 4.993659391583416e-05, + "loss": 1.008, + "step": 22773 + }, + { + "epoch": 0.02, + "learning_rate": 4.993658832498481e-05, + "loss": 0.8119, + "step": 22774 + }, + { + "epoch": 0.02, + "learning_rate": 4.993658273388929e-05, + "loss": 0.4886, + "step": 22775 + }, + { + "epoch": 0.02, + "learning_rate": 4.993657714254761e-05, + "loss": 0.5779, + "step": 22776 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936571550959777e-05, + "loss": 0.5804, + "step": 22777 + }, + { + "epoch": 0.02, + "learning_rate": 4.993656595912577e-05, + "loss": 1.0595, + "step": 22778 + }, + { + "epoch": 0.02, + "learning_rate": 4.99365603670456e-05, + "loss": 1.3707, + "step": 22779 + }, + { + "epoch": 0.02, + "learning_rate": 4.993655477471927e-05, + "loss": 1.3533, + "step": 22780 + }, + { + "epoch": 0.02, + "learning_rate": 4.993654918214678e-05, + "loss": 1.0336, + "step": 22781 + }, + { + "epoch": 0.02, + "learning_rate": 4.993654358932812e-05, + "loss": 0.7583, + "step": 22782 + }, + { + "epoch": 0.02, + "learning_rate": 4.99365379962633e-05, + "loss": 0.9931, + "step": 22783 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936532402952316e-05, + "loss": 1.2337, + "step": 22784 + }, + { + "epoch": 0.02, + "learning_rate": 4.993652680939517e-05, + "loss": 0.7133, + "step": 22785 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936521215591857e-05, + "loss": 0.992, + "step": 22786 + }, + { + "epoch": 0.02, + "learning_rate": 4.993651562154239e-05, + "loss": 1.0192, + "step": 22787 + }, + { + "epoch": 0.02, + "learning_rate": 4.993651002724675e-05, + "loss": 0.9643, + "step": 22788 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936504432704955e-05, + "loss": 1.1712, + "step": 22789 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936498837917e-05, + "loss": 1.2167, + "step": 22790 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936493242882875e-05, + "loss": 1.1559, + "step": 22791 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936487647602594e-05, + "loss": 1.1, + "step": 22792 + }, + { + "epoch": 0.02, + "learning_rate": 4.993648205207614e-05, + "loss": 1.1241, + "step": 22793 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936476456303526e-05, + "loss": 1.1747, + "step": 22794 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936470860284754e-05, + "loss": 1.0487, + "step": 22795 + }, + { + "epoch": 0.02, + "learning_rate": 4.993646526401982e-05, + "loss": 1.1629, + "step": 22796 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936459667508725e-05, + "loss": 1.2597, + "step": 22797 + }, + { + "epoch": 0.02, + "learning_rate": 4.993645407075146e-05, + "loss": 1.2533, + "step": 22798 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936448473748045e-05, + "loss": 1.0461, + "step": 22799 + }, + { + "epoch": 0.02, + "learning_rate": 4.993644287649846e-05, + "loss": 1.277, + "step": 22800 + }, + { + "epoch": 0.02, + "learning_rate": 4.993643727900271e-05, + "loss": 1.1808, + "step": 22801 + }, + { + "epoch": 0.02, + "learning_rate": 4.993643168126081e-05, + "loss": 1.0565, + "step": 22802 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936426083272734e-05, + "loss": 1.0575, + "step": 22803 + }, + { + "epoch": 0.02, + "learning_rate": 4.99364204850385e-05, + "loss": 1.0956, + "step": 22804 + }, + { + "epoch": 0.02, + "learning_rate": 4.993641488655812e-05, + "loss": 1.1178, + "step": 22805 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936409287831556e-05, + "loss": 1.4916, + "step": 22806 + }, + { + "epoch": 0.02, + "learning_rate": 4.993640368885884e-05, + "loss": 1.0628, + "step": 22807 + }, + { + "epoch": 0.02, + "learning_rate": 4.993639808963996e-05, + "loss": 0.8721, + "step": 22808 + }, + { + "epoch": 0.02, + "learning_rate": 4.993639249017492e-05, + "loss": 1.106, + "step": 22809 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936386890463715e-05, + "loss": 1.1806, + "step": 22810 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936381290506354e-05, + "loss": 1.1241, + "step": 22811 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936375690302826e-05, + "loss": 1.2374, + "step": 22812 + }, + { + "epoch": 0.02, + "learning_rate": 4.993637008985314e-05, + "loss": 1.1573, + "step": 22813 + }, + { + "epoch": 0.02, + "learning_rate": 4.993636448915729e-05, + "loss": 1.2514, + "step": 22814 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936358888215286e-05, + "loss": 1.3566, + "step": 22815 + }, + { + "epoch": 0.02, + "learning_rate": 4.993635328702711e-05, + "loss": 1.031, + "step": 22816 + }, + { + "epoch": 0.02, + "learning_rate": 4.993634768559279e-05, + "loss": 0.8074, + "step": 22817 + }, + { + "epoch": 0.02, + "learning_rate": 4.993634208391229e-05, + "loss": 1.1266, + "step": 22818 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936336481985644e-05, + "loss": 1.0654, + "step": 22819 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936330879812825e-05, + "loss": 1.3112, + "step": 22820 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936325277393844e-05, + "loss": 0.8976, + "step": 22821 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936319674728716e-05, + "loss": 1.0811, + "step": 22822 + }, + { + "epoch": 0.02, + "learning_rate": 4.993631407181741e-05, + "loss": 1.2365, + "step": 22823 + }, + { + "epoch": 0.02, + "learning_rate": 4.993630846865996e-05, + "loss": 1.4577, + "step": 22824 + }, + { + "epoch": 0.02, + "learning_rate": 4.993630286525634e-05, + "loss": 1.161, + "step": 22825 + }, + { + "epoch": 0.02, + "learning_rate": 4.993629726160656e-05, + "loss": 1.044, + "step": 22826 + }, + { + "epoch": 0.02, + "learning_rate": 4.993629165771062e-05, + "loss": 1.4188, + "step": 22827 + }, + { + "epoch": 0.02, + "learning_rate": 4.993628605356852e-05, + "loss": 1.2865, + "step": 22828 + }, + { + "epoch": 0.02, + "learning_rate": 4.993628044918026e-05, + "loss": 0.6906, + "step": 22829 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936274844545835e-05, + "loss": 1.1752, + "step": 22830 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936269239665255e-05, + "loss": 0.6921, + "step": 22831 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936263634538514e-05, + "loss": 0.9259, + "step": 22832 + }, + { + "epoch": 0.02, + "learning_rate": 4.993625802916561e-05, + "loss": 1.0902, + "step": 22833 + }, + { + "epoch": 0.02, + "learning_rate": 4.993625242354655e-05, + "loss": 0.8924, + "step": 22834 + }, + { + "epoch": 0.02, + "learning_rate": 4.993624681768133e-05, + "loss": 1.0054, + "step": 22835 + }, + { + "epoch": 0.02, + "learning_rate": 4.993624121156995e-05, + "loss": 1.1921, + "step": 22836 + }, + { + "epoch": 0.02, + "learning_rate": 4.99362356052124e-05, + "loss": 1.0673, + "step": 22837 + }, + { + "epoch": 0.02, + "learning_rate": 4.99362299986087e-05, + "loss": 1.1527, + "step": 22838 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936224391758834e-05, + "loss": 0.9995, + "step": 22839 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936218784662815e-05, + "loss": 1.0818, + "step": 22840 + }, + { + "epoch": 0.02, + "learning_rate": 4.993621317732063e-05, + "loss": 1.1909, + "step": 22841 + }, + { + "epoch": 0.02, + "learning_rate": 4.993620756973229e-05, + "loss": 0.9967, + "step": 22842 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936201961897785e-05, + "loss": 0.9477, + "step": 22843 + }, + { + "epoch": 0.02, + "learning_rate": 4.993619635381712e-05, + "loss": 1.1626, + "step": 22844 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936190745490296e-05, + "loss": 1.0329, + "step": 22845 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936185136917316e-05, + "loss": 1.2588, + "step": 22846 + }, + { + "epoch": 0.02, + "learning_rate": 4.993617952809818e-05, + "loss": 1.1594, + "step": 22847 + }, + { + "epoch": 0.02, + "learning_rate": 4.993617391903288e-05, + "loss": 0.7747, + "step": 22848 + }, + { + "epoch": 0.02, + "learning_rate": 4.993616830972142e-05, + "loss": 1.2559, + "step": 22849 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936162700163805e-05, + "loss": 0.9532, + "step": 22850 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936157090360025e-05, + "loss": 1.1414, + "step": 22851 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936151480310084e-05, + "loss": 1.1945, + "step": 22852 + }, + { + "epoch": 0.02, + "learning_rate": 4.993614587001398e-05, + "loss": 1.1865, + "step": 22853 + }, + { + "epoch": 0.02, + "learning_rate": 4.993614025947173e-05, + "loss": 1.5611, + "step": 22854 + }, + { + "epoch": 0.02, + "learning_rate": 4.993613464868332e-05, + "loss": 1.253, + "step": 22855 + }, + { + "epoch": 0.02, + "learning_rate": 4.993612903764874e-05, + "loss": 1.3309, + "step": 22856 + }, + { + "epoch": 0.02, + "learning_rate": 4.993612342636801e-05, + "loss": 1.0304, + "step": 22857 + }, + { + "epoch": 0.02, + "learning_rate": 4.993611781484112e-05, + "loss": 0.7214, + "step": 22858 + }, + { + "epoch": 0.02, + "learning_rate": 4.993611220306807e-05, + "loss": 0.9967, + "step": 22859 + }, + { + "epoch": 0.02, + "learning_rate": 4.993610659104886e-05, + "loss": 1.2314, + "step": 22860 + }, + { + "epoch": 0.02, + "learning_rate": 4.993610097878349e-05, + "loss": 1.1125, + "step": 22861 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936095366271965e-05, + "loss": 1.1133, + "step": 22862 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936089753514284e-05, + "loss": 1.0877, + "step": 22863 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936084140510434e-05, + "loss": 1.1586, + "step": 22864 + }, + { + "epoch": 0.02, + "learning_rate": 4.993607852726043e-05, + "loss": 0.9032, + "step": 22865 + }, + { + "epoch": 0.02, + "learning_rate": 4.993607291376428e-05, + "loss": 1.2117, + "step": 22866 + }, + { + "epoch": 0.02, + "learning_rate": 4.993606730002195e-05, + "loss": 1.1376, + "step": 22867 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936061686033476e-05, + "loss": 1.1882, + "step": 22868 + }, + { + "epoch": 0.02, + "learning_rate": 4.993605607179884e-05, + "loss": 1.163, + "step": 22869 + }, + { + "epoch": 0.02, + "learning_rate": 4.993605045731804e-05, + "loss": 1.1174, + "step": 22870 + }, + { + "epoch": 0.02, + "learning_rate": 4.993604484259109e-05, + "loss": 0.9193, + "step": 22871 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936039227617984e-05, + "loss": 1.3452, + "step": 22872 + }, + { + "epoch": 0.02, + "learning_rate": 4.993603361239872e-05, + "loss": 1.2845, + "step": 22873 + }, + { + "epoch": 0.02, + "learning_rate": 4.993602799693329e-05, + "loss": 1.1429, + "step": 22874 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936022381221704e-05, + "loss": 0.2373, + "step": 22875 + }, + { + "epoch": 0.02, + "learning_rate": 4.993601676526396e-05, + "loss": 0.1727, + "step": 22876 + }, + { + "epoch": 0.02, + "learning_rate": 4.993601114906006e-05, + "loss": 0.146, + "step": 22877 + }, + { + "epoch": 0.02, + "learning_rate": 4.9936005532610005e-05, + "loss": 0.1212, + "step": 22878 + }, + { + "epoch": 0.02, + "learning_rate": 4.993599991591379e-05, + "loss": 0.137, + "step": 22879 + }, + { + "epoch": 0.02, + "learning_rate": 4.993599429897141e-05, + "loss": 0.0669, + "step": 22880 + }, + { + "epoch": 0.02, + "learning_rate": 4.993598868178289e-05, + "loss": 0.1667, + "step": 22881 + }, + { + "epoch": 0.02, + "learning_rate": 4.993598306434819e-05, + "loss": 0.9938, + "step": 22882 + }, + { + "epoch": 0.02, + "learning_rate": 4.993597744666735e-05, + "loss": 0.9944, + "step": 22883 + }, + { + "epoch": 0.02, + "learning_rate": 4.993597182874035e-05, + "loss": 1.2144, + "step": 22884 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935966210567184e-05, + "loss": 1.0508, + "step": 22885 + }, + { + "epoch": 0.02, + "learning_rate": 4.993596059214786e-05, + "loss": 0.8657, + "step": 22886 + }, + { + "epoch": 0.02, + "learning_rate": 4.993595497348239e-05, + "loss": 0.9969, + "step": 22887 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935949354570755e-05, + "loss": 1.3994, + "step": 22888 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935943735412967e-05, + "loss": 1.4817, + "step": 22889 + }, + { + "epoch": 0.02, + "learning_rate": 4.993593811600902e-05, + "loss": 1.0266, + "step": 22890 + }, + { + "epoch": 0.02, + "learning_rate": 4.993593249635892e-05, + "loss": 1.5283, + "step": 22891 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935926876462655e-05, + "loss": 0.8331, + "step": 22892 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935921256320235e-05, + "loss": 1.2384, + "step": 22893 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935915635931654e-05, + "loss": 1.339, + "step": 22894 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935910015296925e-05, + "loss": 0.8456, + "step": 22895 + }, + { + "epoch": 0.02, + "learning_rate": 4.993590439441604e-05, + "loss": 1.1507, + "step": 22896 + }, + { + "epoch": 0.02, + "learning_rate": 4.993589877328899e-05, + "loss": 1.1215, + "step": 22897 + }, + { + "epoch": 0.02, + "learning_rate": 4.993589315191579e-05, + "loss": 1.2436, + "step": 22898 + }, + { + "epoch": 0.02, + "learning_rate": 4.993588753029643e-05, + "loss": 1.1305, + "step": 22899 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935881908430915e-05, + "loss": 1.0648, + "step": 22900 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935876286319246e-05, + "loss": 1.356, + "step": 22901 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935870663961414e-05, + "loss": 1.2557, + "step": 22902 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935865041357436e-05, + "loss": 0.9333, + "step": 22903 + }, + { + "epoch": 0.02, + "learning_rate": 4.993585941850729e-05, + "loss": 1.1992, + "step": 22904 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935853795410994e-05, + "loss": 0.7205, + "step": 22905 + }, + { + "epoch": 0.02, + "learning_rate": 4.993584817206853e-05, + "loss": 0.6744, + "step": 22906 + }, + { + "epoch": 0.02, + "learning_rate": 4.993584254847993e-05, + "loss": 0.9376, + "step": 22907 + }, + { + "epoch": 0.02, + "learning_rate": 4.993583692464516e-05, + "loss": 1.2699, + "step": 22908 + }, + { + "epoch": 0.02, + "learning_rate": 4.993583130056424e-05, + "loss": 1.6732, + "step": 22909 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935825676237155e-05, + "loss": 1.2692, + "step": 22910 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935820051663926e-05, + "loss": 1.1482, + "step": 22911 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935814426844536e-05, + "loss": 1.1489, + "step": 22912 + }, + { + "epoch": 0.02, + "learning_rate": 4.993580880177899e-05, + "loss": 0.9965, + "step": 22913 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935803176467285e-05, + "loss": 1.0552, + "step": 22914 + }, + { + "epoch": 0.02, + "learning_rate": 4.993579755090943e-05, + "loss": 1.7842, + "step": 22915 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935791925105417e-05, + "loss": 1.0552, + "step": 22916 + }, + { + "epoch": 0.02, + "learning_rate": 4.993578629905525e-05, + "loss": 0.9522, + "step": 22917 + }, + { + "epoch": 0.02, + "learning_rate": 4.993578067275892e-05, + "loss": 1.0008, + "step": 22918 + }, + { + "epoch": 0.02, + "learning_rate": 4.993577504621645e-05, + "loss": 1.16, + "step": 22919 + }, + { + "epoch": 0.02, + "learning_rate": 4.993576941942781e-05, + "loss": 1.0426, + "step": 22920 + }, + { + "epoch": 0.02, + "learning_rate": 4.993576379239302e-05, + "loss": 0.9916, + "step": 22921 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935758165112074e-05, + "loss": 0.9738, + "step": 22922 + }, + { + "epoch": 0.02, + "learning_rate": 4.993575253758497e-05, + "loss": 0.9987, + "step": 22923 + }, + { + "epoch": 0.02, + "learning_rate": 4.993574690981171e-05, + "loss": 1.1087, + "step": 22924 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935741281792306e-05, + "loss": 1.1899, + "step": 22925 + }, + { + "epoch": 0.02, + "learning_rate": 4.993573565352674e-05, + "loss": 0.8281, + "step": 22926 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935730025015016e-05, + "loss": 1.1294, + "step": 22927 + }, + { + "epoch": 0.02, + "learning_rate": 4.993572439625713e-05, + "loss": 1.0034, + "step": 22928 + }, + { + "epoch": 0.02, + "learning_rate": 4.99357187672531e-05, + "loss": 0.8759, + "step": 22929 + }, + { + "epoch": 0.02, + "learning_rate": 4.993571313800292e-05, + "loss": 1.1925, + "step": 22930 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935707508506577e-05, + "loss": 1.0938, + "step": 22931 + }, + { + "epoch": 0.02, + "learning_rate": 4.993570187876408e-05, + "loss": 1.0943, + "step": 22932 + }, + { + "epoch": 0.02, + "learning_rate": 4.993569624877543e-05, + "loss": 1.1507, + "step": 22933 + }, + { + "epoch": 0.02, + "learning_rate": 4.993569061854062e-05, + "loss": 1.1888, + "step": 22934 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935684988059664e-05, + "loss": 1.1569, + "step": 22935 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935679357332544e-05, + "loss": 1.0678, + "step": 22936 + }, + { + "epoch": 0.02, + "learning_rate": 4.993567372635928e-05, + "loss": 1.1952, + "step": 22937 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935668095139855e-05, + "loss": 0.9247, + "step": 22938 + }, + { + "epoch": 0.02, + "learning_rate": 4.993566246367427e-05, + "loss": 0.9171, + "step": 22939 + }, + { + "epoch": 0.02, + "learning_rate": 4.993565683196254e-05, + "loss": 1.0093, + "step": 22940 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935651200004655e-05, + "loss": 1.2875, + "step": 22941 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935645567800615e-05, + "loss": 0.9603, + "step": 22942 + }, + { + "epoch": 0.02, + "learning_rate": 4.993563993535042e-05, + "loss": 0.858, + "step": 22943 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935634302654064e-05, + "loss": 0.7886, + "step": 22944 + }, + { + "epoch": 0.02, + "learning_rate": 4.993562866971157e-05, + "loss": 0.9116, + "step": 22945 + }, + { + "epoch": 0.02, + "learning_rate": 4.99356230365229e-05, + "loss": 0.8696, + "step": 22946 + }, + { + "epoch": 0.02, + "learning_rate": 4.993561740308809e-05, + "loss": 1.2192, + "step": 22947 + }, + { + "epoch": 0.02, + "learning_rate": 4.993561176940713e-05, + "loss": 1.1909, + "step": 22948 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935606135480006e-05, + "loss": 1.1797, + "step": 22949 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935600501306736e-05, + "loss": 1.1113, + "step": 22950 + }, + { + "epoch": 0.02, + "learning_rate": 4.993559486688731e-05, + "loss": 1.0455, + "step": 22951 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935589232221726e-05, + "loss": 0.9476, + "step": 22952 + }, + { + "epoch": 0.02, + "learning_rate": 4.993558359730999e-05, + "loss": 1.0316, + "step": 22953 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935577962152105e-05, + "loss": 0.9869, + "step": 22954 + }, + { + "epoch": 0.02, + "learning_rate": 4.993557232674806e-05, + "loss": 1.4799, + "step": 22955 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935566691097865e-05, + "loss": 0.3023, + "step": 22956 + }, + { + "epoch": 0.02, + "learning_rate": 4.993556105520152e-05, + "loss": 0.2833, + "step": 22957 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935555419059014e-05, + "loss": 0.8165, + "step": 22958 + }, + { + "epoch": 0.02, + "learning_rate": 4.993554978267037e-05, + "loss": 1.3387, + "step": 22959 + }, + { + "epoch": 0.02, + "learning_rate": 4.993554414603555e-05, + "loss": 0.8779, + "step": 22960 + }, + { + "epoch": 0.02, + "learning_rate": 4.993553850915459e-05, + "loss": 1.1595, + "step": 22961 + }, + { + "epoch": 0.02, + "learning_rate": 4.993553287202748e-05, + "loss": 1.0014, + "step": 22962 + }, + { + "epoch": 0.02, + "learning_rate": 4.993552723465421e-05, + "loss": 1.1151, + "step": 22963 + }, + { + "epoch": 0.02, + "learning_rate": 4.993552159703479e-05, + "loss": 1.195, + "step": 22964 + }, + { + "epoch": 0.02, + "learning_rate": 4.993551595916922e-05, + "loss": 1.0252, + "step": 22965 + }, + { + "epoch": 0.02, + "learning_rate": 4.993551032105749e-05, + "loss": 0.8012, + "step": 22966 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935504682699605e-05, + "loss": 1.1935, + "step": 22967 + }, + { + "epoch": 0.02, + "learning_rate": 4.993549904409558e-05, + "loss": 1.0364, + "step": 22968 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935493405245394e-05, + "loss": 1.1491, + "step": 22969 + }, + { + "epoch": 0.02, + "learning_rate": 4.993548776614906e-05, + "loss": 0.9459, + "step": 22970 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935482126806566e-05, + "loss": 1.0603, + "step": 22971 + }, + { + "epoch": 0.02, + "learning_rate": 4.993547648721792e-05, + "loss": 0.987, + "step": 22972 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935470847383125e-05, + "loss": 0.8059, + "step": 22973 + }, + { + "epoch": 0.02, + "learning_rate": 4.993546520730218e-05, + "loss": 1.0326, + "step": 22974 + }, + { + "epoch": 0.02, + "learning_rate": 4.993545956697508e-05, + "loss": 0.9547, + "step": 22975 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935453926401826e-05, + "loss": 0.4794, + "step": 22976 + }, + { + "epoch": 0.02, + "learning_rate": 4.993544828558242e-05, + "loss": 0.5014, + "step": 22977 + }, + { + "epoch": 0.02, + "learning_rate": 4.993544264451686e-05, + "loss": 0.3616, + "step": 22978 + }, + { + "epoch": 0.02, + "learning_rate": 4.993543700320516e-05, + "loss": 0.4725, + "step": 22979 + }, + { + "epoch": 0.02, + "learning_rate": 4.993543136164729e-05, + "loss": 1.3509, + "step": 22980 + }, + { + "epoch": 0.02, + "learning_rate": 4.993542571984328e-05, + "loss": 1.0847, + "step": 22981 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935420077793116e-05, + "loss": 1.2071, + "step": 22982 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935414435496794e-05, + "loss": 0.9628, + "step": 22983 + }, + { + "epoch": 0.02, + "learning_rate": 4.993540879295433e-05, + "loss": 0.97, + "step": 22984 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935403150165705e-05, + "loss": 1.2847, + "step": 22985 + }, + { + "epoch": 0.02, + "learning_rate": 4.993539750713093e-05, + "loss": 0.9338, + "step": 22986 + }, + { + "epoch": 0.02, + "learning_rate": 4.993539186385001e-05, + "loss": 1.0953, + "step": 22987 + }, + { + "epoch": 0.02, + "learning_rate": 4.993538622032293e-05, + "loss": 0.8373, + "step": 22988 + }, + { + "epoch": 0.02, + "learning_rate": 4.993538057654971e-05, + "loss": 1.5436, + "step": 22989 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935374932530324e-05, + "loss": 0.91, + "step": 22990 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935369288264785e-05, + "loss": 0.6649, + "step": 22991 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935363643753105e-05, + "loss": 1.2547, + "step": 22992 + }, + { + "epoch": 0.02, + "learning_rate": 4.993535799899527e-05, + "loss": 1.1787, + "step": 22993 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935352353991296e-05, + "loss": 1.0793, + "step": 22994 + }, + { + "epoch": 0.02, + "learning_rate": 4.993534670874115e-05, + "loss": 0.9317, + "step": 22995 + }, + { + "epoch": 0.02, + "learning_rate": 4.993534106324487e-05, + "loss": 1.3777, + "step": 22996 + }, + { + "epoch": 0.02, + "learning_rate": 4.993533541750243e-05, + "loss": 1.0168, + "step": 22997 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935329771513836e-05, + "loss": 1.1855, + "step": 22998 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935324125279095e-05, + "loss": 1.1221, + "step": 22999 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935318478798206e-05, + "loss": 1.1693, + "step": 23000 + }, + { + "epoch": 0.02, + "eval_loss": 1.0269695520401, + "eval_runtime": 85.8819, + "eval_samples_per_second": 16.127, + "eval_steps_per_second": 4.04, + "step": 23000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935312832071156e-05, + "loss": 1.469, + "step": 23001 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935307185097965e-05, + "loss": 1.1519, + "step": 23002 + }, + { + "epoch": 0.02, + "learning_rate": 4.993530153787862e-05, + "loss": 1.0409, + "step": 23003 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935295890413125e-05, + "loss": 1.6231, + "step": 23004 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935290242701484e-05, + "loss": 1.2493, + "step": 23005 + }, + { + "epoch": 0.02, + "learning_rate": 4.993528459474368e-05, + "loss": 0.8139, + "step": 23006 + }, + { + "epoch": 0.02, + "learning_rate": 4.993527894653973e-05, + "loss": 0.7774, + "step": 23007 + }, + { + "epoch": 0.02, + "learning_rate": 4.993527329808963e-05, + "loss": 0.6831, + "step": 23008 + }, + { + "epoch": 0.02, + "learning_rate": 4.993526764939339e-05, + "loss": 0.682, + "step": 23009 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935262000450986e-05, + "loss": 0.581, + "step": 23010 + }, + { + "epoch": 0.02, + "learning_rate": 4.993525635126244e-05, + "loss": 0.9875, + "step": 23011 + }, + { + "epoch": 0.02, + "learning_rate": 4.993525070182774e-05, + "loss": 1.0447, + "step": 23012 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935245052146884e-05, + "loss": 1.0847, + "step": 23013 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935239402219886e-05, + "loss": 0.8432, + "step": 23014 + }, + { + "epoch": 0.02, + "learning_rate": 4.993523375204674e-05, + "loss": 0.9486, + "step": 23015 + }, + { + "epoch": 0.02, + "learning_rate": 4.993522810162743e-05, + "loss": 0.9912, + "step": 23016 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935222450961984e-05, + "loss": 0.823, + "step": 23017 + }, + { + "epoch": 0.02, + "learning_rate": 4.993521680005038e-05, + "loss": 1.1972, + "step": 23018 + }, + { + "epoch": 0.02, + "learning_rate": 4.993521114889264e-05, + "loss": 1.4042, + "step": 23019 + }, + { + "epoch": 0.02, + "learning_rate": 4.993520549748874e-05, + "loss": 0.5818, + "step": 23020 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935199845838685e-05, + "loss": 0.9654, + "step": 23021 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935194193942484e-05, + "loss": 0.8589, + "step": 23022 + }, + { + "epoch": 0.02, + "learning_rate": 4.993518854180013e-05, + "loss": 1.1889, + "step": 23023 + }, + { + "epoch": 0.02, + "learning_rate": 4.993518288941163e-05, + "loss": 0.835, + "step": 23024 + }, + { + "epoch": 0.02, + "learning_rate": 4.993517723677698e-05, + "loss": 1.0985, + "step": 23025 + }, + { + "epoch": 0.02, + "learning_rate": 4.993517158389618e-05, + "loss": 1.3575, + "step": 23026 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935165930769235e-05, + "loss": 2.0939, + "step": 23027 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935160277396134e-05, + "loss": 1.0884, + "step": 23028 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935154623776885e-05, + "loss": 1.0751, + "step": 23029 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935148969911495e-05, + "loss": 1.0979, + "step": 23030 + }, + { + "epoch": 0.02, + "learning_rate": 4.993514331579995e-05, + "loss": 0.9313, + "step": 23031 + }, + { + "epoch": 0.02, + "learning_rate": 4.993513766144225e-05, + "loss": 1.0359, + "step": 23032 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935132006838405e-05, + "loss": 1.0045, + "step": 23033 + }, + { + "epoch": 0.02, + "learning_rate": 4.993512635198842e-05, + "loss": 1.1111, + "step": 23034 + }, + { + "epoch": 0.02, + "learning_rate": 4.993512069689227e-05, + "loss": 0.901, + "step": 23035 + }, + { + "epoch": 0.02, + "learning_rate": 4.993511504154997e-05, + "loss": 1.2885, + "step": 23036 + }, + { + "epoch": 0.02, + "learning_rate": 4.993510938596154e-05, + "loss": 1.2137, + "step": 23037 + }, + { + "epoch": 0.02, + "learning_rate": 4.993510373012695e-05, + "loss": 1.1166, + "step": 23038 + }, + { + "epoch": 0.02, + "learning_rate": 4.993509807404621e-05, + "loss": 1.1621, + "step": 23039 + }, + { + "epoch": 0.02, + "learning_rate": 4.993509241771932e-05, + "loss": 0.9863, + "step": 23040 + }, + { + "epoch": 0.02, + "learning_rate": 4.993508676114629e-05, + "loss": 1.0108, + "step": 23041 + }, + { + "epoch": 0.02, + "learning_rate": 4.99350811043271e-05, + "loss": 0.8774, + "step": 23042 + }, + { + "epoch": 0.02, + "learning_rate": 4.993507544726177e-05, + "loss": 1.2331, + "step": 23043 + }, + { + "epoch": 0.02, + "learning_rate": 4.993506978995028e-05, + "loss": 0.9145, + "step": 23044 + }, + { + "epoch": 0.02, + "learning_rate": 4.993506413239266e-05, + "loss": 1.0575, + "step": 23045 + }, + { + "epoch": 0.02, + "learning_rate": 4.993505847458888e-05, + "loss": 1.2772, + "step": 23046 + }, + { + "epoch": 0.02, + "learning_rate": 4.993505281653895e-05, + "loss": 0.9245, + "step": 23047 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935047158242875e-05, + "loss": 1.065, + "step": 23048 + }, + { + "epoch": 0.02, + "learning_rate": 4.993504149970065e-05, + "loss": 0.9264, + "step": 23049 + }, + { + "epoch": 0.02, + "learning_rate": 4.993503584091228e-05, + "loss": 0.9454, + "step": 23050 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935030181877765e-05, + "loss": 0.9317, + "step": 23051 + }, + { + "epoch": 0.02, + "learning_rate": 4.99350245225971e-05, + "loss": 0.9128, + "step": 23052 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935018863070274e-05, + "loss": 0.928, + "step": 23053 + }, + { + "epoch": 0.02, + "learning_rate": 4.993501320329731e-05, + "loss": 0.8554, + "step": 23054 + }, + { + "epoch": 0.02, + "learning_rate": 4.99350075432782e-05, + "loss": 0.8706, + "step": 23055 + }, + { + "epoch": 0.02, + "learning_rate": 4.993500188301295e-05, + "loss": 0.9101, + "step": 23056 + }, + { + "epoch": 0.02, + "learning_rate": 4.993499622250153e-05, + "loss": 0.9994, + "step": 23057 + }, + { + "epoch": 0.02, + "learning_rate": 4.993499056174398e-05, + "loss": 1.0442, + "step": 23058 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934984900740276e-05, + "loss": 1.1151, + "step": 23059 + }, + { + "epoch": 0.02, + "learning_rate": 4.993497923949042e-05, + "loss": 1.0228, + "step": 23060 + }, + { + "epoch": 0.02, + "learning_rate": 4.993497357799443e-05, + "loss": 0.9086, + "step": 23061 + }, + { + "epoch": 0.02, + "learning_rate": 4.993496791625228e-05, + "loss": 1.0301, + "step": 23062 + }, + { + "epoch": 0.02, + "learning_rate": 4.993496225426398e-05, + "loss": 1.2314, + "step": 23063 + }, + { + "epoch": 0.02, + "learning_rate": 4.993495659202955e-05, + "loss": 1.4393, + "step": 23064 + }, + { + "epoch": 0.02, + "learning_rate": 4.993495092954896e-05, + "loss": 1.0543, + "step": 23065 + }, + { + "epoch": 0.02, + "learning_rate": 4.993494526682222e-05, + "loss": 0.4764, + "step": 23066 + }, + { + "epoch": 0.02, + "learning_rate": 4.993493960384934e-05, + "loss": 0.3206, + "step": 23067 + }, + { + "epoch": 0.02, + "learning_rate": 4.993493394063031e-05, + "loss": 0.2417, + "step": 23068 + }, + { + "epoch": 0.02, + "learning_rate": 4.993492827716513e-05, + "loss": 0.2595, + "step": 23069 + }, + { + "epoch": 0.02, + "learning_rate": 4.99349226134538e-05, + "loss": 0.114, + "step": 23070 + }, + { + "epoch": 0.02, + "learning_rate": 4.993491694949633e-05, + "loss": 0.0979, + "step": 23071 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934911285292716e-05, + "loss": 0.1303, + "step": 23072 + }, + { + "epoch": 0.02, + "learning_rate": 4.993490562084295e-05, + "loss": 0.2053, + "step": 23073 + }, + { + "epoch": 0.02, + "learning_rate": 4.993489995614704e-05, + "loss": 0.4402, + "step": 23074 + }, + { + "epoch": 0.02, + "learning_rate": 4.993489429120498e-05, + "loss": 0.9164, + "step": 23075 + }, + { + "epoch": 0.02, + "learning_rate": 4.993488862601677e-05, + "loss": 1.292, + "step": 23076 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934882960582416e-05, + "loss": 1.2109, + "step": 23077 + }, + { + "epoch": 0.02, + "learning_rate": 4.993487729490192e-05, + "loss": 1.0574, + "step": 23078 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934871628975275e-05, + "loss": 1.1563, + "step": 23079 + }, + { + "epoch": 0.02, + "learning_rate": 4.993486596280248e-05, + "loss": 0.9788, + "step": 23080 + }, + { + "epoch": 0.02, + "learning_rate": 4.993486029638354e-05, + "loss": 1.0671, + "step": 23081 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934854629718455e-05, + "loss": 1.2214, + "step": 23082 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934848962807226e-05, + "loss": 1.1954, + "step": 23083 + }, + { + "epoch": 0.02, + "learning_rate": 4.993484329564985e-05, + "loss": 1.3354, + "step": 23084 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934837628246325e-05, + "loss": 1.1561, + "step": 23085 + }, + { + "epoch": 0.02, + "learning_rate": 4.993483196059665e-05, + "loss": 1.3062, + "step": 23086 + }, + { + "epoch": 0.02, + "learning_rate": 4.993482629270084e-05, + "loss": 1.6879, + "step": 23087 + }, + { + "epoch": 0.02, + "learning_rate": 4.993482062455887e-05, + "loss": 1.6889, + "step": 23088 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934814956170764e-05, + "loss": 1.0667, + "step": 23089 + }, + { + "epoch": 0.02, + "learning_rate": 4.993480928753651e-05, + "loss": 1.0738, + "step": 23090 + }, + { + "epoch": 0.02, + "learning_rate": 4.993480361865611e-05, + "loss": 1.0413, + "step": 23091 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934797949529565e-05, + "loss": 0.9875, + "step": 23092 + }, + { + "epoch": 0.02, + "learning_rate": 4.993479228015687e-05, + "loss": 1.1007, + "step": 23093 + }, + { + "epoch": 0.02, + "learning_rate": 4.993478661053803e-05, + "loss": 1.0273, + "step": 23094 + }, + { + "epoch": 0.02, + "learning_rate": 4.993478094067305e-05, + "loss": 1.1697, + "step": 23095 + }, + { + "epoch": 0.02, + "learning_rate": 4.993477527056192e-05, + "loss": 1.8186, + "step": 23096 + }, + { + "epoch": 0.02, + "learning_rate": 4.993476960020464e-05, + "loss": 1.269, + "step": 23097 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934763929601225e-05, + "loss": 1.3217, + "step": 23098 + }, + { + "epoch": 0.02, + "learning_rate": 4.993475825875166e-05, + "loss": 0.7491, + "step": 23099 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934752587655945e-05, + "loss": 1.1563, + "step": 23100 + }, + { + "epoch": 0.02, + "learning_rate": 4.993474691631409e-05, + "loss": 1.6386, + "step": 23101 + }, + { + "epoch": 0.02, + "learning_rate": 4.993474124472609e-05, + "loss": 1.1117, + "step": 23102 + }, + { + "epoch": 0.02, + "learning_rate": 4.993473557289194e-05, + "loss": 1.1203, + "step": 23103 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934729900811646e-05, + "loss": 0.688, + "step": 23104 + }, + { + "epoch": 0.02, + "learning_rate": 4.993472422848521e-05, + "loss": 1.1657, + "step": 23105 + }, + { + "epoch": 0.02, + "learning_rate": 4.993471855591263e-05, + "loss": 0.8521, + "step": 23106 + }, + { + "epoch": 0.02, + "learning_rate": 4.993471288309389e-05, + "loss": 0.8462, + "step": 23107 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934707210029016e-05, + "loss": 1.0982, + "step": 23108 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934701536718006e-05, + "loss": 0.8981, + "step": 23109 + }, + { + "epoch": 0.02, + "learning_rate": 4.993469586316084e-05, + "loss": 1.0856, + "step": 23110 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934690189357537e-05, + "loss": 0.9739, + "step": 23111 + }, + { + "epoch": 0.02, + "learning_rate": 4.993468451530808e-05, + "loss": 1.2552, + "step": 23112 + }, + { + "epoch": 0.02, + "learning_rate": 4.993467884101248e-05, + "loss": 0.7775, + "step": 23113 + }, + { + "epoch": 0.02, + "learning_rate": 4.993467316647075e-05, + "loss": 1.0261, + "step": 23114 + }, + { + "epoch": 0.02, + "learning_rate": 4.993466749168286e-05, + "loss": 1.0804, + "step": 23115 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934661816648827e-05, + "loss": 1.0074, + "step": 23116 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934656141368655e-05, + "loss": 0.9904, + "step": 23117 + }, + { + "epoch": 0.02, + "learning_rate": 4.993465046584233e-05, + "loss": 1.0019, + "step": 23118 + }, + { + "epoch": 0.02, + "learning_rate": 4.993464479006987e-05, + "loss": 1.1856, + "step": 23119 + }, + { + "epoch": 0.02, + "learning_rate": 4.993463911405126e-05, + "loss": 1.1158, + "step": 23120 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934633437786504e-05, + "loss": 1.1002, + "step": 23121 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934627761275607e-05, + "loss": 1.1092, + "step": 23122 + }, + { + "epoch": 0.02, + "learning_rate": 4.993462208451857e-05, + "loss": 0.8211, + "step": 23123 + }, + { + "epoch": 0.02, + "learning_rate": 4.993461640751538e-05, + "loss": 0.993, + "step": 23124 + }, + { + "epoch": 0.02, + "learning_rate": 4.993461073026605e-05, + "loss": 0.9857, + "step": 23125 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934605052770574e-05, + "loss": 1.0475, + "step": 23126 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934599375028966e-05, + "loss": 0.6335, + "step": 23127 + }, + { + "epoch": 0.02, + "learning_rate": 4.99345936970412e-05, + "loss": 0.6242, + "step": 23128 + }, + { + "epoch": 0.02, + "learning_rate": 4.99345880188073e-05, + "loss": 0.5313, + "step": 23129 + }, + { + "epoch": 0.02, + "learning_rate": 4.993458234032725e-05, + "loss": 0.9406, + "step": 23130 + }, + { + "epoch": 0.02, + "learning_rate": 4.993457666160106e-05, + "loss": 0.8033, + "step": 23131 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934570982628725e-05, + "loss": 0.88, + "step": 23132 + }, + { + "epoch": 0.02, + "learning_rate": 4.993456530341024e-05, + "loss": 1.0469, + "step": 23133 + }, + { + "epoch": 0.02, + "learning_rate": 4.993455962394562e-05, + "loss": 0.7635, + "step": 23134 + }, + { + "epoch": 0.02, + "learning_rate": 4.993455394423485e-05, + "loss": 1.0965, + "step": 23135 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934548264277945e-05, + "loss": 1.0838, + "step": 23136 + }, + { + "epoch": 0.02, + "learning_rate": 4.993454258407489e-05, + "loss": 1.1403, + "step": 23137 + }, + { + "epoch": 0.02, + "learning_rate": 4.99345369036257e-05, + "loss": 1.1377, + "step": 23138 + }, + { + "epoch": 0.02, + "learning_rate": 4.993453122293036e-05, + "loss": 1.1707, + "step": 23139 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934525541988874e-05, + "loss": 0.9139, + "step": 23140 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934519860801244e-05, + "loss": 1.2194, + "step": 23141 + }, + { + "epoch": 0.02, + "learning_rate": 4.993451417936747e-05, + "loss": 1.0025, + "step": 23142 + }, + { + "epoch": 0.02, + "learning_rate": 4.993450849768757e-05, + "loss": 1.1549, + "step": 23143 + }, + { + "epoch": 0.02, + "learning_rate": 4.993450281576151e-05, + "loss": 1.1303, + "step": 23144 + }, + { + "epoch": 0.02, + "learning_rate": 4.993449713358931e-05, + "loss": 1.0568, + "step": 23145 + }, + { + "epoch": 0.02, + "learning_rate": 4.993449145117097e-05, + "loss": 0.9952, + "step": 23146 + }, + { + "epoch": 0.02, + "learning_rate": 4.993448576850649e-05, + "loss": 1.2394, + "step": 23147 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934480085595864e-05, + "loss": 0.9275, + "step": 23148 + }, + { + "epoch": 0.02, + "learning_rate": 4.993447440243909e-05, + "loss": 0.9323, + "step": 23149 + }, + { + "epoch": 0.02, + "learning_rate": 4.993446871903618e-05, + "loss": 1.0981, + "step": 23150 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934463035387125e-05, + "loss": 1.2442, + "step": 23151 + }, + { + "epoch": 0.02, + "learning_rate": 4.993445735149193e-05, + "loss": 1.382, + "step": 23152 + }, + { + "epoch": 0.02, + "learning_rate": 4.993445166735059e-05, + "loss": 1.5269, + "step": 23153 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934445982963105e-05, + "loss": 1.1347, + "step": 23154 + }, + { + "epoch": 0.02, + "learning_rate": 4.993444029832949e-05, + "loss": 1.1598, + "step": 23155 + }, + { + "epoch": 0.02, + "learning_rate": 4.993443461344972e-05, + "loss": 0.8793, + "step": 23156 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934428928323815e-05, + "loss": 1.562, + "step": 23157 + }, + { + "epoch": 0.02, + "learning_rate": 4.993442324295176e-05, + "loss": 1.9111, + "step": 23158 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934417557333565e-05, + "loss": 1.5534, + "step": 23159 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934411871469235e-05, + "loss": 0.8926, + "step": 23160 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934406185358765e-05, + "loss": 1.1984, + "step": 23161 + }, + { + "epoch": 0.02, + "learning_rate": 4.993440049900214e-05, + "loss": 1.3421, + "step": 23162 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934394812399374e-05, + "loss": 1.1659, + "step": 23163 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934389125550473e-05, + "loss": 1.0761, + "step": 23164 + }, + { + "epoch": 0.02, + "learning_rate": 4.993438343845543e-05, + "loss": 0.7331, + "step": 23165 + }, + { + "epoch": 0.02, + "learning_rate": 4.993437775111425e-05, + "loss": 0.8914, + "step": 23166 + }, + { + "epoch": 0.02, + "learning_rate": 4.993437206352692e-05, + "loss": 1.5132, + "step": 23167 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934366375693455e-05, + "loss": 0.9486, + "step": 23168 + }, + { + "epoch": 0.02, + "learning_rate": 4.993436068761384e-05, + "loss": 0.9884, + "step": 23169 + }, + { + "epoch": 0.02, + "learning_rate": 4.993435499928809e-05, + "loss": 0.9348, + "step": 23170 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934349310716196e-05, + "loss": 0.9606, + "step": 23171 + }, + { + "epoch": 0.02, + "learning_rate": 4.993434362189816e-05, + "loss": 0.6987, + "step": 23172 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934337932833985e-05, + "loss": 0.9939, + "step": 23173 + }, + { + "epoch": 0.02, + "learning_rate": 4.993433224352366e-05, + "loss": 0.8377, + "step": 23174 + }, + { + "epoch": 0.02, + "learning_rate": 4.99343265539672e-05, + "loss": 1.4567, + "step": 23175 + }, + { + "epoch": 0.02, + "learning_rate": 4.993432086416461e-05, + "loss": 0.8906, + "step": 23176 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934315174115864e-05, + "loss": 1.0628, + "step": 23177 + }, + { + "epoch": 0.02, + "learning_rate": 4.993430948382098e-05, + "loss": 1.4498, + "step": 23178 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934303793279954e-05, + "loss": 1.0737, + "step": 23179 + }, + { + "epoch": 0.02, + "learning_rate": 4.993429810249279e-05, + "loss": 1.1439, + "step": 23180 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934292411459494e-05, + "loss": 1.1084, + "step": 23181 + }, + { + "epoch": 0.02, + "learning_rate": 4.993428672018005e-05, + "loss": 1.1309, + "step": 23182 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934281028654456e-05, + "loss": 0.9647, + "step": 23183 + }, + { + "epoch": 0.02, + "learning_rate": 4.993427533688273e-05, + "loss": 1.1088, + "step": 23184 + }, + { + "epoch": 0.02, + "learning_rate": 4.993426964486486e-05, + "loss": 1.019, + "step": 23185 + }, + { + "epoch": 0.02, + "learning_rate": 4.993426395260086e-05, + "loss": 1.1446, + "step": 23186 + }, + { + "epoch": 0.02, + "learning_rate": 4.993425826009071e-05, + "loss": 1.3041, + "step": 23187 + }, + { + "epoch": 0.02, + "learning_rate": 4.993425256733442e-05, + "loss": 1.0589, + "step": 23188 + }, + { + "epoch": 0.02, + "learning_rate": 4.993424687433198e-05, + "loss": 1.2708, + "step": 23189 + }, + { + "epoch": 0.02, + "learning_rate": 4.993424118108341e-05, + "loss": 1.3329, + "step": 23190 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934235487588705e-05, + "loss": 1.463, + "step": 23191 + }, + { + "epoch": 0.02, + "learning_rate": 4.993422979384785e-05, + "loss": 0.9503, + "step": 23192 + }, + { + "epoch": 0.02, + "learning_rate": 4.993422409986086e-05, + "loss": 1.402, + "step": 23193 + }, + { + "epoch": 0.02, + "learning_rate": 4.993421840562773e-05, + "loss": 1.1898, + "step": 23194 + }, + { + "epoch": 0.02, + "learning_rate": 4.993421271114845e-05, + "loss": 1.3067, + "step": 23195 + }, + { + "epoch": 0.02, + "learning_rate": 4.993420701642305e-05, + "loss": 0.8574, + "step": 23196 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934201321451494e-05, + "loss": 1.1057, + "step": 23197 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934195626233806e-05, + "loss": 1.0861, + "step": 23198 + }, + { + "epoch": 0.02, + "learning_rate": 4.993418993076997e-05, + "loss": 1.071, + "step": 23199 + }, + { + "epoch": 0.02, + "learning_rate": 4.993418423506e-05, + "loss": 1.055, + "step": 23200 + }, + { + "epoch": 0.02, + "learning_rate": 4.993417853910389e-05, + "loss": 0.877, + "step": 23201 + }, + { + "epoch": 0.02, + "learning_rate": 4.993417284290164e-05, + "loss": 1.2662, + "step": 23202 + }, + { + "epoch": 0.02, + "learning_rate": 4.993416714645325e-05, + "loss": 0.9654, + "step": 23203 + }, + { + "epoch": 0.02, + "learning_rate": 4.993416144975872e-05, + "loss": 0.9656, + "step": 23204 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934155752818046e-05, + "loss": 0.8111, + "step": 23205 + }, + { + "epoch": 0.02, + "learning_rate": 4.993415005563124e-05, + "loss": 1.1934, + "step": 23206 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934144358198294e-05, + "loss": 1.1579, + "step": 23207 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934138660519203e-05, + "loss": 1.1547, + "step": 23208 + }, + { + "epoch": 0.02, + "learning_rate": 4.993413296259398e-05, + "loss": 1.155, + "step": 23209 + }, + { + "epoch": 0.02, + "learning_rate": 4.993412726442261e-05, + "loss": 1.1375, + "step": 23210 + }, + { + "epoch": 0.02, + "learning_rate": 4.993412156600511e-05, + "loss": 1.2495, + "step": 23211 + }, + { + "epoch": 0.02, + "learning_rate": 4.993411586734146e-05, + "loss": 1.0842, + "step": 23212 + }, + { + "epoch": 0.02, + "learning_rate": 4.993411016843168e-05, + "loss": 1.1401, + "step": 23213 + }, + { + "epoch": 0.02, + "learning_rate": 4.993410446927575e-05, + "loss": 1.3073, + "step": 23214 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934098769873695e-05, + "loss": 1.1376, + "step": 23215 + }, + { + "epoch": 0.02, + "learning_rate": 4.99340930702255e-05, + "loss": 0.9504, + "step": 23216 + }, + { + "epoch": 0.02, + "learning_rate": 4.993408737033115e-05, + "loss": 1.1906, + "step": 23217 + }, + { + "epoch": 0.02, + "learning_rate": 4.993408167019068e-05, + "loss": 1.2061, + "step": 23218 + }, + { + "epoch": 0.02, + "learning_rate": 4.993407596980406e-05, + "loss": 1.0791, + "step": 23219 + }, + { + "epoch": 0.02, + "learning_rate": 4.993407026917131e-05, + "loss": 1.0531, + "step": 23220 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934064568292404e-05, + "loss": 1.1521, + "step": 23221 + }, + { + "epoch": 0.02, + "learning_rate": 4.993405886716738e-05, + "loss": 1.025, + "step": 23222 + }, + { + "epoch": 0.02, + "learning_rate": 4.99340531657962e-05, + "loss": 1.3181, + "step": 23223 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934047464178895e-05, + "loss": 1.1022, + "step": 23224 + }, + { + "epoch": 0.02, + "learning_rate": 4.993404176231544e-05, + "loss": 0.4759, + "step": 23225 + }, + { + "epoch": 0.02, + "learning_rate": 4.993403606020586e-05, + "loss": 1.0561, + "step": 23226 + }, + { + "epoch": 0.02, + "learning_rate": 4.993403035785014e-05, + "loss": 1.3042, + "step": 23227 + }, + { + "epoch": 0.02, + "learning_rate": 4.993402465524827e-05, + "loss": 1.0135, + "step": 23228 + }, + { + "epoch": 0.02, + "learning_rate": 4.993401895240027e-05, + "loss": 1.0974, + "step": 23229 + }, + { + "epoch": 0.02, + "learning_rate": 4.993401324930613e-05, + "loss": 1.1836, + "step": 23230 + }, + { + "epoch": 0.02, + "learning_rate": 4.993400754596585e-05, + "loss": 1.1168, + "step": 23231 + }, + { + "epoch": 0.02, + "learning_rate": 4.993400184237944e-05, + "loss": 1.0508, + "step": 23232 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933996138546884e-05, + "loss": 1.1428, + "step": 23233 + }, + { + "epoch": 0.02, + "learning_rate": 4.993399043446819e-05, + "loss": 1.2427, + "step": 23234 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933984730143366e-05, + "loss": 0.7216, + "step": 23235 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933979025572395e-05, + "loss": 4.1468, + "step": 23236 + }, + { + "epoch": 0.02, + "learning_rate": 4.993397332075529e-05, + "loss": 5.7936, + "step": 23237 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933967615692044e-05, + "loss": 5.7593, + "step": 23238 + }, + { + "epoch": 0.02, + "learning_rate": 4.993396191038266e-05, + "loss": 5.7522, + "step": 23239 + }, + { + "epoch": 0.02, + "learning_rate": 4.993395620482715e-05, + "loss": 5.2589, + "step": 23240 + }, + { + "epoch": 0.02, + "learning_rate": 4.993395049902549e-05, + "loss": 1.6599, + "step": 23241 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933944792977703e-05, + "loss": 1.0671, + "step": 23242 + }, + { + "epoch": 0.02, + "learning_rate": 4.993393908668377e-05, + "loss": 1.2064, + "step": 23243 + }, + { + "epoch": 0.02, + "learning_rate": 4.993393338014371e-05, + "loss": 1.4545, + "step": 23244 + }, + { + "epoch": 0.02, + "learning_rate": 4.99339276733575e-05, + "loss": 1.3643, + "step": 23245 + }, + { + "epoch": 0.02, + "learning_rate": 4.993392196632516e-05, + "loss": 0.9779, + "step": 23246 + }, + { + "epoch": 0.02, + "learning_rate": 4.993391625904668e-05, + "loss": 0.8743, + "step": 23247 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933910551522065e-05, + "loss": 0.9627, + "step": 23248 + }, + { + "epoch": 0.02, + "learning_rate": 4.993390484375131e-05, + "loss": 1.1667, + "step": 23249 + }, + { + "epoch": 0.02, + "learning_rate": 4.993389913573442e-05, + "loss": 1.6021, + "step": 23250 + }, + { + "epoch": 0.02, + "learning_rate": 4.993389342747139e-05, + "loss": 1.0278, + "step": 23251 + }, + { + "epoch": 0.02, + "learning_rate": 4.993388771896223e-05, + "loss": 1.1331, + "step": 23252 + }, + { + "epoch": 0.02, + "learning_rate": 4.993388201020693e-05, + "loss": 1.1256, + "step": 23253 + }, + { + "epoch": 0.02, + "learning_rate": 4.993387630120549e-05, + "loss": 1.2151, + "step": 23254 + }, + { + "epoch": 0.02, + "learning_rate": 4.993387059195792e-05, + "loss": 0.8404, + "step": 23255 + }, + { + "epoch": 0.02, + "learning_rate": 4.99338648824642e-05, + "loss": 1.0377, + "step": 23256 + }, + { + "epoch": 0.02, + "learning_rate": 4.993385917272436e-05, + "loss": 1.2286, + "step": 23257 + }, + { + "epoch": 0.02, + "learning_rate": 4.993385346273837e-05, + "loss": 0.8818, + "step": 23258 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933847752506254e-05, + "loss": 1.2151, + "step": 23259 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933842042027994e-05, + "loss": 1.0584, + "step": 23260 + }, + { + "epoch": 0.02, + "learning_rate": 4.99338363313036e-05, + "loss": 1.1344, + "step": 23261 + }, + { + "epoch": 0.02, + "learning_rate": 4.993383062033307e-05, + "loss": 0.9592, + "step": 23262 + }, + { + "epoch": 0.02, + "learning_rate": 4.993382490911641e-05, + "loss": 0.8394, + "step": 23263 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933819197653605e-05, + "loss": 0.9241, + "step": 23264 + }, + { + "epoch": 0.02, + "learning_rate": 4.993381348594467e-05, + "loss": 0.9839, + "step": 23265 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933807773989595e-05, + "loss": 0.9413, + "step": 23266 + }, + { + "epoch": 0.02, + "learning_rate": 4.993380206178838e-05, + "loss": 1.6318, + "step": 23267 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933796349341035e-05, + "loss": 5.3795, + "step": 23268 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933790636647546e-05, + "loss": 5.4089, + "step": 23269 + }, + { + "epoch": 0.02, + "learning_rate": 4.993378492370793e-05, + "loss": 5.741, + "step": 23270 + }, + { + "epoch": 0.02, + "learning_rate": 4.993377921052218e-05, + "loss": 5.5781, + "step": 23271 + }, + { + "epoch": 0.02, + "learning_rate": 4.993377349709028e-05, + "loss": 5.1289, + "step": 23272 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933767783412265e-05, + "loss": 5.3855, + "step": 23273 + }, + { + "epoch": 0.02, + "learning_rate": 4.99337620694881e-05, + "loss": 3.9171, + "step": 23274 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933756355317805e-05, + "loss": 1.0562, + "step": 23275 + }, + { + "epoch": 0.02, + "learning_rate": 4.993375064090137e-05, + "loss": 1.0869, + "step": 23276 + }, + { + "epoch": 0.02, + "learning_rate": 4.99337449262388e-05, + "loss": 1.0745, + "step": 23277 + }, + { + "epoch": 0.02, + "learning_rate": 4.99337392113301e-05, + "loss": 1.3202, + "step": 23278 + }, + { + "epoch": 0.02, + "learning_rate": 4.993373349617526e-05, + "loss": 0.9736, + "step": 23279 + }, + { + "epoch": 0.02, + "learning_rate": 4.993372778077429e-05, + "loss": 0.9415, + "step": 23280 + }, + { + "epoch": 0.02, + "learning_rate": 4.993372206512718e-05, + "loss": 0.9958, + "step": 23281 + }, + { + "epoch": 0.02, + "learning_rate": 4.993371634923393e-05, + "loss": 1.1369, + "step": 23282 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933710633094556e-05, + "loss": 1.2525, + "step": 23283 + }, + { + "epoch": 0.02, + "learning_rate": 4.993370491670904e-05, + "loss": 1.0063, + "step": 23284 + }, + { + "epoch": 0.02, + "learning_rate": 4.993369920007739e-05, + "loss": 1.4447, + "step": 23285 + }, + { + "epoch": 0.02, + "learning_rate": 4.993369348319961e-05, + "loss": 1.1304, + "step": 23286 + }, + { + "epoch": 0.02, + "learning_rate": 4.993368776607569e-05, + "loss": 0.9209, + "step": 23287 + }, + { + "epoch": 0.02, + "learning_rate": 4.993368204870563e-05, + "loss": 0.8852, + "step": 23288 + }, + { + "epoch": 0.02, + "learning_rate": 4.993367633108944e-05, + "loss": 0.7293, + "step": 23289 + }, + { + "epoch": 0.02, + "learning_rate": 4.993367061322712e-05, + "loss": 0.3413, + "step": 23290 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933664895118656e-05, + "loss": 0.277, + "step": 23291 + }, + { + "epoch": 0.02, + "learning_rate": 4.993365917676406e-05, + "loss": 0.3108, + "step": 23292 + }, + { + "epoch": 0.02, + "learning_rate": 4.993365345816334e-05, + "loss": 0.2486, + "step": 23293 + }, + { + "epoch": 0.02, + "learning_rate": 4.993364773931647e-05, + "loss": 0.4615, + "step": 23294 + }, + { + "epoch": 0.02, + "learning_rate": 4.993364202022348e-05, + "loss": 0.3067, + "step": 23295 + }, + { + "epoch": 0.02, + "learning_rate": 4.993363630088435e-05, + "loss": 0.38, + "step": 23296 + }, + { + "epoch": 0.02, + "learning_rate": 4.993363058129908e-05, + "loss": 0.4469, + "step": 23297 + }, + { + "epoch": 0.02, + "learning_rate": 4.993362486146768e-05, + "loss": 0.2989, + "step": 23298 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933619141390146e-05, + "loss": 0.5672, + "step": 23299 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933613421066476e-05, + "loss": 1.0456, + "step": 23300 + }, + { + "epoch": 0.02, + "learning_rate": 4.993360770049668e-05, + "loss": 1.089, + "step": 23301 + }, + { + "epoch": 0.02, + "learning_rate": 4.993360197968074e-05, + "loss": 0.8896, + "step": 23302 + }, + { + "epoch": 0.02, + "learning_rate": 4.993359625861866e-05, + "loss": 0.7015, + "step": 23303 + }, + { + "epoch": 0.02, + "learning_rate": 4.993359053731046e-05, + "loss": 1.0919, + "step": 23304 + }, + { + "epoch": 0.02, + "learning_rate": 4.993358481575612e-05, + "loss": 1.232, + "step": 23305 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933579093955654e-05, + "loss": 1.3141, + "step": 23306 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933573371909045e-05, + "loss": 1.7366, + "step": 23307 + }, + { + "epoch": 0.02, + "learning_rate": 4.993356764961631e-05, + "loss": 1.3943, + "step": 23308 + }, + { + "epoch": 0.02, + "learning_rate": 4.993356192707743e-05, + "loss": 1.0011, + "step": 23309 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933556204292426e-05, + "loss": 1.1579, + "step": 23310 + }, + { + "epoch": 0.02, + "learning_rate": 4.993355048126128e-05, + "loss": 1.3984, + "step": 23311 + }, + { + "epoch": 0.02, + "learning_rate": 4.993354475798401e-05, + "loss": 1.0276, + "step": 23312 + }, + { + "epoch": 0.02, + "learning_rate": 4.99335390344606e-05, + "loss": 0.5449, + "step": 23313 + }, + { + "epoch": 0.02, + "learning_rate": 4.993353331069106e-05, + "loss": 0.6579, + "step": 23314 + }, + { + "epoch": 0.02, + "learning_rate": 4.993352758667539e-05, + "loss": 1.2305, + "step": 23315 + }, + { + "epoch": 0.02, + "learning_rate": 4.993352186241358e-05, + "loss": 1.143, + "step": 23316 + }, + { + "epoch": 0.02, + "learning_rate": 4.993351613790564e-05, + "loss": 1.1576, + "step": 23317 + }, + { + "epoch": 0.02, + "learning_rate": 4.993351041315156e-05, + "loss": 1.1882, + "step": 23318 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933504688151355e-05, + "loss": 0.9701, + "step": 23319 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933498962905015e-05, + "loss": 0.7091, + "step": 23320 + }, + { + "epoch": 0.02, + "learning_rate": 4.993349323741254e-05, + "loss": 0.275, + "step": 23321 + }, + { + "epoch": 0.02, + "learning_rate": 4.993348751167394e-05, + "loss": 0.9396, + "step": 23322 + }, + { + "epoch": 0.02, + "learning_rate": 4.99334817856892e-05, + "loss": 1.3934, + "step": 23323 + }, + { + "epoch": 0.02, + "learning_rate": 4.993347605945833e-05, + "loss": 1.0501, + "step": 23324 + }, + { + "epoch": 0.02, + "learning_rate": 4.993347033298133e-05, + "loss": 0.4584, + "step": 23325 + }, + { + "epoch": 0.02, + "learning_rate": 4.993346460625819e-05, + "loss": 0.7367, + "step": 23326 + }, + { + "epoch": 0.02, + "learning_rate": 4.993345887928892e-05, + "loss": 0.7613, + "step": 23327 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933453152073514e-05, + "loss": 0.8282, + "step": 23328 + }, + { + "epoch": 0.02, + "learning_rate": 4.993344742461198e-05, + "loss": 1.2878, + "step": 23329 + }, + { + "epoch": 0.02, + "learning_rate": 4.993344169690431e-05, + "loss": 1.081, + "step": 23330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933435968950514e-05, + "loss": 0.9425, + "step": 23331 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933430240750576e-05, + "loss": 1.0906, + "step": 23332 + }, + { + "epoch": 0.02, + "learning_rate": 4.993342451230452e-05, + "loss": 1.1445, + "step": 23333 + }, + { + "epoch": 0.02, + "learning_rate": 4.993341878361232e-05, + "loss": 1.0742, + "step": 23334 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933413054673985e-05, + "loss": 1.0938, + "step": 23335 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933407325489525e-05, + "loss": 1.2609, + "step": 23336 + }, + { + "epoch": 0.02, + "learning_rate": 4.993340159605893e-05, + "loss": 0.944, + "step": 23337 + }, + { + "epoch": 0.02, + "learning_rate": 4.993339586638221e-05, + "loss": 1.0673, + "step": 23338 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933390136459344e-05, + "loss": 1.0947, + "step": 23339 + }, + { + "epoch": 0.02, + "learning_rate": 4.993338440629036e-05, + "loss": 1.0315, + "step": 23340 + }, + { + "epoch": 0.02, + "learning_rate": 4.993337867587524e-05, + "loss": 1.1006, + "step": 23341 + }, + { + "epoch": 0.02, + "learning_rate": 4.993337294521399e-05, + "loss": 1.1156, + "step": 23342 + }, + { + "epoch": 0.02, + "learning_rate": 4.99333672143066e-05, + "loss": 1.1108, + "step": 23343 + }, + { + "epoch": 0.02, + "learning_rate": 4.993336148315309e-05, + "loss": 1.1668, + "step": 23344 + }, + { + "epoch": 0.02, + "learning_rate": 4.993335575175344e-05, + "loss": 1.0407, + "step": 23345 + }, + { + "epoch": 0.02, + "learning_rate": 4.993335002010766e-05, + "loss": 1.1181, + "step": 23346 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933344288215746e-05, + "loss": 0.9547, + "step": 23347 + }, + { + "epoch": 0.02, + "learning_rate": 4.99333385560777e-05, + "loss": 1.1953, + "step": 23348 + }, + { + "epoch": 0.02, + "learning_rate": 4.993333282369353e-05, + "loss": 1.2058, + "step": 23349 + }, + { + "epoch": 0.02, + "learning_rate": 4.993332709106322e-05, + "loss": 0.9971, + "step": 23350 + }, + { + "epoch": 0.02, + "learning_rate": 4.993332135818679e-05, + "loss": 0.8413, + "step": 23351 + }, + { + "epoch": 0.02, + "learning_rate": 4.993331562506422e-05, + "loss": 0.6554, + "step": 23352 + }, + { + "epoch": 0.02, + "learning_rate": 4.993330989169552e-05, + "loss": 1.5378, + "step": 23353 + }, + { + "epoch": 0.02, + "learning_rate": 4.993330415808069e-05, + "loss": 1.1317, + "step": 23354 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933298424219734e-05, + "loss": 0.9963, + "step": 23355 + }, + { + "epoch": 0.02, + "learning_rate": 4.993329269011263e-05, + "loss": 0.9579, + "step": 23356 + }, + { + "epoch": 0.02, + "learning_rate": 4.993328695575941e-05, + "loss": 1.16, + "step": 23357 + }, + { + "epoch": 0.02, + "learning_rate": 4.993328122116006e-05, + "loss": 0.8475, + "step": 23358 + }, + { + "epoch": 0.02, + "learning_rate": 4.993327548631457e-05, + "loss": 1.0802, + "step": 23359 + }, + { + "epoch": 0.02, + "learning_rate": 4.993326975122296e-05, + "loss": 1.1835, + "step": 23360 + }, + { + "epoch": 0.02, + "learning_rate": 4.993326401588521e-05, + "loss": 1.1793, + "step": 23361 + }, + { + "epoch": 0.02, + "learning_rate": 4.993325828030133e-05, + "loss": 1.0708, + "step": 23362 + }, + { + "epoch": 0.02, + "learning_rate": 4.993325254447133e-05, + "loss": 0.9444, + "step": 23363 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933246808395184e-05, + "loss": 1.0821, + "step": 23364 + }, + { + "epoch": 0.02, + "learning_rate": 4.993324107207292e-05, + "loss": 1.0358, + "step": 23365 + }, + { + "epoch": 0.02, + "learning_rate": 4.993323533550452e-05, + "loss": 0.9806, + "step": 23366 + }, + { + "epoch": 0.02, + "learning_rate": 4.993322959868999e-05, + "loss": 0.4699, + "step": 23367 + }, + { + "epoch": 0.02, + "learning_rate": 4.993322386162933e-05, + "loss": 1.1496, + "step": 23368 + }, + { + "epoch": 0.02, + "learning_rate": 4.993321812432254e-05, + "loss": 1.1119, + "step": 23369 + }, + { + "epoch": 0.02, + "learning_rate": 4.993321238676962e-05, + "loss": 0.9411, + "step": 23370 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933206648970564e-05, + "loss": 1.6814, + "step": 23371 + }, + { + "epoch": 0.02, + "learning_rate": 4.993320091092538e-05, + "loss": 1.181, + "step": 23372 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933195172634074e-05, + "loss": 0.913, + "step": 23373 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933189434096635e-05, + "loss": 1.1855, + "step": 23374 + }, + { + "epoch": 0.02, + "learning_rate": 4.993318369531306e-05, + "loss": 1.2098, + "step": 23375 + }, + { + "epoch": 0.02, + "learning_rate": 4.993317795628336e-05, + "loss": 1.2998, + "step": 23376 + }, + { + "epoch": 0.02, + "learning_rate": 4.993317221700753e-05, + "loss": 1.1796, + "step": 23377 + }, + { + "epoch": 0.02, + "learning_rate": 4.993316647748557e-05, + "loss": 0.6056, + "step": 23378 + }, + { + "epoch": 0.02, + "learning_rate": 4.993316073771748e-05, + "loss": 0.6432, + "step": 23379 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933154997703255e-05, + "loss": 0.6672, + "step": 23380 + }, + { + "epoch": 0.02, + "learning_rate": 4.993314925744291e-05, + "loss": 0.7372, + "step": 23381 + }, + { + "epoch": 0.02, + "learning_rate": 4.993314351693643e-05, + "loss": 0.503, + "step": 23382 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933137776183824e-05, + "loss": 0.3402, + "step": 23383 + }, + { + "epoch": 0.02, + "learning_rate": 4.993313203518509e-05, + "loss": 0.9668, + "step": 23384 + }, + { + "epoch": 0.02, + "learning_rate": 4.993312629394022e-05, + "loss": 1.2158, + "step": 23385 + }, + { + "epoch": 0.02, + "learning_rate": 4.993312055244922e-05, + "loss": 1.0672, + "step": 23386 + }, + { + "epoch": 0.02, + "learning_rate": 4.99331148107121e-05, + "loss": 1.3195, + "step": 23387 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933109068728846e-05, + "loss": 0.958, + "step": 23388 + }, + { + "epoch": 0.02, + "learning_rate": 4.993310332649946e-05, + "loss": 0.96, + "step": 23389 + }, + { + "epoch": 0.02, + "learning_rate": 4.993309758402395e-05, + "loss": 0.9401, + "step": 23390 + }, + { + "epoch": 0.02, + "learning_rate": 4.99330918413023e-05, + "loss": 0.949, + "step": 23391 + }, + { + "epoch": 0.02, + "learning_rate": 4.993308609833453e-05, + "loss": 1.1524, + "step": 23392 + }, + { + "epoch": 0.02, + "learning_rate": 4.993308035512063e-05, + "loss": 1.1711, + "step": 23393 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933074611660604e-05, + "loss": 1.0838, + "step": 23394 + }, + { + "epoch": 0.02, + "learning_rate": 4.993306886795445e-05, + "loss": 1.1809, + "step": 23395 + }, + { + "epoch": 0.02, + "learning_rate": 4.993306312400216e-05, + "loss": 1.1751, + "step": 23396 + }, + { + "epoch": 0.02, + "learning_rate": 4.993305737980374e-05, + "loss": 1.0431, + "step": 23397 + }, + { + "epoch": 0.02, + "learning_rate": 4.9933051635359194e-05, + "loss": 0.9809, + "step": 23398 + }, + { + "epoch": 0.02, + "learning_rate": 4.993304589066853e-05, + "loss": 1.1022, + "step": 23399 + }, + { + "epoch": 0.02, + "learning_rate": 4.993304014573172e-05, + "loss": 1.1239, + "step": 23400 + }, + { + "epoch": 0.02, + "learning_rate": 4.993303440054879e-05, + "loss": 1.0594, + "step": 23401 + }, + { + "epoch": 0.02, + "learning_rate": 4.993302865511974e-05, + "loss": 1.2254, + "step": 23402 + }, + { + "epoch": 0.02, + "learning_rate": 4.993302290944455e-05, + "loss": 1.397, + "step": 23403 + }, + { + "epoch": 0.02, + "learning_rate": 4.993301716352323e-05, + "loss": 0.9546, + "step": 23404 + }, + { + "epoch": 0.02, + "learning_rate": 4.993301141735579e-05, + "loss": 0.8791, + "step": 23405 + }, + { + "epoch": 0.02, + "learning_rate": 4.993300567094222e-05, + "loss": 0.9978, + "step": 23406 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932999924282516e-05, + "loss": 0.9931, + "step": 23407 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932994177376697e-05, + "loss": 1.2079, + "step": 23408 + }, + { + "epoch": 0.02, + "learning_rate": 4.993298843022474e-05, + "loss": 1.3116, + "step": 23409 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932982682826654e-05, + "loss": 1.3916, + "step": 23410 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932976935182445e-05, + "loss": 1.2177, + "step": 23411 + }, + { + "epoch": 0.02, + "learning_rate": 4.993297118729211e-05, + "loss": 0.9864, + "step": 23412 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932965439155645e-05, + "loss": 0.9315, + "step": 23413 + }, + { + "epoch": 0.02, + "learning_rate": 4.993295969077305e-05, + "loss": 0.6931, + "step": 23414 + }, + { + "epoch": 0.02, + "learning_rate": 4.993295394214432e-05, + "loss": 0.7407, + "step": 23415 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932948193269475e-05, + "loss": 1.1855, + "step": 23416 + }, + { + "epoch": 0.02, + "learning_rate": 4.99329424441485e-05, + "loss": 1.2927, + "step": 23417 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932936694781393e-05, + "loss": 1.2798, + "step": 23418 + }, + { + "epoch": 0.02, + "learning_rate": 4.993293094516816e-05, + "loss": 1.0461, + "step": 23419 + }, + { + "epoch": 0.02, + "learning_rate": 4.99329251953088e-05, + "loss": 0.9474, + "step": 23420 + }, + { + "epoch": 0.02, + "learning_rate": 4.993291944520331e-05, + "loss": 2.0329, + "step": 23421 + }, + { + "epoch": 0.02, + "learning_rate": 4.99329136948517e-05, + "loss": 2.5434, + "step": 23422 + }, + { + "epoch": 0.02, + "learning_rate": 4.993290794425396e-05, + "loss": 0.9622, + "step": 23423 + }, + { + "epoch": 0.02, + "learning_rate": 4.993290219341009e-05, + "loss": 1.571, + "step": 23424 + }, + { + "epoch": 0.02, + "learning_rate": 4.99328964423201e-05, + "loss": 0.7814, + "step": 23425 + }, + { + "epoch": 0.02, + "learning_rate": 4.993289069098397e-05, + "loss": 1.139, + "step": 23426 + }, + { + "epoch": 0.02, + "learning_rate": 4.993288493940173e-05, + "loss": 1.6188, + "step": 23427 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932879187573346e-05, + "loss": 1.4267, + "step": 23428 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932873435498836e-05, + "loss": 1.28, + "step": 23429 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932867683178205e-05, + "loss": 2.7587, + "step": 23430 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932861930611454e-05, + "loss": 1.1619, + "step": 23431 + }, + { + "epoch": 0.02, + "learning_rate": 4.993285617779857e-05, + "loss": 0.9199, + "step": 23432 + }, + { + "epoch": 0.02, + "learning_rate": 4.993285042473956e-05, + "loss": 0.5233, + "step": 23433 + }, + { + "epoch": 0.02, + "learning_rate": 4.993284467143442e-05, + "loss": 0.7261, + "step": 23434 + }, + { + "epoch": 0.02, + "learning_rate": 4.993283891788316e-05, + "loss": 0.5727, + "step": 23435 + }, + { + "epoch": 0.02, + "learning_rate": 4.993283316408577e-05, + "loss": 0.8431, + "step": 23436 + }, + { + "epoch": 0.02, + "learning_rate": 4.993282741004226e-05, + "loss": 1.145, + "step": 23437 + }, + { + "epoch": 0.02, + "learning_rate": 4.993282165575261e-05, + "loss": 0.7237, + "step": 23438 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932815901216844e-05, + "loss": 1.056, + "step": 23439 + }, + { + "epoch": 0.02, + "learning_rate": 4.993281014643495e-05, + "loss": 1.0847, + "step": 23440 + }, + { + "epoch": 0.02, + "learning_rate": 4.993280439140693e-05, + "loss": 1.0575, + "step": 23441 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932798636132774e-05, + "loss": 1.1646, + "step": 23442 + }, + { + "epoch": 0.02, + "learning_rate": 4.99327928806125e-05, + "loss": 0.9696, + "step": 23443 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932787124846106e-05, + "loss": 1.0667, + "step": 23444 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932781368833584e-05, + "loss": 0.8858, + "step": 23445 + }, + { + "epoch": 0.02, + "learning_rate": 4.993277561257492e-05, + "loss": 0.7612, + "step": 23446 + }, + { + "epoch": 0.02, + "learning_rate": 4.993276985607015e-05, + "loss": 0.4589, + "step": 23447 + }, + { + "epoch": 0.02, + "learning_rate": 4.993276409931925e-05, + "loss": 0.2337, + "step": 23448 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932758342322215e-05, + "loss": 0.7243, + "step": 23449 + }, + { + "epoch": 0.02, + "learning_rate": 4.993275258507906e-05, + "loss": 1.1749, + "step": 23450 + }, + { + "epoch": 0.02, + "learning_rate": 4.993274682758978e-05, + "loss": 1.2144, + "step": 23451 + }, + { + "epoch": 0.02, + "learning_rate": 4.993274106985437e-05, + "loss": 1.5626, + "step": 23452 + }, + { + "epoch": 0.02, + "learning_rate": 4.993273531187285e-05, + "loss": 0.9917, + "step": 23453 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932729553645186e-05, + "loss": 0.8859, + "step": 23454 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932723795171404e-05, + "loss": 1.059, + "step": 23455 + }, + { + "epoch": 0.02, + "learning_rate": 4.99327180364515e-05, + "loss": 1.2748, + "step": 23456 + }, + { + "epoch": 0.02, + "learning_rate": 4.993271227748546e-05, + "loss": 1.0928, + "step": 23457 + }, + { + "epoch": 0.02, + "learning_rate": 4.993270651827331e-05, + "loss": 2.7888, + "step": 23458 + }, + { + "epoch": 0.02, + "learning_rate": 4.993270075881502e-05, + "loss": 2.442, + "step": 23459 + }, + { + "epoch": 0.02, + "learning_rate": 4.993269499911061e-05, + "loss": 1.7864, + "step": 23460 + }, + { + "epoch": 0.02, + "learning_rate": 4.993268923916008e-05, + "loss": 1.0732, + "step": 23461 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932683478963424e-05, + "loss": 0.828, + "step": 23462 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932677718520635e-05, + "loss": 1.0951, + "step": 23463 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932671957831726e-05, + "loss": 1.2802, + "step": 23464 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932666196896696e-05, + "loss": 1.3998, + "step": 23465 + }, + { + "epoch": 0.02, + "learning_rate": 4.993266043571554e-05, + "loss": 1.2392, + "step": 23466 + }, + { + "epoch": 0.02, + "learning_rate": 4.993265467428825e-05, + "loss": 1.1793, + "step": 23467 + }, + { + "epoch": 0.02, + "learning_rate": 4.993264891261485e-05, + "loss": 1.2057, + "step": 23468 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932643150695314e-05, + "loss": 0.7938, + "step": 23469 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932637388529654e-05, + "loss": 1.2047, + "step": 23470 + }, + { + "epoch": 0.02, + "learning_rate": 4.993263162611788e-05, + "loss": 0.9026, + "step": 23471 + }, + { + "epoch": 0.02, + "learning_rate": 4.993262586345997e-05, + "loss": 1.1477, + "step": 23472 + }, + { + "epoch": 0.02, + "learning_rate": 4.993262010055594e-05, + "loss": 1.1637, + "step": 23473 + }, + { + "epoch": 0.02, + "learning_rate": 4.993261433740579e-05, + "loss": 0.6325, + "step": 23474 + }, + { + "epoch": 0.02, + "learning_rate": 4.993260857400951e-05, + "loss": 1.0565, + "step": 23475 + }, + { + "epoch": 0.02, + "learning_rate": 4.993260281036711e-05, + "loss": 0.9072, + "step": 23476 + }, + { + "epoch": 0.02, + "learning_rate": 4.993259704647858e-05, + "loss": 0.6164, + "step": 23477 + }, + { + "epoch": 0.02, + "learning_rate": 4.993259128234393e-05, + "loss": 1.0499, + "step": 23478 + }, + { + "epoch": 0.02, + "learning_rate": 4.993258551796316e-05, + "loss": 2.9011, + "step": 23479 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932579753336256e-05, + "loss": 0.9936, + "step": 23480 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932573988463234e-05, + "loss": 0.9732, + "step": 23481 + }, + { + "epoch": 0.02, + "learning_rate": 4.993256822334409e-05, + "loss": 1.0005, + "step": 23482 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932562457978815e-05, + "loss": 0.9007, + "step": 23483 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932556692367425e-05, + "loss": 1.4692, + "step": 23484 + }, + { + "epoch": 0.02, + "learning_rate": 4.99325509265099e-05, + "loss": 1.117, + "step": 23485 + }, + { + "epoch": 0.02, + "learning_rate": 4.993254516040626e-05, + "loss": 1.0016, + "step": 23486 + }, + { + "epoch": 0.02, + "learning_rate": 4.99325393940565e-05, + "loss": 0.8793, + "step": 23487 + }, + { + "epoch": 0.02, + "learning_rate": 4.993253362746061e-05, + "loss": 0.934, + "step": 23488 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932527860618594e-05, + "loss": 0.9091, + "step": 23489 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932522093530466e-05, + "loss": 1.1824, + "step": 23490 + }, + { + "epoch": 0.02, + "learning_rate": 4.99325163261962e-05, + "loss": 1.2191, + "step": 23491 + }, + { + "epoch": 0.02, + "learning_rate": 4.993251055861582e-05, + "loss": 1.0254, + "step": 23492 + }, + { + "epoch": 0.02, + "learning_rate": 4.993250479078932e-05, + "loss": 0.9703, + "step": 23493 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932499022716686e-05, + "loss": 1.2049, + "step": 23494 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932493254397934e-05, + "loss": 0.8612, + "step": 23495 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932487485833055e-05, + "loss": 0.8506, + "step": 23496 + }, + { + "epoch": 0.02, + "learning_rate": 4.993248171702206e-05, + "loss": 1.1419, + "step": 23497 + }, + { + "epoch": 0.02, + "learning_rate": 4.993247594796494e-05, + "loss": 1.047, + "step": 23498 + }, + { + "epoch": 0.02, + "learning_rate": 4.993247017866169e-05, + "loss": 1.1208, + "step": 23499 + }, + { + "epoch": 0.02, + "learning_rate": 4.993246440911233e-05, + "loss": 1.4139, + "step": 23500 + }, + { + "epoch": 0.02, + "eval_loss": 1.019091248512268, + "eval_runtime": 83.505, + "eval_samples_per_second": 16.586, + "eval_steps_per_second": 4.155, + "step": 23500 + }, + { + "epoch": 0.02, + "learning_rate": 4.993245863931684e-05, + "loss": 1.7553, + "step": 23501 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932452869275225e-05, + "loss": 1.545, + "step": 23502 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932447098987494e-05, + "loss": 1.0281, + "step": 23503 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932441328453636e-05, + "loss": 0.9925, + "step": 23504 + }, + { + "epoch": 0.02, + "learning_rate": 4.993243555767366e-05, + "loss": 1.2199, + "step": 23505 + }, + { + "epoch": 0.02, + "learning_rate": 4.993242978664756e-05, + "loss": 1.6498, + "step": 23506 + }, + { + "epoch": 0.02, + "learning_rate": 4.993242401537534e-05, + "loss": 1.4192, + "step": 23507 + }, + { + "epoch": 0.02, + "learning_rate": 4.993241824385698e-05, + "loss": 0.9705, + "step": 23508 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932412472092515e-05, + "loss": 1.2505, + "step": 23509 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932406700081926e-05, + "loss": 1.1719, + "step": 23510 + }, + { + "epoch": 0.02, + "learning_rate": 4.993240092782521e-05, + "loss": 0.8925, + "step": 23511 + }, + { + "epoch": 0.02, + "learning_rate": 4.993239515532237e-05, + "loss": 1.2526, + "step": 23512 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932389382573415e-05, + "loss": 1.1179, + "step": 23513 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932383609578336e-05, + "loss": 0.7899, + "step": 23514 + }, + { + "epoch": 0.02, + "learning_rate": 4.993237783633713e-05, + "loss": 0.8602, + "step": 23515 + }, + { + "epoch": 0.02, + "learning_rate": 4.993237206284981e-05, + "loss": 1.1267, + "step": 23516 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932366289116364e-05, + "loss": 1.4289, + "step": 23517 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932360515136796e-05, + "loss": 1.0198, + "step": 23518 + }, + { + "epoch": 0.02, + "learning_rate": 4.99323547409111e-05, + "loss": 1.3792, + "step": 23519 + }, + { + "epoch": 0.02, + "learning_rate": 4.993234896643929e-05, + "loss": 1.3194, + "step": 23520 + }, + { + "epoch": 0.02, + "learning_rate": 4.993234319172136e-05, + "loss": 1.0939, + "step": 23521 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932337416757304e-05, + "loss": 1.1187, + "step": 23522 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932331641547126e-05, + "loss": 0.8891, + "step": 23523 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932325866090834e-05, + "loss": 0.9343, + "step": 23524 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932320090388415e-05, + "loss": 0.9085, + "step": 23525 + }, + { + "epoch": 0.02, + "learning_rate": 4.993231431443987e-05, + "loss": 0.9539, + "step": 23526 + }, + { + "epoch": 0.02, + "learning_rate": 4.993230853824521e-05, + "loss": 1.18, + "step": 23527 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932302761804426e-05, + "loss": 1.2567, + "step": 23528 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932296985117524e-05, + "loss": 1.0974, + "step": 23529 + }, + { + "epoch": 0.02, + "learning_rate": 4.99322912081845e-05, + "loss": 1.1708, + "step": 23530 + }, + { + "epoch": 0.02, + "learning_rate": 4.993228543100535e-05, + "loss": 1.4342, + "step": 23531 + }, + { + "epoch": 0.02, + "learning_rate": 4.993227965358008e-05, + "loss": 1.2641, + "step": 23532 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932273875908696e-05, + "loss": 1.1019, + "step": 23533 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932268097991184e-05, + "loss": 1.2149, + "step": 23534 + }, + { + "epoch": 0.02, + "learning_rate": 4.993226231982756e-05, + "loss": 0.5881, + "step": 23535 + }, + { + "epoch": 0.02, + "learning_rate": 4.993225654141781e-05, + "loss": 0.4593, + "step": 23536 + }, + { + "epoch": 0.02, + "learning_rate": 4.993225076276194e-05, + "loss": 0.7895, + "step": 23537 + }, + { + "epoch": 0.02, + "learning_rate": 4.993224498385994e-05, + "loss": 1.0561, + "step": 23538 + }, + { + "epoch": 0.02, + "learning_rate": 4.993223920471183e-05, + "loss": 1.1219, + "step": 23539 + }, + { + "epoch": 0.02, + "learning_rate": 4.99322334253176e-05, + "loss": 1.3474, + "step": 23540 + }, + { + "epoch": 0.02, + "learning_rate": 4.993222764567724e-05, + "loss": 0.7555, + "step": 23541 + }, + { + "epoch": 0.02, + "learning_rate": 4.993222186579077e-05, + "loss": 1.2043, + "step": 23542 + }, + { + "epoch": 0.02, + "learning_rate": 4.993221608565817e-05, + "loss": 1.1644, + "step": 23543 + }, + { + "epoch": 0.02, + "learning_rate": 4.993221030527946e-05, + "loss": 0.4963, + "step": 23544 + }, + { + "epoch": 0.02, + "learning_rate": 4.993220452465463e-05, + "loss": 1.0281, + "step": 23545 + }, + { + "epoch": 0.02, + "learning_rate": 4.993219874378367e-05, + "loss": 0.9727, + "step": 23546 + }, + { + "epoch": 0.02, + "learning_rate": 4.993219296266659e-05, + "loss": 1.0703, + "step": 23547 + }, + { + "epoch": 0.02, + "learning_rate": 4.99321871813034e-05, + "loss": 0.9731, + "step": 23548 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932181399694076e-05, + "loss": 1.2766, + "step": 23549 + }, + { + "epoch": 0.02, + "learning_rate": 4.993217561783865e-05, + "loss": 1.061, + "step": 23550 + }, + { + "epoch": 0.02, + "learning_rate": 4.993216983573708e-05, + "loss": 0.9679, + "step": 23551 + }, + { + "epoch": 0.02, + "learning_rate": 4.993216405338941e-05, + "loss": 0.9538, + "step": 23552 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932158270795615e-05, + "loss": 1.088, + "step": 23553 + }, + { + "epoch": 0.02, + "learning_rate": 4.99321524879557e-05, + "loss": 1.2186, + "step": 23554 + }, + { + "epoch": 0.02, + "learning_rate": 4.993214670486966e-05, + "loss": 1.0643, + "step": 23555 + }, + { + "epoch": 0.02, + "learning_rate": 4.993214092153752e-05, + "loss": 1.2714, + "step": 23556 + }, + { + "epoch": 0.02, + "learning_rate": 4.993213513795924e-05, + "loss": 0.9717, + "step": 23557 + }, + { + "epoch": 0.02, + "learning_rate": 4.993212935413485e-05, + "loss": 0.8208, + "step": 23558 + }, + { + "epoch": 0.02, + "learning_rate": 4.993212357006433e-05, + "loss": 0.3989, + "step": 23559 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932117785747704e-05, + "loss": 0.3641, + "step": 23560 + }, + { + "epoch": 0.02, + "learning_rate": 4.993211200118495e-05, + "loss": 1.021, + "step": 23561 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932106216376076e-05, + "loss": 1.3988, + "step": 23562 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932100431321085e-05, + "loss": 1.0308, + "step": 23563 + }, + { + "epoch": 0.02, + "learning_rate": 4.993209464601998e-05, + "loss": 1.3788, + "step": 23564 + }, + { + "epoch": 0.02, + "learning_rate": 4.993208886047275e-05, + "loss": 1.5581, + "step": 23565 + }, + { + "epoch": 0.02, + "learning_rate": 4.99320830746794e-05, + "loss": 1.0108, + "step": 23566 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932077288639934e-05, + "loss": 0.476, + "step": 23567 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932071502354346e-05, + "loss": 1.0627, + "step": 23568 + }, + { + "epoch": 0.02, + "learning_rate": 4.993206571582264e-05, + "loss": 0.9829, + "step": 23569 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932059929044816e-05, + "loss": 0.9804, + "step": 23570 + }, + { + "epoch": 0.02, + "learning_rate": 4.993205414202088e-05, + "loss": 1.2072, + "step": 23571 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932048354750816e-05, + "loss": 1.2994, + "step": 23572 + }, + { + "epoch": 0.02, + "learning_rate": 4.993204256723464e-05, + "loss": 1.1765, + "step": 23573 + }, + { + "epoch": 0.02, + "learning_rate": 4.993203677947233e-05, + "loss": 1.0123, + "step": 23574 + }, + { + "epoch": 0.02, + "learning_rate": 4.993203099146392e-05, + "loss": 1.1781, + "step": 23575 + }, + { + "epoch": 0.02, + "learning_rate": 4.993202520320939e-05, + "loss": 1.2677, + "step": 23576 + }, + { + "epoch": 0.02, + "learning_rate": 4.993201941470873e-05, + "loss": 1.1516, + "step": 23577 + }, + { + "epoch": 0.02, + "learning_rate": 4.993201362596196e-05, + "loss": 0.9365, + "step": 23578 + }, + { + "epoch": 0.02, + "learning_rate": 4.993200783696907e-05, + "loss": 1.0756, + "step": 23579 + }, + { + "epoch": 0.02, + "learning_rate": 4.993200204773006e-05, + "loss": 1.3574, + "step": 23580 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931996258244934e-05, + "loss": 0.922, + "step": 23581 + }, + { + "epoch": 0.02, + "learning_rate": 4.993199046851369e-05, + "loss": 1.2906, + "step": 23582 + }, + { + "epoch": 0.02, + "learning_rate": 4.993198467853633e-05, + "loss": 0.7179, + "step": 23583 + }, + { + "epoch": 0.02, + "learning_rate": 4.993197888831284e-05, + "loss": 1.1182, + "step": 23584 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931973097843244e-05, + "loss": 0.34, + "step": 23585 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931967307127525e-05, + "loss": 0.7896, + "step": 23586 + }, + { + "epoch": 0.02, + "learning_rate": 4.993196151616569e-05, + "loss": 1.0032, + "step": 23587 + }, + { + "epoch": 0.02, + "learning_rate": 4.993195572495774e-05, + "loss": 0.5165, + "step": 23588 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931949933503664e-05, + "loss": 1.0506, + "step": 23589 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931944141803476e-05, + "loss": 0.7367, + "step": 23590 + }, + { + "epoch": 0.02, + "learning_rate": 4.993193834985717e-05, + "loss": 1.178, + "step": 23591 + }, + { + "epoch": 0.02, + "learning_rate": 4.993193255766475e-05, + "loss": 1.204, + "step": 23592 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931926765226214e-05, + "loss": 1.1325, + "step": 23593 + }, + { + "epoch": 0.02, + "learning_rate": 4.993192097254154e-05, + "loss": 1.0491, + "step": 23594 + }, + { + "epoch": 0.02, + "learning_rate": 4.993191517961077e-05, + "loss": 1.0496, + "step": 23595 + }, + { + "epoch": 0.02, + "learning_rate": 4.993190938643388e-05, + "loss": 1.3832, + "step": 23596 + }, + { + "epoch": 0.02, + "learning_rate": 4.993190359301087e-05, + "loss": 1.4029, + "step": 23597 + }, + { + "epoch": 0.02, + "learning_rate": 4.993189779934174e-05, + "loss": 0.8646, + "step": 23598 + }, + { + "epoch": 0.02, + "learning_rate": 4.993189200542649e-05, + "loss": 1.147, + "step": 23599 + }, + { + "epoch": 0.02, + "learning_rate": 4.993188621126513e-05, + "loss": 1.1318, + "step": 23600 + }, + { + "epoch": 0.02, + "learning_rate": 4.993188041685765e-05, + "loss": 1.6338, + "step": 23601 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931874622204055e-05, + "loss": 1.1445, + "step": 23602 + }, + { + "epoch": 0.02, + "learning_rate": 4.993186882730434e-05, + "loss": 0.9323, + "step": 23603 + }, + { + "epoch": 0.02, + "learning_rate": 4.993186303215851e-05, + "loss": 0.7262, + "step": 23604 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931857236766564e-05, + "loss": 1.0773, + "step": 23605 + }, + { + "epoch": 0.02, + "learning_rate": 4.99318514411285e-05, + "loss": 1.1596, + "step": 23606 + }, + { + "epoch": 0.02, + "learning_rate": 4.993184564524433e-05, + "loss": 1.0628, + "step": 23607 + }, + { + "epoch": 0.02, + "learning_rate": 4.993183984911403e-05, + "loss": 1.1167, + "step": 23608 + }, + { + "epoch": 0.02, + "learning_rate": 4.993183405273761e-05, + "loss": 1.0022, + "step": 23609 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931828256115074e-05, + "loss": 0.9754, + "step": 23610 + }, + { + "epoch": 0.02, + "learning_rate": 4.993182245924643e-05, + "loss": 1.0467, + "step": 23611 + }, + { + "epoch": 0.02, + "learning_rate": 4.993181666213167e-05, + "loss": 1.1302, + "step": 23612 + }, + { + "epoch": 0.02, + "learning_rate": 4.993181086477079e-05, + "loss": 1.6587, + "step": 23613 + }, + { + "epoch": 0.02, + "learning_rate": 4.993180506716379e-05, + "loss": 5.2102, + "step": 23614 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931799269310676e-05, + "loss": 5.2838, + "step": 23615 + }, + { + "epoch": 0.02, + "learning_rate": 4.993179347121145e-05, + "loss": 5.4952, + "step": 23616 + }, + { + "epoch": 0.02, + "learning_rate": 4.993178767286611e-05, + "loss": 5.4624, + "step": 23617 + }, + { + "epoch": 0.02, + "learning_rate": 4.993178187427464e-05, + "loss": 5.7819, + "step": 23618 + }, + { + "epoch": 0.02, + "learning_rate": 4.993177607543706e-05, + "loss": 5.8194, + "step": 23619 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931770276353376e-05, + "loss": 5.9286, + "step": 23620 + }, + { + "epoch": 0.02, + "learning_rate": 4.993176447702357e-05, + "loss": 2.4721, + "step": 23621 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931758677447634e-05, + "loss": 1.0889, + "step": 23622 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931752877625606e-05, + "loss": 1.1775, + "step": 23623 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931747077557444e-05, + "loss": 1.0515, + "step": 23624 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931741277243174e-05, + "loss": 1.1736, + "step": 23625 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931735476682784e-05, + "loss": 1.2036, + "step": 23626 + }, + { + "epoch": 0.02, + "learning_rate": 4.993172967587628e-05, + "loss": 1.0255, + "step": 23627 + }, + { + "epoch": 0.02, + "learning_rate": 4.993172387482366e-05, + "loss": 1.1074, + "step": 23628 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931718073524924e-05, + "loss": 0.8979, + "step": 23629 + }, + { + "epoch": 0.02, + "learning_rate": 4.993171227198008e-05, + "loss": 1.0418, + "step": 23630 + }, + { + "epoch": 0.02, + "learning_rate": 4.993170647018911e-05, + "loss": 0.8295, + "step": 23631 + }, + { + "epoch": 0.02, + "learning_rate": 4.993170066815203e-05, + "loss": 0.5075, + "step": 23632 + }, + { + "epoch": 0.02, + "learning_rate": 4.993169486586884e-05, + "loss": 0.6138, + "step": 23633 + }, + { + "epoch": 0.02, + "learning_rate": 4.993168906333953e-05, + "loss": 0.4799, + "step": 23634 + }, + { + "epoch": 0.02, + "learning_rate": 4.99316832605641e-05, + "loss": 0.5506, + "step": 23635 + }, + { + "epoch": 0.02, + "learning_rate": 4.993167745754256e-05, + "loss": 1.0181, + "step": 23636 + }, + { + "epoch": 0.02, + "learning_rate": 4.99316716542749e-05, + "loss": 1.0466, + "step": 23637 + }, + { + "epoch": 0.02, + "learning_rate": 4.993166585076113e-05, + "loss": 0.9689, + "step": 23638 + }, + { + "epoch": 0.02, + "learning_rate": 4.993166004700125e-05, + "loss": 1.102, + "step": 23639 + }, + { + "epoch": 0.02, + "learning_rate": 4.993165424299524e-05, + "loss": 0.3556, + "step": 23640 + }, + { + "epoch": 0.02, + "learning_rate": 4.993164843874313e-05, + "loss": 0.2803, + "step": 23641 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931642634244904e-05, + "loss": 0.2194, + "step": 23642 + }, + { + "epoch": 0.02, + "learning_rate": 4.993163682950055e-05, + "loss": 0.6376, + "step": 23643 + }, + { + "epoch": 0.02, + "learning_rate": 4.99316310245101e-05, + "loss": 1.0771, + "step": 23644 + }, + { + "epoch": 0.02, + "learning_rate": 4.993162521927352e-05, + "loss": 1.171, + "step": 23645 + }, + { + "epoch": 0.02, + "learning_rate": 4.993161941379083e-05, + "loss": 1.0057, + "step": 23646 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931613608062026e-05, + "loss": 0.9148, + "step": 23647 + }, + { + "epoch": 0.02, + "learning_rate": 4.993160780208711e-05, + "loss": 1.1109, + "step": 23648 + }, + { + "epoch": 0.02, + "learning_rate": 4.993160199586608e-05, + "loss": 1.1471, + "step": 23649 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931596189398936e-05, + "loss": 1.0191, + "step": 23650 + }, + { + "epoch": 0.02, + "learning_rate": 4.993159038268567e-05, + "loss": 0.7647, + "step": 23651 + }, + { + "epoch": 0.02, + "learning_rate": 4.99315845757263e-05, + "loss": 1.1461, + "step": 23652 + }, + { + "epoch": 0.02, + "learning_rate": 4.993157876852081e-05, + "loss": 1.3627, + "step": 23653 + }, + { + "epoch": 0.02, + "learning_rate": 4.993157296106921e-05, + "loss": 1.086, + "step": 23654 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931567153371495e-05, + "loss": 1.1152, + "step": 23655 + }, + { + "epoch": 0.02, + "learning_rate": 4.993156134542766e-05, + "loss": 1.2105, + "step": 23656 + }, + { + "epoch": 0.02, + "learning_rate": 4.993155553723772e-05, + "loss": 0.5528, + "step": 23657 + }, + { + "epoch": 0.02, + "learning_rate": 4.993154972880166e-05, + "loss": 0.2873, + "step": 23658 + }, + { + "epoch": 0.02, + "learning_rate": 4.993154392011948e-05, + "loss": 0.252, + "step": 23659 + }, + { + "epoch": 0.02, + "learning_rate": 4.99315381111912e-05, + "loss": 0.176, + "step": 23660 + }, + { + "epoch": 0.02, + "learning_rate": 4.99315323020168e-05, + "loss": 0.3991, + "step": 23661 + }, + { + "epoch": 0.02, + "learning_rate": 4.993152649259629e-05, + "loss": 0.6287, + "step": 23662 + }, + { + "epoch": 0.02, + "learning_rate": 4.993152068292966e-05, + "loss": 1.0101, + "step": 23663 + }, + { + "epoch": 0.02, + "learning_rate": 4.993151487301692e-05, + "loss": 0.9747, + "step": 23664 + }, + { + "epoch": 0.02, + "learning_rate": 4.993150906285807e-05, + "loss": 0.2485, + "step": 23665 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931503252453104e-05, + "loss": 0.4099, + "step": 23666 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931497441802024e-05, + "loss": 0.2613, + "step": 23667 + }, + { + "epoch": 0.02, + "learning_rate": 4.993149163090483e-05, + "loss": 0.7652, + "step": 23668 + }, + { + "epoch": 0.02, + "learning_rate": 4.993148581976152e-05, + "loss": 1.3451, + "step": 23669 + }, + { + "epoch": 0.02, + "learning_rate": 4.99314800083721e-05, + "loss": 1.252, + "step": 23670 + }, + { + "epoch": 0.02, + "learning_rate": 4.993147419673657e-05, + "loss": 1.0193, + "step": 23671 + }, + { + "epoch": 0.02, + "learning_rate": 4.993146838485493e-05, + "loss": 0.1945, + "step": 23672 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931462572727166e-05, + "loss": 0.5495, + "step": 23673 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931456760353295e-05, + "loss": 0.8914, + "step": 23674 + }, + { + "epoch": 0.02, + "learning_rate": 4.993145094773332e-05, + "loss": 0.9787, + "step": 23675 + }, + { + "epoch": 0.02, + "learning_rate": 4.993144513486722e-05, + "loss": 1.1274, + "step": 23676 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931439321755e-05, + "loss": 0.8015, + "step": 23677 + }, + { + "epoch": 0.02, + "learning_rate": 4.993143350839669e-05, + "loss": 0.8738, + "step": 23678 + }, + { + "epoch": 0.02, + "learning_rate": 4.993142769479225e-05, + "loss": 0.8787, + "step": 23679 + }, + { + "epoch": 0.02, + "learning_rate": 4.99314218809417e-05, + "loss": 1.3542, + "step": 23680 + }, + { + "epoch": 0.02, + "learning_rate": 4.993141606684504e-05, + "loss": 1.1512, + "step": 23681 + }, + { + "epoch": 0.02, + "learning_rate": 4.993141025250227e-05, + "loss": 0.9856, + "step": 23682 + }, + { + "epoch": 0.02, + "learning_rate": 4.993140443791339e-05, + "loss": 1.2006, + "step": 23683 + }, + { + "epoch": 0.02, + "learning_rate": 4.993139862307839e-05, + "loss": 1.1093, + "step": 23684 + }, + { + "epoch": 0.02, + "learning_rate": 4.993139280799728e-05, + "loss": 1.2161, + "step": 23685 + }, + { + "epoch": 0.02, + "learning_rate": 4.993138699267006e-05, + "loss": 0.8431, + "step": 23686 + }, + { + "epoch": 0.02, + "learning_rate": 4.993138117709672e-05, + "loss": 1.0788, + "step": 23687 + }, + { + "epoch": 0.02, + "learning_rate": 4.993137536127728e-05, + "loss": 1.1481, + "step": 23688 + }, + { + "epoch": 0.02, + "learning_rate": 4.993136954521171e-05, + "loss": 1.0185, + "step": 23689 + }, + { + "epoch": 0.02, + "learning_rate": 4.993136372890005e-05, + "loss": 0.9628, + "step": 23690 + }, + { + "epoch": 0.02, + "learning_rate": 4.993135791234227e-05, + "loss": 1.2382, + "step": 23691 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931352095538374e-05, + "loss": 1.0777, + "step": 23692 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931346278488367e-05, + "loss": 0.9592, + "step": 23693 + }, + { + "epoch": 0.02, + "learning_rate": 4.993134046119225e-05, + "loss": 1.1439, + "step": 23694 + }, + { + "epoch": 0.02, + "learning_rate": 4.993133464365002e-05, + "loss": 1.1385, + "step": 23695 + }, + { + "epoch": 0.02, + "learning_rate": 4.993132882586168e-05, + "loss": 1.0657, + "step": 23696 + }, + { + "epoch": 0.02, + "learning_rate": 4.993132300782723e-05, + "loss": 1.2884, + "step": 23697 + }, + { + "epoch": 0.02, + "learning_rate": 4.993131718954667e-05, + "loss": 0.8459, + "step": 23698 + }, + { + "epoch": 0.02, + "learning_rate": 4.993131137102e-05, + "loss": 1.1862, + "step": 23699 + }, + { + "epoch": 0.02, + "learning_rate": 4.993130555224721e-05, + "loss": 1.1447, + "step": 23700 + }, + { + "epoch": 0.02, + "learning_rate": 4.993129973322831e-05, + "loss": 0.9087, + "step": 23701 + }, + { + "epoch": 0.02, + "learning_rate": 4.99312939139633e-05, + "loss": 0.8303, + "step": 23702 + }, + { + "epoch": 0.02, + "learning_rate": 4.993128809445218e-05, + "loss": 1.0653, + "step": 23703 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931282274694945e-05, + "loss": 1.1346, + "step": 23704 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931276454691604e-05, + "loss": 0.9446, + "step": 23705 + }, + { + "epoch": 0.02, + "learning_rate": 4.993127063444215e-05, + "loss": 1.0693, + "step": 23706 + }, + { + "epoch": 0.02, + "learning_rate": 4.993126481394659e-05, + "loss": 1.405, + "step": 23707 + }, + { + "epoch": 0.02, + "learning_rate": 4.993125899320491e-05, + "loss": 1.082, + "step": 23708 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931253172217125e-05, + "loss": 1.0827, + "step": 23709 + }, + { + "epoch": 0.02, + "learning_rate": 4.993124735098322e-05, + "loss": 1.0239, + "step": 23710 + }, + { + "epoch": 0.02, + "learning_rate": 4.993124152950322e-05, + "loss": 1.069, + "step": 23711 + }, + { + "epoch": 0.02, + "learning_rate": 4.99312357077771e-05, + "loss": 1.0926, + "step": 23712 + }, + { + "epoch": 0.02, + "learning_rate": 4.993122988580487e-05, + "loss": 1.0431, + "step": 23713 + }, + { + "epoch": 0.02, + "learning_rate": 4.993122406358653e-05, + "loss": 1.1593, + "step": 23714 + }, + { + "epoch": 0.02, + "learning_rate": 4.993121824112208e-05, + "loss": 0.9782, + "step": 23715 + }, + { + "epoch": 0.02, + "learning_rate": 4.993121241841152e-05, + "loss": 0.9652, + "step": 23716 + }, + { + "epoch": 0.02, + "learning_rate": 4.993120659545485e-05, + "loss": 1.4466, + "step": 23717 + }, + { + "epoch": 0.02, + "learning_rate": 4.993120077225207e-05, + "loss": 1.2158, + "step": 23718 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931194948803174e-05, + "loss": 0.9522, + "step": 23719 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931189125108166e-05, + "loss": 1.0287, + "step": 23720 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931183301167065e-05, + "loss": 1.0403, + "step": 23721 + }, + { + "epoch": 0.02, + "learning_rate": 4.993117747697984e-05, + "loss": 0.7293, + "step": 23722 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931171652546506e-05, + "loss": 1.1244, + "step": 23723 + }, + { + "epoch": 0.02, + "learning_rate": 4.993116582786706e-05, + "loss": 1.0774, + "step": 23724 + }, + { + "epoch": 0.02, + "learning_rate": 4.993116000294151e-05, + "loss": 1.0303, + "step": 23725 + }, + { + "epoch": 0.02, + "learning_rate": 4.993115417776985e-05, + "loss": 0.8624, + "step": 23726 + }, + { + "epoch": 0.02, + "learning_rate": 4.993114835235208e-05, + "loss": 1.089, + "step": 23727 + }, + { + "epoch": 0.02, + "learning_rate": 4.993114252668819e-05, + "loss": 1.1367, + "step": 23728 + }, + { + "epoch": 0.02, + "learning_rate": 4.99311367007782e-05, + "loss": 1.1349, + "step": 23729 + }, + { + "epoch": 0.02, + "learning_rate": 4.99311308746221e-05, + "loss": 0.9834, + "step": 23730 + }, + { + "epoch": 0.02, + "learning_rate": 4.993112504821989e-05, + "loss": 0.6479, + "step": 23731 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931119221571567e-05, + "loss": 0.8246, + "step": 23732 + }, + { + "epoch": 0.02, + "learning_rate": 4.993111339467714e-05, + "loss": 1.1826, + "step": 23733 + }, + { + "epoch": 0.02, + "learning_rate": 4.99311075675366e-05, + "loss": 1.0771, + "step": 23734 + }, + { + "epoch": 0.02, + "learning_rate": 4.993110174014994e-05, + "loss": 0.7828, + "step": 23735 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931095912517186e-05, + "loss": 0.9775, + "step": 23736 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931090084638324e-05, + "loss": 1.1782, + "step": 23737 + }, + { + "epoch": 0.02, + "learning_rate": 4.993108425651335e-05, + "loss": 1.6867, + "step": 23738 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931078428142256e-05, + "loss": 0.9503, + "step": 23739 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931072599525065e-05, + "loss": 0.9879, + "step": 23740 + }, + { + "epoch": 0.02, + "learning_rate": 4.993106677066176e-05, + "loss": 0.8653, + "step": 23741 + }, + { + "epoch": 0.02, + "learning_rate": 4.993106094155234e-05, + "loss": 0.9791, + "step": 23742 + }, + { + "epoch": 0.02, + "learning_rate": 4.993105511219682e-05, + "loss": 1.0325, + "step": 23743 + }, + { + "epoch": 0.02, + "learning_rate": 4.993104928259519e-05, + "loss": 1.5904, + "step": 23744 + }, + { + "epoch": 0.02, + "learning_rate": 4.993104345274745e-05, + "loss": 1.3606, + "step": 23745 + }, + { + "epoch": 0.02, + "learning_rate": 4.99310376226536e-05, + "loss": 1.1178, + "step": 23746 + }, + { + "epoch": 0.02, + "learning_rate": 4.993103179231364e-05, + "loss": 1.1239, + "step": 23747 + }, + { + "epoch": 0.02, + "learning_rate": 4.993102596172758e-05, + "loss": 1.0504, + "step": 23748 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931020130895405e-05, + "loss": 1.0831, + "step": 23749 + }, + { + "epoch": 0.02, + "learning_rate": 4.993101429981712e-05, + "loss": 1.3501, + "step": 23750 + }, + { + "epoch": 0.02, + "learning_rate": 4.993100846849273e-05, + "loss": 1.1157, + "step": 23751 + }, + { + "epoch": 0.02, + "learning_rate": 4.993100263692223e-05, + "loss": 1.5701, + "step": 23752 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930996805105625e-05, + "loss": 1.4327, + "step": 23753 + }, + { + "epoch": 0.02, + "learning_rate": 4.99309909730429e-05, + "loss": 1.0792, + "step": 23754 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930985140734076e-05, + "loss": 1.0079, + "step": 23755 + }, + { + "epoch": 0.02, + "learning_rate": 4.993097930817915e-05, + "loss": 0.8345, + "step": 23756 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930973475378105e-05, + "loss": 0.9985, + "step": 23757 + }, + { + "epoch": 0.02, + "learning_rate": 4.993096764233096e-05, + "loss": 1.067, + "step": 23758 + }, + { + "epoch": 0.02, + "learning_rate": 4.99309618090377e-05, + "loss": 1.1998, + "step": 23759 + }, + { + "epoch": 0.02, + "learning_rate": 4.993095597549833e-05, + "loss": 1.2613, + "step": 23760 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930950141712864e-05, + "loss": 0.9185, + "step": 23761 + }, + { + "epoch": 0.02, + "learning_rate": 4.993094430768128e-05, + "loss": 0.8382, + "step": 23762 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930938473403594e-05, + "loss": 0.9646, + "step": 23763 + }, + { + "epoch": 0.02, + "learning_rate": 4.99309326388798e-05, + "loss": 0.566, + "step": 23764 + }, + { + "epoch": 0.02, + "learning_rate": 4.993092680410989e-05, + "loss": 1.3314, + "step": 23765 + }, + { + "epoch": 0.02, + "learning_rate": 4.993092096909388e-05, + "loss": 1.3131, + "step": 23766 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930915133831756e-05, + "loss": 0.9487, + "step": 23767 + }, + { + "epoch": 0.02, + "learning_rate": 4.993090929832354e-05, + "loss": 1.0897, + "step": 23768 + }, + { + "epoch": 0.02, + "learning_rate": 4.99309034625692e-05, + "loss": 0.9157, + "step": 23769 + }, + { + "epoch": 0.02, + "learning_rate": 4.993089762656876e-05, + "loss": 1.2563, + "step": 23770 + }, + { + "epoch": 0.02, + "learning_rate": 4.993089179032221e-05, + "loss": 0.9168, + "step": 23771 + }, + { + "epoch": 0.02, + "learning_rate": 4.993088595382955e-05, + "loss": 1.2192, + "step": 23772 + }, + { + "epoch": 0.02, + "learning_rate": 4.993088011709079e-05, + "loss": 0.8755, + "step": 23773 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930874280105924e-05, + "loss": 1.1551, + "step": 23774 + }, + { + "epoch": 0.02, + "learning_rate": 4.993086844287494e-05, + "loss": 1.2147, + "step": 23775 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930862605397855e-05, + "loss": 1.0485, + "step": 23776 + }, + { + "epoch": 0.02, + "learning_rate": 4.993085676767467e-05, + "loss": 1.1032, + "step": 23777 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930850929705365e-05, + "loss": 0.8923, + "step": 23778 + }, + { + "epoch": 0.02, + "learning_rate": 4.993084509148996e-05, + "loss": 1.1027, + "step": 23779 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930839253028446e-05, + "loss": 1.2721, + "step": 23780 + }, + { + "epoch": 0.02, + "learning_rate": 4.993083341432083e-05, + "loss": 1.2301, + "step": 23781 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930827575367106e-05, + "loss": 1.1322, + "step": 23782 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930821736167275e-05, + "loss": 1.0031, + "step": 23783 + }, + { + "epoch": 0.02, + "learning_rate": 4.993081589672134e-05, + "loss": 0.8995, + "step": 23784 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930810057029285e-05, + "loss": 1.4999, + "step": 23785 + }, + { + "epoch": 0.02, + "learning_rate": 4.993080421709114e-05, + "loss": 1.0827, + "step": 23786 + }, + { + "epoch": 0.02, + "learning_rate": 4.993079837690689e-05, + "loss": 1.0451, + "step": 23787 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930792536476514e-05, + "loss": 1.1639, + "step": 23788 + }, + { + "epoch": 0.02, + "learning_rate": 4.993078669580005e-05, + "loss": 1.0526, + "step": 23789 + }, + { + "epoch": 0.02, + "learning_rate": 4.993078085487747e-05, + "loss": 1.1766, + "step": 23790 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930775013708786e-05, + "loss": 1.1207, + "step": 23791 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930769172294e-05, + "loss": 1.5984, + "step": 23792 + }, + { + "epoch": 0.02, + "learning_rate": 4.99307633306331e-05, + "loss": 1.1601, + "step": 23793 + }, + { + "epoch": 0.02, + "learning_rate": 4.99307574887261e-05, + "loss": 0.8567, + "step": 23794 + }, + { + "epoch": 0.02, + "learning_rate": 4.993075164657299e-05, + "loss": 1.1053, + "step": 23795 + }, + { + "epoch": 0.02, + "learning_rate": 4.993074580417378e-05, + "loss": 1.0403, + "step": 23796 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930739961528464e-05, + "loss": 0.9976, + "step": 23797 + }, + { + "epoch": 0.02, + "learning_rate": 4.993073411863703e-05, + "loss": 1.0831, + "step": 23798 + }, + { + "epoch": 0.02, + "learning_rate": 4.993072827549951e-05, + "loss": 1.1232, + "step": 23799 + }, + { + "epoch": 0.02, + "learning_rate": 4.993072243211587e-05, + "loss": 0.9499, + "step": 23800 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930716588486125e-05, + "loss": 1.0696, + "step": 23801 + }, + { + "epoch": 0.02, + "learning_rate": 4.993071074461028e-05, + "loss": 1.1445, + "step": 23802 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930704900488326e-05, + "loss": 1.1471, + "step": 23803 + }, + { + "epoch": 0.02, + "learning_rate": 4.993069905612027e-05, + "loss": 1.2165, + "step": 23804 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930693211506106e-05, + "loss": 0.9736, + "step": 23805 + }, + { + "epoch": 0.02, + "learning_rate": 4.993068736664584e-05, + "loss": 1.0538, + "step": 23806 + }, + { + "epoch": 0.02, + "learning_rate": 4.993068152153947e-05, + "loss": 0.9501, + "step": 23807 + }, + { + "epoch": 0.02, + "learning_rate": 4.993067567618699e-05, + "loss": 0.2198, + "step": 23808 + }, + { + "epoch": 0.02, + "learning_rate": 4.993066983058841e-05, + "loss": 0.9532, + "step": 23809 + }, + { + "epoch": 0.02, + "learning_rate": 4.993066398474371e-05, + "loss": 1.0315, + "step": 23810 + }, + { + "epoch": 0.02, + "learning_rate": 4.993065813865292e-05, + "loss": 0.9177, + "step": 23811 + }, + { + "epoch": 0.02, + "learning_rate": 4.993065229231602e-05, + "loss": 0.8108, + "step": 23812 + }, + { + "epoch": 0.02, + "learning_rate": 4.993064644573302e-05, + "loss": 1.2054, + "step": 23813 + }, + { + "epoch": 0.02, + "learning_rate": 4.993064059890391e-05, + "loss": 1.0114, + "step": 23814 + }, + { + "epoch": 0.02, + "learning_rate": 4.99306347518287e-05, + "loss": 1.3438, + "step": 23815 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930628904507374e-05, + "loss": 1.1394, + "step": 23816 + }, + { + "epoch": 0.02, + "learning_rate": 4.993062305693995e-05, + "loss": 1.4066, + "step": 23817 + }, + { + "epoch": 0.02, + "learning_rate": 4.993061720912642e-05, + "loss": 1.3887, + "step": 23818 + }, + { + "epoch": 0.02, + "learning_rate": 4.993061136106679e-05, + "loss": 0.258, + "step": 23819 + }, + { + "epoch": 0.02, + "learning_rate": 4.993060551276105e-05, + "loss": 0.7334, + "step": 23820 + }, + { + "epoch": 0.02, + "learning_rate": 4.993059966420922e-05, + "loss": 1.4757, + "step": 23821 + }, + { + "epoch": 0.02, + "learning_rate": 4.993059381541127e-05, + "loss": 1.0754, + "step": 23822 + }, + { + "epoch": 0.02, + "learning_rate": 4.993058796636722e-05, + "loss": 0.6737, + "step": 23823 + }, + { + "epoch": 0.02, + "learning_rate": 4.993058211707706e-05, + "loss": 0.7908, + "step": 23824 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930576267540805e-05, + "loss": 1.0169, + "step": 23825 + }, + { + "epoch": 0.02, + "learning_rate": 4.993057041775845e-05, + "loss": 1.0715, + "step": 23826 + }, + { + "epoch": 0.02, + "learning_rate": 4.993056456772997e-05, + "loss": 0.8269, + "step": 23827 + }, + { + "epoch": 0.02, + "learning_rate": 4.993055871745541e-05, + "loss": 0.4865, + "step": 23828 + }, + { + "epoch": 0.02, + "learning_rate": 4.993055286693473e-05, + "loss": 0.4388, + "step": 23829 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930547016167956e-05, + "loss": 0.4696, + "step": 23830 + }, + { + "epoch": 0.02, + "learning_rate": 4.993054116515508e-05, + "loss": 0.4639, + "step": 23831 + }, + { + "epoch": 0.02, + "learning_rate": 4.993053531389609e-05, + "loss": 0.3263, + "step": 23832 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930529462390995e-05, + "loss": 0.5169, + "step": 23833 + }, + { + "epoch": 0.02, + "learning_rate": 4.99305236106398e-05, + "loss": 0.4791, + "step": 23834 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930517758642503e-05, + "loss": 1.4377, + "step": 23835 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930511906399104e-05, + "loss": 1.2656, + "step": 23836 + }, + { + "epoch": 0.02, + "learning_rate": 4.99305060539096e-05, + "loss": 1.0563, + "step": 23837 + }, + { + "epoch": 0.02, + "learning_rate": 4.993050020117399e-05, + "loss": 0.92, + "step": 23838 + }, + { + "epoch": 0.02, + "learning_rate": 4.993049434819228e-05, + "loss": 1.0601, + "step": 23839 + }, + { + "epoch": 0.02, + "learning_rate": 4.993048849496447e-05, + "loss": 0.8547, + "step": 23840 + }, + { + "epoch": 0.02, + "learning_rate": 4.993048264149055e-05, + "loss": 1.0293, + "step": 23841 + }, + { + "epoch": 0.02, + "learning_rate": 4.993047678777053e-05, + "loss": 1.0402, + "step": 23842 + }, + { + "epoch": 0.02, + "learning_rate": 4.993047093380441e-05, + "loss": 1.0667, + "step": 23843 + }, + { + "epoch": 0.02, + "learning_rate": 4.993046507959218e-05, + "loss": 1.0693, + "step": 23844 + }, + { + "epoch": 0.02, + "learning_rate": 4.993045922513385e-05, + "loss": 1.0883, + "step": 23845 + }, + { + "epoch": 0.02, + "learning_rate": 4.993045337042942e-05, + "loss": 0.7582, + "step": 23846 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930447515478875e-05, + "loss": 1.0995, + "step": 23847 + }, + { + "epoch": 0.02, + "learning_rate": 4.993044166028224e-05, + "loss": 1.0469, + "step": 23848 + }, + { + "epoch": 0.02, + "learning_rate": 4.99304358048395e-05, + "loss": 1.0867, + "step": 23849 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930429949150655e-05, + "loss": 0.9903, + "step": 23850 + }, + { + "epoch": 0.02, + "learning_rate": 4.993042409321571e-05, + "loss": 1.0323, + "step": 23851 + }, + { + "epoch": 0.02, + "learning_rate": 4.993041823703466e-05, + "loss": 1.0535, + "step": 23852 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930412380607504e-05, + "loss": 1.1562, + "step": 23853 + }, + { + "epoch": 0.02, + "learning_rate": 4.993040652393425e-05, + "loss": 0.9531, + "step": 23854 + }, + { + "epoch": 0.02, + "learning_rate": 4.993040066701489e-05, + "loss": 0.8346, + "step": 23855 + }, + { + "epoch": 0.02, + "learning_rate": 4.993039480984944e-05, + "loss": 1.071, + "step": 23856 + }, + { + "epoch": 0.02, + "learning_rate": 4.993038895243787e-05, + "loss": 0.9997, + "step": 23857 + }, + { + "epoch": 0.02, + "learning_rate": 4.993038309478021e-05, + "loss": 1.2292, + "step": 23858 + }, + { + "epoch": 0.02, + "learning_rate": 4.993037723687643e-05, + "loss": 1.0845, + "step": 23859 + }, + { + "epoch": 0.02, + "learning_rate": 4.993037137872657e-05, + "loss": 1.163, + "step": 23860 + }, + { + "epoch": 0.02, + "learning_rate": 4.99303655203306e-05, + "loss": 1.0931, + "step": 23861 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930359661688525e-05, + "loss": 1.0741, + "step": 23862 + }, + { + "epoch": 0.02, + "learning_rate": 4.993035380280035e-05, + "loss": 1.1054, + "step": 23863 + }, + { + "epoch": 0.02, + "learning_rate": 4.993034794366607e-05, + "loss": 1.2512, + "step": 23864 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930342084285694e-05, + "loss": 0.9502, + "step": 23865 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930336224659217e-05, + "loss": 1.3488, + "step": 23866 + }, + { + "epoch": 0.02, + "learning_rate": 4.993033036478663e-05, + "loss": 1.2018, + "step": 23867 + }, + { + "epoch": 0.02, + "learning_rate": 4.993032450466795e-05, + "loss": 0.9976, + "step": 23868 + }, + { + "epoch": 0.02, + "learning_rate": 4.993031864430316e-05, + "loss": 1.1617, + "step": 23869 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930312783692276e-05, + "loss": 0.9424, + "step": 23870 + }, + { + "epoch": 0.02, + "learning_rate": 4.993030692283528e-05, + "loss": 1.0249, + "step": 23871 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930301061732196e-05, + "loss": 1.0682, + "step": 23872 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930295200383e-05, + "loss": 1.039, + "step": 23873 + }, + { + "epoch": 0.02, + "learning_rate": 4.993028933878771e-05, + "loss": 1.2089, + "step": 23874 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930283476946315e-05, + "loss": 0.9747, + "step": 23875 + }, + { + "epoch": 0.02, + "learning_rate": 4.993027761485882e-05, + "loss": 1.0975, + "step": 23876 + }, + { + "epoch": 0.02, + "learning_rate": 4.993027175252522e-05, + "loss": 0.8476, + "step": 23877 + }, + { + "epoch": 0.02, + "learning_rate": 4.993026588994553e-05, + "loss": 0.937, + "step": 23878 + }, + { + "epoch": 0.02, + "learning_rate": 4.993026002711972e-05, + "loss": 0.8607, + "step": 23879 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930254164047825e-05, + "loss": 0.8889, + "step": 23880 + }, + { + "epoch": 0.02, + "learning_rate": 4.993024830072982e-05, + "loss": 1.0154, + "step": 23881 + }, + { + "epoch": 0.02, + "learning_rate": 4.993024243716572e-05, + "loss": 1.1148, + "step": 23882 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930236573355515e-05, + "loss": 0.9134, + "step": 23883 + }, + { + "epoch": 0.02, + "learning_rate": 4.993023070929921e-05, + "loss": 1.1565, + "step": 23884 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930224844996806e-05, + "loss": 1.2463, + "step": 23885 + }, + { + "epoch": 0.02, + "learning_rate": 4.993021898044831e-05, + "loss": 0.96, + "step": 23886 + }, + { + "epoch": 0.02, + "learning_rate": 4.99302131156537e-05, + "loss": 0.6576, + "step": 23887 + }, + { + "epoch": 0.02, + "learning_rate": 4.993020725061299e-05, + "loss": 1.0451, + "step": 23888 + }, + { + "epoch": 0.02, + "learning_rate": 4.993020138532618e-05, + "loss": 1.1067, + "step": 23889 + }, + { + "epoch": 0.02, + "learning_rate": 4.993019551979328e-05, + "loss": 1.0875, + "step": 23890 + }, + { + "epoch": 0.02, + "learning_rate": 4.993018965401427e-05, + "loss": 1.3352, + "step": 23891 + }, + { + "epoch": 0.02, + "learning_rate": 4.993018378798916e-05, + "loss": 0.8975, + "step": 23892 + }, + { + "epoch": 0.02, + "learning_rate": 4.993017792171795e-05, + "loss": 1.0853, + "step": 23893 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930172055200644e-05, + "loss": 0.9325, + "step": 23894 + }, + { + "epoch": 0.02, + "learning_rate": 4.993016618843723e-05, + "loss": 0.9883, + "step": 23895 + }, + { + "epoch": 0.02, + "learning_rate": 4.993016032142773e-05, + "loss": 0.836, + "step": 23896 + }, + { + "epoch": 0.02, + "learning_rate": 4.993015445417212e-05, + "loss": 0.9883, + "step": 23897 + }, + { + "epoch": 0.02, + "learning_rate": 4.993014858667041e-05, + "loss": 1.0446, + "step": 23898 + }, + { + "epoch": 0.02, + "learning_rate": 4.99301427189226e-05, + "loss": 0.7852, + "step": 23899 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930136850928686e-05, + "loss": 1.119, + "step": 23900 + }, + { + "epoch": 0.02, + "learning_rate": 4.993013098268868e-05, + "loss": 1.0756, + "step": 23901 + }, + { + "epoch": 0.02, + "learning_rate": 4.993012511420257e-05, + "loss": 1.1287, + "step": 23902 + }, + { + "epoch": 0.02, + "learning_rate": 4.993011924547036e-05, + "loss": 1.2119, + "step": 23903 + }, + { + "epoch": 0.02, + "learning_rate": 4.993011337649206e-05, + "loss": 1.1498, + "step": 23904 + }, + { + "epoch": 0.02, + "learning_rate": 4.993010750726765e-05, + "loss": 1.1328, + "step": 23905 + }, + { + "epoch": 0.02, + "learning_rate": 4.993010163779714e-05, + "loss": 1.2905, + "step": 23906 + }, + { + "epoch": 0.02, + "learning_rate": 4.993009576808054e-05, + "loss": 1.1138, + "step": 23907 + }, + { + "epoch": 0.02, + "learning_rate": 4.993008989811784e-05, + "loss": 0.9635, + "step": 23908 + }, + { + "epoch": 0.02, + "learning_rate": 4.993008402790903e-05, + "loss": 1.0723, + "step": 23909 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930078157454126e-05, + "loss": 1.093, + "step": 23910 + }, + { + "epoch": 0.02, + "learning_rate": 4.993007228675312e-05, + "loss": 1.1435, + "step": 23911 + }, + { + "epoch": 0.02, + "learning_rate": 4.993006641580602e-05, + "loss": 0.9883, + "step": 23912 + }, + { + "epoch": 0.02, + "learning_rate": 4.993006054461282e-05, + "loss": 1.2072, + "step": 23913 + }, + { + "epoch": 0.02, + "learning_rate": 4.993005467317352e-05, + "loss": 1.1739, + "step": 23914 + }, + { + "epoch": 0.02, + "learning_rate": 4.993004880148812e-05, + "loss": 1.0008, + "step": 23915 + }, + { + "epoch": 0.02, + "learning_rate": 4.993004292955662e-05, + "loss": 0.9375, + "step": 23916 + }, + { + "epoch": 0.02, + "learning_rate": 4.993003705737902e-05, + "loss": 0.8533, + "step": 23917 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930031184955325e-05, + "loss": 1.1426, + "step": 23918 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930025312285537e-05, + "loss": 1.0799, + "step": 23919 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930019439369634e-05, + "loss": 0.9404, + "step": 23920 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930013566207644e-05, + "loss": 1.2811, + "step": 23921 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930007692799554e-05, + "loss": 1.1519, + "step": 23922 + }, + { + "epoch": 0.02, + "learning_rate": 4.9930001819145364e-05, + "loss": 1.3707, + "step": 23923 + }, + { + "epoch": 0.02, + "learning_rate": 4.992999594524508e-05, + "loss": 1.3059, + "step": 23924 + }, + { + "epoch": 0.02, + "learning_rate": 4.992999007109869e-05, + "loss": 1.1786, + "step": 23925 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929984196706205e-05, + "loss": 1.7966, + "step": 23926 + }, + { + "epoch": 0.02, + "learning_rate": 4.992997832206762e-05, + "loss": 0.9254, + "step": 23927 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929972447182935e-05, + "loss": 0.8747, + "step": 23928 + }, + { + "epoch": 0.02, + "learning_rate": 4.992996657205216e-05, + "loss": 1.0476, + "step": 23929 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929960696675284e-05, + "loss": 0.7788, + "step": 23930 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929954821052304e-05, + "loss": 0.8293, + "step": 23931 + }, + { + "epoch": 0.02, + "learning_rate": 4.992994894518324e-05, + "loss": 0.5478, + "step": 23932 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929943069068065e-05, + "loss": 0.456, + "step": 23933 + }, + { + "epoch": 0.02, + "learning_rate": 4.992993719270679e-05, + "loss": 0.4008, + "step": 23934 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929931316099424e-05, + "loss": 1.0019, + "step": 23935 + }, + { + "epoch": 0.02, + "learning_rate": 4.992992543924596e-05, + "loss": 0.9696, + "step": 23936 + }, + { + "epoch": 0.02, + "learning_rate": 4.99299195621464e-05, + "loss": 1.1781, + "step": 23937 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929913684800734e-05, + "loss": 1.1525, + "step": 23938 + }, + { + "epoch": 0.02, + "learning_rate": 4.992990780720898e-05, + "loss": 1.1141, + "step": 23939 + }, + { + "epoch": 0.02, + "learning_rate": 4.992990192937112e-05, + "loss": 1.0472, + "step": 23940 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929896051287174e-05, + "loss": 0.9839, + "step": 23941 + }, + { + "epoch": 0.02, + "learning_rate": 4.992989017295712e-05, + "loss": 0.909, + "step": 23942 + }, + { + "epoch": 0.02, + "learning_rate": 4.992988429438097e-05, + "loss": 1.163, + "step": 23943 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929878415558725e-05, + "loss": 1.2155, + "step": 23944 + }, + { + "epoch": 0.02, + "learning_rate": 4.992987253649038e-05, + "loss": 0.8846, + "step": 23945 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929866657175945e-05, + "loss": 0.9096, + "step": 23946 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929860777615414e-05, + "loss": 1.2423, + "step": 23947 + }, + { + "epoch": 0.02, + "learning_rate": 4.992985489780877e-05, + "loss": 1.0584, + "step": 23948 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929849017756045e-05, + "loss": 1.1496, + "step": 23949 + }, + { + "epoch": 0.02, + "learning_rate": 4.992984313745721e-05, + "loss": 1.3065, + "step": 23950 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929837256912295e-05, + "loss": 1.0829, + "step": 23951 + }, + { + "epoch": 0.02, + "learning_rate": 4.992983137612127e-05, + "loss": 1.1409, + "step": 23952 + }, + { + "epoch": 0.02, + "learning_rate": 4.992982549508415e-05, + "loss": 1.1532, + "step": 23953 + }, + { + "epoch": 0.02, + "learning_rate": 4.992981961380093e-05, + "loss": 1.1388, + "step": 23954 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929813732271624e-05, + "loss": 1.2693, + "step": 23955 + }, + { + "epoch": 0.02, + "learning_rate": 4.992980785049621e-05, + "loss": 1.2789, + "step": 23956 + }, + { + "epoch": 0.02, + "learning_rate": 4.992980196847471e-05, + "loss": 1.1358, + "step": 23957 + }, + { + "epoch": 0.02, + "learning_rate": 4.992979608620711e-05, + "loss": 0.7598, + "step": 23958 + }, + { + "epoch": 0.02, + "learning_rate": 4.992979020369341e-05, + "loss": 0.8166, + "step": 23959 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929784320933615e-05, + "loss": 1.2738, + "step": 23960 + }, + { + "epoch": 0.02, + "learning_rate": 4.992977843792772e-05, + "loss": 1.0608, + "step": 23961 + }, + { + "epoch": 0.02, + "learning_rate": 4.992977255467574e-05, + "loss": 0.8491, + "step": 23962 + }, + { + "epoch": 0.02, + "learning_rate": 4.992976667117766e-05, + "loss": 1.1542, + "step": 23963 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929760787433474e-05, + "loss": 0.9427, + "step": 23964 + }, + { + "epoch": 0.02, + "learning_rate": 4.99297549034432e-05, + "loss": 0.9826, + "step": 23965 + }, + { + "epoch": 0.02, + "learning_rate": 4.992974901920683e-05, + "loss": 0.7197, + "step": 23966 + }, + { + "epoch": 0.02, + "learning_rate": 4.992974313472436e-05, + "loss": 1.262, + "step": 23967 + }, + { + "epoch": 0.02, + "learning_rate": 4.99297372499958e-05, + "loss": 0.9121, + "step": 23968 + }, + { + "epoch": 0.02, + "learning_rate": 4.992973136502114e-05, + "loss": 1.2549, + "step": 23969 + }, + { + "epoch": 0.02, + "learning_rate": 4.992972547980038e-05, + "loss": 1.747, + "step": 23970 + }, + { + "epoch": 0.02, + "learning_rate": 4.992971959433354e-05, + "loss": 0.9309, + "step": 23971 + }, + { + "epoch": 0.02, + "learning_rate": 4.992971370862059e-05, + "loss": 1.2747, + "step": 23972 + }, + { + "epoch": 0.02, + "learning_rate": 4.992970782266154e-05, + "loss": 1.0841, + "step": 23973 + }, + { + "epoch": 0.02, + "learning_rate": 4.992970193645641e-05, + "loss": 1.1291, + "step": 23974 + }, + { + "epoch": 0.02, + "learning_rate": 4.992969605000517e-05, + "loss": 0.9889, + "step": 23975 + }, + { + "epoch": 0.02, + "learning_rate": 4.992969016330785e-05, + "loss": 1.364, + "step": 23976 + }, + { + "epoch": 0.02, + "learning_rate": 4.992968427636442e-05, + "loss": 1.2789, + "step": 23977 + }, + { + "epoch": 0.02, + "learning_rate": 4.99296783891749e-05, + "loss": 1.0603, + "step": 23978 + }, + { + "epoch": 0.02, + "learning_rate": 4.992967250173929e-05, + "loss": 1.2268, + "step": 23979 + }, + { + "epoch": 0.02, + "learning_rate": 4.992966661405758e-05, + "loss": 1.4013, + "step": 23980 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929660726129776e-05, + "loss": 1.2344, + "step": 23981 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929654837955876e-05, + "loss": 0.8566, + "step": 23982 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929648949535875e-05, + "loss": 1.393, + "step": 23983 + }, + { + "epoch": 0.02, + "learning_rate": 4.992964306086979e-05, + "loss": 1.4354, + "step": 23984 + }, + { + "epoch": 0.02, + "learning_rate": 4.99296371719576e-05, + "loss": 1.0701, + "step": 23985 + }, + { + "epoch": 0.02, + "learning_rate": 4.992963128279932e-05, + "loss": 1.0008, + "step": 23986 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929625393394944e-05, + "loss": 1.2444, + "step": 23987 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929619503744476e-05, + "loss": 1.2604, + "step": 23988 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929613613847914e-05, + "loss": 1.4118, + "step": 23989 + }, + { + "epoch": 0.02, + "learning_rate": 4.992960772370525e-05, + "loss": 1.4226, + "step": 23990 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929601833316495e-05, + "loss": 0.8521, + "step": 23991 + }, + { + "epoch": 0.02, + "learning_rate": 4.992959594268165e-05, + "loss": 0.9982, + "step": 23992 + }, + { + "epoch": 0.02, + "learning_rate": 4.992959005180071e-05, + "loss": 0.9278, + "step": 23993 + }, + { + "epoch": 0.02, + "learning_rate": 4.992958416067367e-05, + "loss": 0.7392, + "step": 23994 + }, + { + "epoch": 0.02, + "learning_rate": 4.992957826930054e-05, + "loss": 1.1162, + "step": 23995 + }, + { + "epoch": 0.02, + "learning_rate": 4.992957237768132e-05, + "loss": 0.9021, + "step": 23996 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929566485815995e-05, + "loss": 0.9197, + "step": 23997 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929560593704584e-05, + "loss": 1.2618, + "step": 23998 + }, + { + "epoch": 0.02, + "learning_rate": 4.992955470134707e-05, + "loss": 1.0041, + "step": 23999 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929548808743475e-05, + "loss": 1.0374, + "step": 24000 + }, + { + "epoch": 0.02, + "eval_loss": 1.0256379842758179, + "eval_runtime": 86.2144, + "eval_samples_per_second": 16.065, + "eval_steps_per_second": 4.025, + "step": 24000 + }, + { + "epoch": 0.02, + "learning_rate": 4.992954291589378e-05, + "loss": 0.8192, + "step": 24001 + }, + { + "epoch": 0.02, + "learning_rate": 4.992953702279799e-05, + "loss": 0.9872, + "step": 24002 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929531129456106e-05, + "loss": 1.1955, + "step": 24003 + }, + { + "epoch": 0.02, + "learning_rate": 4.992952523586813e-05, + "loss": 1.1084, + "step": 24004 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929519342034054e-05, + "loss": 1.1383, + "step": 24005 + }, + { + "epoch": 0.02, + "learning_rate": 4.992951344795389e-05, + "loss": 1.1065, + "step": 24006 + }, + { + "epoch": 0.02, + "learning_rate": 4.992950755362763e-05, + "loss": 0.8596, + "step": 24007 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929501659055275e-05, + "loss": 1.324, + "step": 24008 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929495764236835e-05, + "loss": 1.0322, + "step": 24009 + }, + { + "epoch": 0.02, + "learning_rate": 4.992948986917229e-05, + "loss": 1.119, + "step": 24010 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929483973861654e-05, + "loss": 1.2774, + "step": 24011 + }, + { + "epoch": 0.02, + "learning_rate": 4.992947807830493e-05, + "loss": 1.9411, + "step": 24012 + }, + { + "epoch": 0.02, + "learning_rate": 4.992947218250211e-05, + "loss": 0.8934, + "step": 24013 + }, + { + "epoch": 0.02, + "learning_rate": 4.99294662864532e-05, + "loss": 0.8308, + "step": 24014 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929460390158196e-05, + "loss": 1.1224, + "step": 24015 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929454493617094e-05, + "loss": 0.81, + "step": 24016 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929448596829906e-05, + "loss": 0.7787, + "step": 24017 + }, + { + "epoch": 0.02, + "learning_rate": 4.992944269979662e-05, + "loss": 0.873, + "step": 24018 + }, + { + "epoch": 0.02, + "learning_rate": 4.992943680251724e-05, + "loss": 1.032, + "step": 24019 + }, + { + "epoch": 0.02, + "learning_rate": 4.992943090499177e-05, + "loss": 1.22, + "step": 24020 + }, + { + "epoch": 0.02, + "learning_rate": 4.99294250072202e-05, + "loss": 0.8855, + "step": 24021 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929419109202546e-05, + "loss": 1.2705, + "step": 24022 + }, + { + "epoch": 0.02, + "learning_rate": 4.99294132109388e-05, + "loss": 0.8653, + "step": 24023 + }, + { + "epoch": 0.02, + "learning_rate": 4.992940731242895e-05, + "loss": 1.1048, + "step": 24024 + }, + { + "epoch": 0.02, + "learning_rate": 4.992940141367302e-05, + "loss": 0.7766, + "step": 24025 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929395514671e-05, + "loss": 1.1276, + "step": 24026 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929389615422874e-05, + "loss": 1.0022, + "step": 24027 + }, + { + "epoch": 0.02, + "learning_rate": 4.992938371592866e-05, + "loss": 1.1292, + "step": 24028 + }, + { + "epoch": 0.02, + "learning_rate": 4.992937781618836e-05, + "loss": 1.5412, + "step": 24029 + }, + { + "epoch": 0.02, + "learning_rate": 4.992937191620196e-05, + "loss": 1.6076, + "step": 24030 + }, + { + "epoch": 0.02, + "learning_rate": 4.992936601596947e-05, + "loss": 1.5389, + "step": 24031 + }, + { + "epoch": 0.02, + "learning_rate": 4.992936011549089e-05, + "loss": 1.1828, + "step": 24032 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929354214766225e-05, + "loss": 1.3197, + "step": 24033 + }, + { + "epoch": 0.02, + "learning_rate": 4.992934831379545e-05, + "loss": 1.143, + "step": 24034 + }, + { + "epoch": 0.02, + "learning_rate": 4.992934241257859e-05, + "loss": 1.0148, + "step": 24035 + }, + { + "epoch": 0.02, + "learning_rate": 4.992933651111564e-05, + "loss": 1.2204, + "step": 24036 + }, + { + "epoch": 0.02, + "learning_rate": 4.992933060940661e-05, + "loss": 0.9826, + "step": 24037 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929324707451474e-05, + "loss": 1.2265, + "step": 24038 + }, + { + "epoch": 0.02, + "learning_rate": 4.992931880525025e-05, + "loss": 1.2869, + "step": 24039 + }, + { + "epoch": 0.02, + "learning_rate": 4.992931290280293e-05, + "loss": 1.0548, + "step": 24040 + }, + { + "epoch": 0.02, + "learning_rate": 4.992930700010952e-05, + "loss": 1.218, + "step": 24041 + }, + { + "epoch": 0.02, + "learning_rate": 4.992930109717002e-05, + "loss": 1.1817, + "step": 24042 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929295193984426e-05, + "loss": 1.2657, + "step": 24043 + }, + { + "epoch": 0.02, + "learning_rate": 4.992928929055275e-05, + "loss": 1.2041, + "step": 24044 + }, + { + "epoch": 0.02, + "learning_rate": 4.992928338687497e-05, + "loss": 1.2043, + "step": 24045 + }, + { + "epoch": 0.02, + "learning_rate": 4.992927748295111e-05, + "loss": 1.1938, + "step": 24046 + }, + { + "epoch": 0.02, + "learning_rate": 4.992927157878115e-05, + "loss": 1.1856, + "step": 24047 + }, + { + "epoch": 0.02, + "learning_rate": 4.99292656743651e-05, + "loss": 1.2109, + "step": 24048 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929259769702964e-05, + "loss": 1.4686, + "step": 24049 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929253864794735e-05, + "loss": 1.1906, + "step": 24050 + }, + { + "epoch": 0.02, + "learning_rate": 4.992924795964041e-05, + "loss": 1.1244, + "step": 24051 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929242054239997e-05, + "loss": 1.1775, + "step": 24052 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929236148593494e-05, + "loss": 0.904, + "step": 24053 + }, + { + "epoch": 0.02, + "learning_rate": 4.99292302427009e-05, + "loss": 1.0554, + "step": 24054 + }, + { + "epoch": 0.02, + "learning_rate": 4.992922433656221e-05, + "loss": 1.0955, + "step": 24055 + }, + { + "epoch": 0.02, + "learning_rate": 4.992921843017744e-05, + "loss": 1.1677, + "step": 24056 + }, + { + "epoch": 0.02, + "learning_rate": 4.992921252354657e-05, + "loss": 1.0176, + "step": 24057 + }, + { + "epoch": 0.02, + "learning_rate": 4.992920661666961e-05, + "loss": 1.1891, + "step": 24058 + }, + { + "epoch": 0.02, + "learning_rate": 4.992920070954657e-05, + "loss": 1.0973, + "step": 24059 + }, + { + "epoch": 0.02, + "learning_rate": 4.992919480217742e-05, + "loss": 1.0989, + "step": 24060 + }, + { + "epoch": 0.02, + "learning_rate": 4.99291888945622e-05, + "loss": 1.1221, + "step": 24061 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929182986700874e-05, + "loss": 1.2773, + "step": 24062 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929177078593456e-05, + "loss": 1.1757, + "step": 24063 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929171170239965e-05, + "loss": 1.1566, + "step": 24064 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929165261640374e-05, + "loss": 1.4818, + "step": 24065 + }, + { + "epoch": 0.02, + "learning_rate": 4.992915935279469e-05, + "loss": 1.0018, + "step": 24066 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929153443702916e-05, + "loss": 1.1401, + "step": 24067 + }, + { + "epoch": 0.02, + "learning_rate": 4.992914753436506e-05, + "loss": 0.9396, + "step": 24068 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929141624781105e-05, + "loss": 1.2054, + "step": 24069 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929135714951066e-05, + "loss": 1.0937, + "step": 24070 + }, + { + "epoch": 0.02, + "learning_rate": 4.992912980487493e-05, + "loss": 1.065, + "step": 24071 + }, + { + "epoch": 0.02, + "learning_rate": 4.992912389455271e-05, + "loss": 0.9308, + "step": 24072 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929117983984395e-05, + "loss": 0.915, + "step": 24073 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929112073169995e-05, + "loss": 1.0242, + "step": 24074 + }, + { + "epoch": 0.02, + "learning_rate": 4.992910616210951e-05, + "loss": 0.8787, + "step": 24075 + }, + { + "epoch": 0.02, + "learning_rate": 4.992910025080293e-05, + "loss": 0.8575, + "step": 24076 + }, + { + "epoch": 0.02, + "learning_rate": 4.992909433925026e-05, + "loss": 0.8885, + "step": 24077 + }, + { + "epoch": 0.02, + "learning_rate": 4.99290884274515e-05, + "loss": 0.8424, + "step": 24078 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929082515406647e-05, + "loss": 0.9289, + "step": 24079 + }, + { + "epoch": 0.02, + "learning_rate": 4.992907660311571e-05, + "loss": 1.2562, + "step": 24080 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929070690578685e-05, + "loss": 0.878, + "step": 24081 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929064777795564e-05, + "loss": 1.2008, + "step": 24082 + }, + { + "epoch": 0.02, + "learning_rate": 4.992905886476636e-05, + "loss": 1.0722, + "step": 24083 + }, + { + "epoch": 0.02, + "learning_rate": 4.992905295149107e-05, + "loss": 0.955, + "step": 24084 + }, + { + "epoch": 0.02, + "learning_rate": 4.992904703796968e-05, + "loss": 1.2274, + "step": 24085 + }, + { + "epoch": 0.02, + "learning_rate": 4.992904112420221e-05, + "loss": 1.19, + "step": 24086 + }, + { + "epoch": 0.02, + "learning_rate": 4.992903521018865e-05, + "loss": 1.2143, + "step": 24087 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929029295929e-05, + "loss": 1.1704, + "step": 24088 + }, + { + "epoch": 0.02, + "learning_rate": 4.992902338142326e-05, + "loss": 1.3001, + "step": 24089 + }, + { + "epoch": 0.02, + "learning_rate": 4.992901746667142e-05, + "loss": 1.0435, + "step": 24090 + }, + { + "epoch": 0.02, + "learning_rate": 4.992901155167351e-05, + "loss": 1.0097, + "step": 24091 + }, + { + "epoch": 0.02, + "learning_rate": 4.9929005636429504e-05, + "loss": 1.0777, + "step": 24092 + }, + { + "epoch": 0.02, + "learning_rate": 4.992899972093941e-05, + "loss": 1.2675, + "step": 24093 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928993805203225e-05, + "loss": 1.0016, + "step": 24094 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928987889220955e-05, + "loss": 0.8967, + "step": 24095 + }, + { + "epoch": 0.02, + "learning_rate": 4.992898197299259e-05, + "loss": 1.115, + "step": 24096 + }, + { + "epoch": 0.02, + "learning_rate": 4.992897605651814e-05, + "loss": 1.0985, + "step": 24097 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928970139797605e-05, + "loss": 1.2262, + "step": 24098 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928964222830974e-05, + "loss": 1.3129, + "step": 24099 + }, + { + "epoch": 0.02, + "learning_rate": 4.992895830561827e-05, + "loss": 0.806, + "step": 24100 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928952388159466e-05, + "loss": 0.7639, + "step": 24101 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928946470454576e-05, + "loss": 0.8922, + "step": 24102 + }, + { + "epoch": 0.02, + "learning_rate": 4.99289405525036e-05, + "loss": 0.793, + "step": 24103 + }, + { + "epoch": 0.02, + "learning_rate": 4.992893463430653e-05, + "loss": 0.7395, + "step": 24104 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928928715863375e-05, + "loss": 0.6735, + "step": 24105 + }, + { + "epoch": 0.02, + "learning_rate": 4.992892279717413e-05, + "loss": 0.7087, + "step": 24106 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928916878238806e-05, + "loss": 1.1764, + "step": 24107 + }, + { + "epoch": 0.02, + "learning_rate": 4.992891095905739e-05, + "loss": 1.1869, + "step": 24108 + }, + { + "epoch": 0.02, + "learning_rate": 4.992890503962988e-05, + "loss": 1.1303, + "step": 24109 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928899119956284e-05, + "loss": 0.9253, + "step": 24110 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928893200036605e-05, + "loss": 0.8383, + "step": 24111 + }, + { + "epoch": 0.02, + "learning_rate": 4.992888727987084e-05, + "loss": 1.2807, + "step": 24112 + }, + { + "epoch": 0.02, + "learning_rate": 4.992888135945899e-05, + "loss": 0.8428, + "step": 24113 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928875438801035e-05, + "loss": 0.8443, + "step": 24114 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928869517897016e-05, + "loss": 0.9686, + "step": 24115 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928863596746896e-05, + "loss": 1.0426, + "step": 24116 + }, + { + "epoch": 0.02, + "learning_rate": 4.992885767535069e-05, + "loss": 1.0709, + "step": 24117 + }, + { + "epoch": 0.02, + "learning_rate": 4.99288517537084e-05, + "loss": 1.2419, + "step": 24118 + }, + { + "epoch": 0.02, + "learning_rate": 4.992884583182002e-05, + "loss": 0.9178, + "step": 24119 + }, + { + "epoch": 0.02, + "learning_rate": 4.992883990968556e-05, + "loss": 1.1018, + "step": 24120 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928833987305004e-05, + "loss": 1.0398, + "step": 24121 + }, + { + "epoch": 0.02, + "learning_rate": 4.992882806467836e-05, + "loss": 1.147, + "step": 24122 + }, + { + "epoch": 0.02, + "learning_rate": 4.992882214180563e-05, + "loss": 1.5939, + "step": 24123 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928816218686816e-05, + "loss": 1.562, + "step": 24124 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928810295321916e-05, + "loss": 1.6745, + "step": 24125 + }, + { + "epoch": 0.02, + "learning_rate": 4.992880437171093e-05, + "loss": 1.4223, + "step": 24126 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928798447853855e-05, + "loss": 1.651, + "step": 24127 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928792523750694e-05, + "loss": 1.5524, + "step": 24128 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928786599401446e-05, + "loss": 1.4367, + "step": 24129 + }, + { + "epoch": 0.02, + "learning_rate": 4.992878067480611e-05, + "loss": 1.455, + "step": 24130 + }, + { + "epoch": 0.02, + "learning_rate": 4.992877474996469e-05, + "loss": 1.5197, + "step": 24131 + }, + { + "epoch": 0.02, + "learning_rate": 4.992876882487719e-05, + "loss": 1.3464, + "step": 24132 + }, + { + "epoch": 0.02, + "learning_rate": 4.992876289954359e-05, + "loss": 1.1148, + "step": 24133 + }, + { + "epoch": 0.02, + "learning_rate": 4.992875697396392e-05, + "loss": 0.8889, + "step": 24134 + }, + { + "epoch": 0.02, + "learning_rate": 4.992875104813815e-05, + "loss": 0.9795, + "step": 24135 + }, + { + "epoch": 0.02, + "learning_rate": 4.99287451220663e-05, + "loss": 1.0111, + "step": 24136 + }, + { + "epoch": 0.02, + "learning_rate": 4.992873919574837e-05, + "loss": 1.0145, + "step": 24137 + }, + { + "epoch": 0.02, + "learning_rate": 4.992873326918434e-05, + "loss": 0.9554, + "step": 24138 + }, + { + "epoch": 0.02, + "learning_rate": 4.992872734237422e-05, + "loss": 1.0446, + "step": 24139 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928721415318034e-05, + "loss": 1.2072, + "step": 24140 + }, + { + "epoch": 0.02, + "learning_rate": 4.992871548801575e-05, + "loss": 1.1016, + "step": 24141 + }, + { + "epoch": 0.02, + "learning_rate": 4.992870956046739e-05, + "loss": 1.3995, + "step": 24142 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928703632672934e-05, + "loss": 1.0819, + "step": 24143 + }, + { + "epoch": 0.02, + "learning_rate": 4.99286977046324e-05, + "loss": 1.0095, + "step": 24144 + }, + { + "epoch": 0.02, + "learning_rate": 4.992869177634577e-05, + "loss": 0.9089, + "step": 24145 + }, + { + "epoch": 0.02, + "learning_rate": 4.992868584781306e-05, + "loss": 0.9564, + "step": 24146 + }, + { + "epoch": 0.02, + "learning_rate": 4.992867991903427e-05, + "loss": 0.9206, + "step": 24147 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928673990009386e-05, + "loss": 0.8088, + "step": 24148 + }, + { + "epoch": 0.02, + "learning_rate": 4.992866806073842e-05, + "loss": 0.8272, + "step": 24149 + }, + { + "epoch": 0.02, + "learning_rate": 4.992866213122137e-05, + "loss": 1.0474, + "step": 24150 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928656201458236e-05, + "loss": 1.2565, + "step": 24151 + }, + { + "epoch": 0.02, + "learning_rate": 4.992865027144902e-05, + "loss": 1.2902, + "step": 24152 + }, + { + "epoch": 0.02, + "learning_rate": 4.992864434119371e-05, + "loss": 1.393, + "step": 24153 + }, + { + "epoch": 0.02, + "learning_rate": 4.992863841069232e-05, + "loss": 0.9702, + "step": 24154 + }, + { + "epoch": 0.02, + "learning_rate": 4.992863247994484e-05, + "loss": 0.7488, + "step": 24155 + }, + { + "epoch": 0.02, + "learning_rate": 4.992862654895128e-05, + "loss": 0.5989, + "step": 24156 + }, + { + "epoch": 0.02, + "learning_rate": 4.992862061771163e-05, + "loss": 1.3384, + "step": 24157 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928614686225904e-05, + "loss": 0.9962, + "step": 24158 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928608754494086e-05, + "loss": 1.2563, + "step": 24159 + }, + { + "epoch": 0.02, + "learning_rate": 4.992860282251618e-05, + "loss": 1.0987, + "step": 24160 + }, + { + "epoch": 0.02, + "learning_rate": 4.99285968902922e-05, + "loss": 1.129, + "step": 24161 + }, + { + "epoch": 0.02, + "learning_rate": 4.992859095782213e-05, + "loss": 1.2527, + "step": 24162 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928585025105976e-05, + "loss": 1.057, + "step": 24163 + }, + { + "epoch": 0.02, + "learning_rate": 4.992857909214373e-05, + "loss": 0.9024, + "step": 24164 + }, + { + "epoch": 0.02, + "learning_rate": 4.992857315893541e-05, + "loss": 0.7036, + "step": 24165 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928567225481e-05, + "loss": 1.0307, + "step": 24166 + }, + { + "epoch": 0.02, + "learning_rate": 4.992856129178052e-05, + "loss": 1.0782, + "step": 24167 + }, + { + "epoch": 0.02, + "learning_rate": 4.992855535783394e-05, + "loss": 1.3696, + "step": 24168 + }, + { + "epoch": 0.02, + "learning_rate": 4.992854942364128e-05, + "loss": 1.2549, + "step": 24169 + }, + { + "epoch": 0.02, + "learning_rate": 4.992854348920253e-05, + "loss": 1.0177, + "step": 24170 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928537554517704e-05, + "loss": 1.2334, + "step": 24171 + }, + { + "epoch": 0.02, + "learning_rate": 4.992853161958679e-05, + "loss": 1.3228, + "step": 24172 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928525684409794e-05, + "loss": 0.9345, + "step": 24173 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928519748986715e-05, + "loss": 1.0616, + "step": 24174 + }, + { + "epoch": 0.02, + "learning_rate": 4.992851381331754e-05, + "loss": 1.0415, + "step": 24175 + }, + { + "epoch": 0.02, + "learning_rate": 4.99285078774023e-05, + "loss": 1.1505, + "step": 24176 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928501941240966e-05, + "loss": 1.1742, + "step": 24177 + }, + { + "epoch": 0.02, + "learning_rate": 4.992849600483355e-05, + "loss": 0.9986, + "step": 24178 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928490068180054e-05, + "loss": 1.0851, + "step": 24179 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928484131280475e-05, + "loss": 0.9781, + "step": 24180 + }, + { + "epoch": 0.02, + "learning_rate": 4.992847819413481e-05, + "loss": 1.1776, + "step": 24181 + }, + { + "epoch": 0.02, + "learning_rate": 4.992847225674305e-05, + "loss": 1.077, + "step": 24182 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928466319105224e-05, + "loss": 0.9565, + "step": 24183 + }, + { + "epoch": 0.02, + "learning_rate": 4.992846038122131e-05, + "loss": 1.3119, + "step": 24184 + }, + { + "epoch": 0.02, + "learning_rate": 4.992845444309131e-05, + "loss": 1.1471, + "step": 24185 + }, + { + "epoch": 0.02, + "learning_rate": 4.992844850471522e-05, + "loss": 0.9968, + "step": 24186 + }, + { + "epoch": 0.02, + "learning_rate": 4.992844256609306e-05, + "loss": 1.1051, + "step": 24187 + }, + { + "epoch": 0.02, + "learning_rate": 4.992843662722481e-05, + "loss": 0.8831, + "step": 24188 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928430688110474e-05, + "loss": 0.9841, + "step": 24189 + }, + { + "epoch": 0.02, + "learning_rate": 4.992842474875006e-05, + "loss": 0.9237, + "step": 24190 + }, + { + "epoch": 0.02, + "learning_rate": 4.992841880914356e-05, + "loss": 0.9932, + "step": 24191 + }, + { + "epoch": 0.02, + "learning_rate": 4.992841286929099e-05, + "loss": 0.8519, + "step": 24192 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928406929192315e-05, + "loss": 1.1374, + "step": 24193 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928400988847575e-05, + "loss": 1.153, + "step": 24194 + }, + { + "epoch": 0.02, + "learning_rate": 4.992839504825674e-05, + "loss": 1.1652, + "step": 24195 + }, + { + "epoch": 0.02, + "learning_rate": 4.992838910741983e-05, + "loss": 1.1358, + "step": 24196 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928383166336835e-05, + "loss": 1.2323, + "step": 24197 + }, + { + "epoch": 0.02, + "learning_rate": 4.992837722500776e-05, + "loss": 1.0047, + "step": 24198 + }, + { + "epoch": 0.02, + "learning_rate": 4.99283712834326e-05, + "loss": 1.1758, + "step": 24199 + }, + { + "epoch": 0.02, + "learning_rate": 4.992836534161136e-05, + "loss": 1.0649, + "step": 24200 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928359399544034e-05, + "loss": 0.5622, + "step": 24201 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928353457230634e-05, + "loss": 1.3003, + "step": 24202 + }, + { + "epoch": 0.02, + "learning_rate": 4.992834751467114e-05, + "loss": 1.079, + "step": 24203 + }, + { + "epoch": 0.02, + "learning_rate": 4.992834157186557e-05, + "loss": 1.06, + "step": 24204 + }, + { + "epoch": 0.02, + "learning_rate": 4.992833562881392e-05, + "loss": 1.2988, + "step": 24205 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928329685516185e-05, + "loss": 1.2342, + "step": 24206 + }, + { + "epoch": 0.02, + "learning_rate": 4.992832374197237e-05, + "loss": 1.203, + "step": 24207 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928317798182464e-05, + "loss": 1.0136, + "step": 24208 + }, + { + "epoch": 0.02, + "learning_rate": 4.992831185414649e-05, + "loss": 1.1984, + "step": 24209 + }, + { + "epoch": 0.02, + "learning_rate": 4.992830590986442e-05, + "loss": 1.2357, + "step": 24210 + }, + { + "epoch": 0.02, + "learning_rate": 4.992829996533628e-05, + "loss": 1.238, + "step": 24211 + }, + { + "epoch": 0.02, + "learning_rate": 4.992829402056206e-05, + "loss": 1.4971, + "step": 24212 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928288075541754e-05, + "loss": 0.8502, + "step": 24213 + }, + { + "epoch": 0.02, + "learning_rate": 4.992828213027536e-05, + "loss": 0.9654, + "step": 24214 + }, + { + "epoch": 0.02, + "learning_rate": 4.992827618476289e-05, + "loss": 0.9637, + "step": 24215 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928270239004345e-05, + "loss": 1.0185, + "step": 24216 + }, + { + "epoch": 0.02, + "learning_rate": 4.992826429299971e-05, + "loss": 1.1469, + "step": 24217 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928258346748996e-05, + "loss": 1.0067, + "step": 24218 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928252400252195e-05, + "loss": 1.2665, + "step": 24219 + }, + { + "epoch": 0.02, + "learning_rate": 4.992824645350932e-05, + "loss": 1.1297, + "step": 24220 + }, + { + "epoch": 0.02, + "learning_rate": 4.992824050652036e-05, + "loss": 1.2041, + "step": 24221 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928234559285325e-05, + "loss": 1.1025, + "step": 24222 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928228611804204e-05, + "loss": 1.2413, + "step": 24223 + }, + { + "epoch": 0.02, + "learning_rate": 4.992822266407701e-05, + "loss": 0.9509, + "step": 24224 + }, + { + "epoch": 0.02, + "learning_rate": 4.992821671610373e-05, + "loss": 0.8809, + "step": 24225 + }, + { + "epoch": 0.02, + "learning_rate": 4.992821076788437e-05, + "loss": 1.0775, + "step": 24226 + }, + { + "epoch": 0.02, + "learning_rate": 4.992820481941892e-05, + "loss": 0.9786, + "step": 24227 + }, + { + "epoch": 0.02, + "learning_rate": 4.992819887070739e-05, + "loss": 1.08, + "step": 24228 + }, + { + "epoch": 0.02, + "learning_rate": 4.992819292174979e-05, + "loss": 1.2905, + "step": 24229 + }, + { + "epoch": 0.02, + "learning_rate": 4.99281869725461e-05, + "loss": 1.5677, + "step": 24230 + }, + { + "epoch": 0.02, + "learning_rate": 4.992818102309634e-05, + "loss": 1.0758, + "step": 24231 + }, + { + "epoch": 0.02, + "learning_rate": 4.992817507340049e-05, + "loss": 0.9225, + "step": 24232 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928169123458565e-05, + "loss": 1.0924, + "step": 24233 + }, + { + "epoch": 0.02, + "learning_rate": 4.992816317327056e-05, + "loss": 1.1451, + "step": 24234 + }, + { + "epoch": 0.02, + "learning_rate": 4.992815722283647e-05, + "loss": 0.9253, + "step": 24235 + }, + { + "epoch": 0.02, + "learning_rate": 4.99281512721563e-05, + "loss": 1.2732, + "step": 24236 + }, + { + "epoch": 0.02, + "learning_rate": 4.992814532123005e-05, + "loss": 0.9871, + "step": 24237 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928139370057724e-05, + "loss": 0.9317, + "step": 24238 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928133418639316e-05, + "loss": 1.1775, + "step": 24239 + }, + { + "epoch": 0.02, + "learning_rate": 4.992812746697483e-05, + "loss": 1.0052, + "step": 24240 + }, + { + "epoch": 0.02, + "learning_rate": 4.992812151506426e-05, + "loss": 0.9033, + "step": 24241 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928115562907605e-05, + "loss": 0.9505, + "step": 24242 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928109610504884e-05, + "loss": 1.3743, + "step": 24243 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928103657856076e-05, + "loss": 1.2599, + "step": 24244 + }, + { + "epoch": 0.02, + "learning_rate": 4.992809770496119e-05, + "loss": 1.2924, + "step": 24245 + }, + { + "epoch": 0.02, + "learning_rate": 4.992809175182022e-05, + "loss": 1.1646, + "step": 24246 + }, + { + "epoch": 0.02, + "learning_rate": 4.992808579843317e-05, + "loss": 1.143, + "step": 24247 + }, + { + "epoch": 0.02, + "learning_rate": 4.992807984480005e-05, + "loss": 1.3022, + "step": 24248 + }, + { + "epoch": 0.02, + "learning_rate": 4.992807389092084e-05, + "loss": 0.9846, + "step": 24249 + }, + { + "epoch": 0.02, + "learning_rate": 4.992806793679555e-05, + "loss": 0.7784, + "step": 24250 + }, + { + "epoch": 0.02, + "learning_rate": 4.992806198242419e-05, + "loss": 1.0458, + "step": 24251 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928056027806743e-05, + "loss": 0.9615, + "step": 24252 + }, + { + "epoch": 0.02, + "learning_rate": 4.992805007294322e-05, + "loss": 1.2047, + "step": 24253 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928044117833614e-05, + "loss": 1.1073, + "step": 24254 + }, + { + "epoch": 0.02, + "learning_rate": 4.992803816247794e-05, + "loss": 1.1555, + "step": 24255 + }, + { + "epoch": 0.02, + "learning_rate": 4.992803220687618e-05, + "loss": 0.9771, + "step": 24256 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928026251028336e-05, + "loss": 1.0688, + "step": 24257 + }, + { + "epoch": 0.02, + "learning_rate": 4.992802029493442e-05, + "loss": 1.1013, + "step": 24258 + }, + { + "epoch": 0.02, + "learning_rate": 4.992801433859442e-05, + "loss": 0.9458, + "step": 24259 + }, + { + "epoch": 0.02, + "learning_rate": 4.992800838200834e-05, + "loss": 1.3607, + "step": 24260 + }, + { + "epoch": 0.02, + "learning_rate": 4.992800242517619e-05, + "loss": 1.7415, + "step": 24261 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927996468097955e-05, + "loss": 1.5598, + "step": 24262 + }, + { + "epoch": 0.02, + "learning_rate": 4.992799051077364e-05, + "loss": 1.0536, + "step": 24263 + }, + { + "epoch": 0.02, + "learning_rate": 4.992798455320325e-05, + "loss": 1.0542, + "step": 24264 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927978595386784e-05, + "loss": 1.1939, + "step": 24265 + }, + { + "epoch": 0.02, + "learning_rate": 4.992797263732423e-05, + "loss": 0.9606, + "step": 24266 + }, + { + "epoch": 0.02, + "learning_rate": 4.99279666790156e-05, + "loss": 1.0534, + "step": 24267 + }, + { + "epoch": 0.02, + "learning_rate": 4.99279607204609e-05, + "loss": 1.1883, + "step": 24268 + }, + { + "epoch": 0.02, + "learning_rate": 4.992795476166011e-05, + "loss": 0.8724, + "step": 24269 + }, + { + "epoch": 0.02, + "learning_rate": 4.992794880261326e-05, + "loss": 1.1025, + "step": 24270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927942843320315e-05, + "loss": 1.0987, + "step": 24271 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927936883781294e-05, + "loss": 1.1441, + "step": 24272 + }, + { + "epoch": 0.02, + "learning_rate": 4.99279309239962e-05, + "loss": 1.1854, + "step": 24273 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927924963965025e-05, + "loss": 1.282, + "step": 24274 + }, + { + "epoch": 0.02, + "learning_rate": 4.992791900368777e-05, + "loss": 1.107, + "step": 24275 + }, + { + "epoch": 0.02, + "learning_rate": 4.992791304316444e-05, + "loss": 0.934, + "step": 24276 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927907082395034e-05, + "loss": 1.0459, + "step": 24277 + }, + { + "epoch": 0.02, + "learning_rate": 4.992790112137955e-05, + "loss": 1.2678, + "step": 24278 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927895160117985e-05, + "loss": 0.8407, + "step": 24279 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927889198610344e-05, + "loss": 0.9175, + "step": 24280 + }, + { + "epoch": 0.02, + "learning_rate": 4.992788323685662e-05, + "loss": 1.0273, + "step": 24281 + }, + { + "epoch": 0.02, + "learning_rate": 4.992787727485683e-05, + "loss": 1.0564, + "step": 24282 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927871312610954e-05, + "loss": 0.1496, + "step": 24283 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927865350119e-05, + "loss": 0.0947, + "step": 24284 + }, + { + "epoch": 0.02, + "learning_rate": 4.992785938738097e-05, + "loss": 0.0527, + "step": 24285 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927853424396864e-05, + "loss": 0.0438, + "step": 24286 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927847461166683e-05, + "loss": 0.0359, + "step": 24287 + }, + { + "epoch": 0.02, + "learning_rate": 4.992784149769042e-05, + "loss": 0.1177, + "step": 24288 + }, + { + "epoch": 0.02, + "learning_rate": 4.992783553396808e-05, + "loss": 0.3107, + "step": 24289 + }, + { + "epoch": 0.02, + "learning_rate": 4.992782956999967e-05, + "loss": 0.22, + "step": 24290 + }, + { + "epoch": 0.02, + "learning_rate": 4.992782360578517e-05, + "loss": 0.5223, + "step": 24291 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927817641324606e-05, + "loss": 1.0032, + "step": 24292 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927811676617965e-05, + "loss": 0.9656, + "step": 24293 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927805711665244e-05, + "loss": 1.2079, + "step": 24294 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927799746466444e-05, + "loss": 1.0992, + "step": 24295 + }, + { + "epoch": 0.02, + "learning_rate": 4.992779378102156e-05, + "loss": 1.7526, + "step": 24296 + }, + { + "epoch": 0.02, + "learning_rate": 4.992778781533061e-05, + "loss": 1.3603, + "step": 24297 + }, + { + "epoch": 0.02, + "learning_rate": 4.992778184939358e-05, + "loss": 1.2173, + "step": 24298 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927775883210475e-05, + "loss": 1.1452, + "step": 24299 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927769916781294e-05, + "loss": 1.1124, + "step": 24300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927763950106034e-05, + "loss": 0.5364, + "step": 24301 + }, + { + "epoch": 0.02, + "learning_rate": 4.99277579831847e-05, + "loss": 0.8146, + "step": 24302 + }, + { + "epoch": 0.02, + "learning_rate": 4.992775201601729e-05, + "loss": 1.1357, + "step": 24303 + }, + { + "epoch": 0.02, + "learning_rate": 4.992774604860379e-05, + "loss": 1.3369, + "step": 24304 + }, + { + "epoch": 0.02, + "learning_rate": 4.992774008094423e-05, + "loss": 0.9853, + "step": 24305 + }, + { + "epoch": 0.02, + "learning_rate": 4.992773411303859e-05, + "loss": 1.1325, + "step": 24306 + }, + { + "epoch": 0.02, + "learning_rate": 4.992772814488688e-05, + "loss": 1.0816, + "step": 24307 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927722176489086e-05, + "loss": 0.8392, + "step": 24308 + }, + { + "epoch": 0.02, + "learning_rate": 4.992771620784522e-05, + "loss": 1.082, + "step": 24309 + }, + { + "epoch": 0.02, + "learning_rate": 4.992771023895527e-05, + "loss": 1.1061, + "step": 24310 + }, + { + "epoch": 0.02, + "learning_rate": 4.992770426981925e-05, + "loss": 0.7759, + "step": 24311 + }, + { + "epoch": 0.02, + "learning_rate": 4.992769830043715e-05, + "loss": 1.1955, + "step": 24312 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927692330808986e-05, + "loss": 1.2285, + "step": 24313 + }, + { + "epoch": 0.02, + "learning_rate": 4.992768636093474e-05, + "loss": 1.1905, + "step": 24314 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927680390814414e-05, + "loss": 0.8488, + "step": 24315 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927674420448014e-05, + "loss": 1.0147, + "step": 24316 + }, + { + "epoch": 0.02, + "learning_rate": 4.992766844983554e-05, + "loss": 0.8666, + "step": 24317 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927662478976995e-05, + "loss": 1.3212, + "step": 24318 + }, + { + "epoch": 0.02, + "learning_rate": 4.992765650787237e-05, + "loss": 1.1797, + "step": 24319 + }, + { + "epoch": 0.02, + "learning_rate": 4.992765053652166e-05, + "loss": 1.1304, + "step": 24320 + }, + { + "epoch": 0.02, + "learning_rate": 4.992764456492489e-05, + "loss": 0.9464, + "step": 24321 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927638593082045e-05, + "loss": 0.7597, + "step": 24322 + }, + { + "epoch": 0.02, + "learning_rate": 4.992763262099311e-05, + "loss": 1.1407, + "step": 24323 + }, + { + "epoch": 0.02, + "learning_rate": 4.992762664865811e-05, + "loss": 1.2424, + "step": 24324 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927620676077034e-05, + "loss": 1.0521, + "step": 24325 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927614703249895e-05, + "loss": 1.1115, + "step": 24326 + }, + { + "epoch": 0.02, + "learning_rate": 4.992760873017667e-05, + "loss": 0.933, + "step": 24327 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927602756857364e-05, + "loss": 1.0648, + "step": 24328 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927596783291985e-05, + "loss": 0.9881, + "step": 24329 + }, + { + "epoch": 0.02, + "learning_rate": 4.992759080948054e-05, + "loss": 0.9343, + "step": 24330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927584835423015e-05, + "loss": 0.8709, + "step": 24331 + }, + { + "epoch": 0.02, + "learning_rate": 4.992757886111941e-05, + "loss": 1.1466, + "step": 24332 + }, + { + "epoch": 0.02, + "learning_rate": 4.992757288656974e-05, + "loss": 1.1672, + "step": 24333 + }, + { + "epoch": 0.02, + "learning_rate": 4.992756691177399e-05, + "loss": 1.2033, + "step": 24334 + }, + { + "epoch": 0.02, + "learning_rate": 4.992756093673217e-05, + "loss": 1.2005, + "step": 24335 + }, + { + "epoch": 0.02, + "learning_rate": 4.992755496144427e-05, + "loss": 0.9952, + "step": 24336 + }, + { + "epoch": 0.02, + "learning_rate": 4.99275489859103e-05, + "loss": 1.2088, + "step": 24337 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927543010130253e-05, + "loss": 1.6756, + "step": 24338 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927537034104136e-05, + "loss": 1.1861, + "step": 24339 + }, + { + "epoch": 0.02, + "learning_rate": 4.992753105783194e-05, + "loss": 1.1627, + "step": 24340 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927525081313666e-05, + "loss": 1.3196, + "step": 24341 + }, + { + "epoch": 0.02, + "learning_rate": 4.992751910454932e-05, + "loss": 1.2791, + "step": 24342 + }, + { + "epoch": 0.02, + "learning_rate": 4.992751312753891e-05, + "loss": 1.0221, + "step": 24343 + }, + { + "epoch": 0.02, + "learning_rate": 4.992750715028242e-05, + "loss": 1.1273, + "step": 24344 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927501172779856e-05, + "loss": 1.034, + "step": 24345 + }, + { + "epoch": 0.02, + "learning_rate": 4.992749519503122e-05, + "loss": 0.861, + "step": 24346 + }, + { + "epoch": 0.02, + "learning_rate": 4.992748921703651e-05, + "loss": 1.1572, + "step": 24347 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927483238795724e-05, + "loss": 0.9568, + "step": 24348 + }, + { + "epoch": 0.02, + "learning_rate": 4.992747726030886e-05, + "loss": 0.3494, + "step": 24349 + }, + { + "epoch": 0.02, + "learning_rate": 4.992747128157593e-05, + "loss": 0.7869, + "step": 24350 + }, + { + "epoch": 0.02, + "learning_rate": 4.992746530259693e-05, + "loss": 1.0582, + "step": 24351 + }, + { + "epoch": 0.02, + "learning_rate": 4.992745932337185e-05, + "loss": 0.7102, + "step": 24352 + }, + { + "epoch": 0.02, + "learning_rate": 4.992745334390069e-05, + "loss": 0.9041, + "step": 24353 + }, + { + "epoch": 0.02, + "learning_rate": 4.992744736418346e-05, + "loss": 0.5888, + "step": 24354 + }, + { + "epoch": 0.02, + "learning_rate": 4.992744138422017e-05, + "loss": 0.887, + "step": 24355 + }, + { + "epoch": 0.02, + "learning_rate": 4.99274354040108e-05, + "loss": 1.117, + "step": 24356 + }, + { + "epoch": 0.02, + "learning_rate": 4.992742942355535e-05, + "loss": 0.7432, + "step": 24357 + }, + { + "epoch": 0.02, + "learning_rate": 4.992742344285383e-05, + "loss": 1.2744, + "step": 24358 + }, + { + "epoch": 0.02, + "learning_rate": 4.992741746190624e-05, + "loss": 0.7411, + "step": 24359 + }, + { + "epoch": 0.02, + "learning_rate": 4.992741148071258e-05, + "loss": 1.1617, + "step": 24360 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927405499272837e-05, + "loss": 0.847, + "step": 24361 + }, + { + "epoch": 0.02, + "learning_rate": 4.992739951758703e-05, + "loss": 1.1165, + "step": 24362 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927393535655145e-05, + "loss": 1.0725, + "step": 24363 + }, + { + "epoch": 0.02, + "learning_rate": 4.992738755347719e-05, + "loss": 1.1321, + "step": 24364 + }, + { + "epoch": 0.02, + "learning_rate": 4.992738157105316e-05, + "loss": 1.4605, + "step": 24365 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927375588383065e-05, + "loss": 1.2717, + "step": 24366 + }, + { + "epoch": 0.02, + "learning_rate": 4.992736960546689e-05, + "loss": 1.2921, + "step": 24367 + }, + { + "epoch": 0.02, + "learning_rate": 4.992736362230464e-05, + "loss": 1.4048, + "step": 24368 + }, + { + "epoch": 0.02, + "learning_rate": 4.992735763889632e-05, + "loss": 1.2178, + "step": 24369 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927351655241925e-05, + "loss": 1.259, + "step": 24370 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927345671341463e-05, + "loss": 0.8468, + "step": 24371 + }, + { + "epoch": 0.02, + "learning_rate": 4.992733968719493e-05, + "loss": 0.4921, + "step": 24372 + }, + { + "epoch": 0.02, + "learning_rate": 4.992733370280233e-05, + "loss": 1.2083, + "step": 24373 + }, + { + "epoch": 0.02, + "learning_rate": 4.992732771816364e-05, + "loss": 0.8192, + "step": 24374 + }, + { + "epoch": 0.02, + "learning_rate": 4.992732173327889e-05, + "loss": 1.2897, + "step": 24375 + }, + { + "epoch": 0.02, + "learning_rate": 4.992731574814807e-05, + "loss": 1.1029, + "step": 24376 + }, + { + "epoch": 0.02, + "learning_rate": 4.992730976277117e-05, + "loss": 1.0785, + "step": 24377 + }, + { + "epoch": 0.02, + "learning_rate": 4.99273037771482e-05, + "loss": 1.0746, + "step": 24378 + }, + { + "epoch": 0.02, + "learning_rate": 4.992729779127916e-05, + "loss": 0.9224, + "step": 24379 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927291805164055e-05, + "loss": 1.0075, + "step": 24380 + }, + { + "epoch": 0.02, + "learning_rate": 4.992728581880287e-05, + "loss": 0.7694, + "step": 24381 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927279832195615e-05, + "loss": 1.1058, + "step": 24382 + }, + { + "epoch": 0.02, + "learning_rate": 4.992727384534229e-05, + "loss": 1.1263, + "step": 24383 + }, + { + "epoch": 0.02, + "learning_rate": 4.992726785824289e-05, + "loss": 0.9603, + "step": 24384 + }, + { + "epoch": 0.02, + "learning_rate": 4.992726187089742e-05, + "loss": 1.2155, + "step": 24385 + }, + { + "epoch": 0.02, + "learning_rate": 4.992725588330588e-05, + "loss": 1.3387, + "step": 24386 + }, + { + "epoch": 0.02, + "learning_rate": 4.992724989546826e-05, + "loss": 1.3723, + "step": 24387 + }, + { + "epoch": 0.02, + "learning_rate": 4.992724390738458e-05, + "loss": 1.0002, + "step": 24388 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927237919054824e-05, + "loss": 1.2258, + "step": 24389 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927231930479005e-05, + "loss": 0.7858, + "step": 24390 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927225941657106e-05, + "loss": 1.2825, + "step": 24391 + }, + { + "epoch": 0.02, + "learning_rate": 4.992721995258914e-05, + "loss": 1.1404, + "step": 24392 + }, + { + "epoch": 0.02, + "learning_rate": 4.99272139632751e-05, + "loss": 0.9279, + "step": 24393 + }, + { + "epoch": 0.02, + "learning_rate": 4.992720797371498e-05, + "loss": 1.1167, + "step": 24394 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927201983908804e-05, + "loss": 1.3172, + "step": 24395 + }, + { + "epoch": 0.02, + "learning_rate": 4.992719599385656e-05, + "loss": 0.9265, + "step": 24396 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927190003558234e-05, + "loss": 1.3116, + "step": 24397 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927184013013836e-05, + "loss": 1.1274, + "step": 24398 + }, + { + "epoch": 0.02, + "learning_rate": 4.992717802222338e-05, + "loss": 1.1781, + "step": 24399 + }, + { + "epoch": 0.02, + "learning_rate": 4.992717203118684e-05, + "loss": 0.9653, + "step": 24400 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927166039904236e-05, + "loss": 1.1877, + "step": 24401 + }, + { + "epoch": 0.02, + "learning_rate": 4.992716004837556e-05, + "loss": 1.15, + "step": 24402 + }, + { + "epoch": 0.02, + "learning_rate": 4.992715405660081e-05, + "loss": 1.2119, + "step": 24403 + }, + { + "epoch": 0.02, + "learning_rate": 4.992714806458e-05, + "loss": 1.0378, + "step": 24404 + }, + { + "epoch": 0.02, + "learning_rate": 4.992714207231311e-05, + "loss": 1.3089, + "step": 24405 + }, + { + "epoch": 0.02, + "learning_rate": 4.992713607980015e-05, + "loss": 1.4753, + "step": 24406 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927130087041126e-05, + "loss": 0.8042, + "step": 24407 + }, + { + "epoch": 0.02, + "learning_rate": 4.992712409403602e-05, + "loss": 0.8297, + "step": 24408 + }, + { + "epoch": 0.02, + "learning_rate": 4.992711810078487e-05, + "loss": 1.0259, + "step": 24409 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927112107287624e-05, + "loss": 1.1014, + "step": 24410 + }, + { + "epoch": 0.02, + "learning_rate": 4.992710611354432e-05, + "loss": 0.9907, + "step": 24411 + }, + { + "epoch": 0.02, + "learning_rate": 4.992710011955494e-05, + "loss": 1.1886, + "step": 24412 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927094125319496e-05, + "loss": 0.9516, + "step": 24413 + }, + { + "epoch": 0.02, + "learning_rate": 4.992708813083797e-05, + "loss": 0.9904, + "step": 24414 + }, + { + "epoch": 0.02, + "learning_rate": 4.992708213611039e-05, + "loss": 1.2044, + "step": 24415 + }, + { + "epoch": 0.02, + "learning_rate": 4.992707614113673e-05, + "loss": 0.9392, + "step": 24416 + }, + { + "epoch": 0.02, + "learning_rate": 4.992707014591701e-05, + "loss": 1.1496, + "step": 24417 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927064150451205e-05, + "loss": 1.4856, + "step": 24418 + }, + { + "epoch": 0.02, + "learning_rate": 4.992705815473935e-05, + "loss": 0.8448, + "step": 24419 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927052158781415e-05, + "loss": 0.8749, + "step": 24420 + }, + { + "epoch": 0.02, + "learning_rate": 4.992704616257741e-05, + "loss": 1.014, + "step": 24421 + }, + { + "epoch": 0.02, + "learning_rate": 4.992704016612734e-05, + "loss": 1.2585, + "step": 24422 + }, + { + "epoch": 0.02, + "learning_rate": 4.99270341694312e-05, + "loss": 1.0814, + "step": 24423 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927028172488984e-05, + "loss": 1.3848, + "step": 24424 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927022175300704e-05, + "loss": 0.945, + "step": 24425 + }, + { + "epoch": 0.02, + "learning_rate": 4.992701617786636e-05, + "loss": 0.6783, + "step": 24426 + }, + { + "epoch": 0.02, + "learning_rate": 4.9927010180185936e-05, + "loss": 0.7239, + "step": 24427 + }, + { + "epoch": 0.02, + "learning_rate": 4.992700418225945e-05, + "loss": 1.0416, + "step": 24428 + }, + { + "epoch": 0.02, + "learning_rate": 4.992699818408689e-05, + "loss": 0.9032, + "step": 24429 + }, + { + "epoch": 0.02, + "learning_rate": 4.992699218566826e-05, + "loss": 1.2013, + "step": 24430 + }, + { + "epoch": 0.02, + "learning_rate": 4.992698618700358e-05, + "loss": 0.8852, + "step": 24431 + }, + { + "epoch": 0.02, + "learning_rate": 4.992698018809281e-05, + "loss": 1.1553, + "step": 24432 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926974188935985e-05, + "loss": 1.0157, + "step": 24433 + }, + { + "epoch": 0.02, + "learning_rate": 4.992696818953308e-05, + "loss": 0.914, + "step": 24434 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926962189884107e-05, + "loss": 1.0104, + "step": 24435 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926956189989075e-05, + "loss": 1.0219, + "step": 24436 + }, + { + "epoch": 0.02, + "learning_rate": 4.992695018984797e-05, + "loss": 2.6764, + "step": 24437 + }, + { + "epoch": 0.02, + "learning_rate": 4.99269441894608e-05, + "loss": 1.0226, + "step": 24438 + }, + { + "epoch": 0.02, + "learning_rate": 4.992693818882755e-05, + "loss": 1.2257, + "step": 24439 + }, + { + "epoch": 0.02, + "learning_rate": 4.992693218794824e-05, + "loss": 1.2859, + "step": 24440 + }, + { + "epoch": 0.02, + "learning_rate": 4.992692618682286e-05, + "loss": 1.3383, + "step": 24441 + }, + { + "epoch": 0.02, + "learning_rate": 4.992692018545142e-05, + "loss": 1.6692, + "step": 24442 + }, + { + "epoch": 0.02, + "learning_rate": 4.992691418383391e-05, + "loss": 1.1903, + "step": 24443 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926908181970324e-05, + "loss": 0.7703, + "step": 24444 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926902179860674e-05, + "loss": 1.1083, + "step": 24445 + }, + { + "epoch": 0.02, + "learning_rate": 4.992689617750496e-05, + "loss": 1.1153, + "step": 24446 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926890174903166e-05, + "loss": 1.2537, + "step": 24447 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926884172055317e-05, + "loss": 0.9862, + "step": 24448 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926878168961393e-05, + "loss": 1.203, + "step": 24449 + }, + { + "epoch": 0.02, + "learning_rate": 4.992687216562141e-05, + "loss": 0.42, + "step": 24450 + }, + { + "epoch": 0.02, + "learning_rate": 4.992686616203535e-05, + "loss": 0.9616, + "step": 24451 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926860158203225e-05, + "loss": 1.6385, + "step": 24452 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926854154125036e-05, + "loss": 1.2251, + "step": 24453 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926848149800774e-05, + "loss": 0.8885, + "step": 24454 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926842145230446e-05, + "loss": 0.9874, + "step": 24455 + }, + { + "epoch": 0.02, + "learning_rate": 4.992683614041405e-05, + "loss": 1.2189, + "step": 24456 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926830135351596e-05, + "loss": 0.8753, + "step": 24457 + }, + { + "epoch": 0.02, + "learning_rate": 4.992682413004307e-05, + "loss": 0.9704, + "step": 24458 + }, + { + "epoch": 0.02, + "learning_rate": 4.992681812448847e-05, + "loss": 1.2693, + "step": 24459 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926812118687806e-05, + "loss": 0.6925, + "step": 24460 + }, + { + "epoch": 0.02, + "learning_rate": 4.992680611264108e-05, + "loss": 0.965, + "step": 24461 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926800106348284e-05, + "loss": 0.7978, + "step": 24462 + }, + { + "epoch": 0.02, + "learning_rate": 4.992679409980942e-05, + "loss": 1.3209, + "step": 24463 + }, + { + "epoch": 0.02, + "learning_rate": 4.992678809302449e-05, + "loss": 1.4389, + "step": 24464 + }, + { + "epoch": 0.02, + "learning_rate": 4.99267820859935e-05, + "loss": 1.2622, + "step": 24465 + }, + { + "epoch": 0.02, + "learning_rate": 4.992677607871643e-05, + "loss": 1.139, + "step": 24466 + }, + { + "epoch": 0.02, + "learning_rate": 4.99267700711933e-05, + "loss": 0.3752, + "step": 24467 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926764063424105e-05, + "loss": 0.3013, + "step": 24468 + }, + { + "epoch": 0.02, + "learning_rate": 4.992675805540884e-05, + "loss": 1.2789, + "step": 24469 + }, + { + "epoch": 0.02, + "learning_rate": 4.992675204714751e-05, + "loss": 0.8498, + "step": 24470 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926746038640115e-05, + "loss": 0.9297, + "step": 24471 + }, + { + "epoch": 0.02, + "learning_rate": 4.992674002988666e-05, + "loss": 1.2087, + "step": 24472 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926734020887125e-05, + "loss": 0.9166, + "step": 24473 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926728011641534e-05, + "loss": 1.0591, + "step": 24474 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926722002149876e-05, + "loss": 1.1709, + "step": 24475 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926715992412145e-05, + "loss": 1.163, + "step": 24476 + }, + { + "epoch": 0.02, + "learning_rate": 4.992670998242835e-05, + "loss": 1.0705, + "step": 24477 + }, + { + "epoch": 0.02, + "learning_rate": 4.992670397219849e-05, + "loss": 1.2218, + "step": 24478 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926697961722566e-05, + "loss": 1.2339, + "step": 24479 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926691951000576e-05, + "loss": 1.131, + "step": 24480 + }, + { + "epoch": 0.02, + "learning_rate": 4.992668594003252e-05, + "loss": 1.1568, + "step": 24481 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926679928818397e-05, + "loss": 1.0256, + "step": 24482 + }, + { + "epoch": 0.02, + "learning_rate": 4.992667391735821e-05, + "loss": 0.9029, + "step": 24483 + }, + { + "epoch": 0.02, + "learning_rate": 4.992666790565196e-05, + "loss": 1.1322, + "step": 24484 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926661893699635e-05, + "loss": 1.4244, + "step": 24485 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926655881501247e-05, + "loss": 1.3805, + "step": 24486 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926649869056805e-05, + "loss": 1.053, + "step": 24487 + }, + { + "epoch": 0.02, + "learning_rate": 4.992664385636628e-05, + "loss": 1.303, + "step": 24488 + }, + { + "epoch": 0.02, + "learning_rate": 4.99266378434297e-05, + "loss": 0.9932, + "step": 24489 + }, + { + "epoch": 0.02, + "learning_rate": 4.992663183024706e-05, + "loss": 1.2436, + "step": 24490 + }, + { + "epoch": 0.02, + "learning_rate": 4.992662581681835e-05, + "loss": 0.926, + "step": 24491 + }, + { + "epoch": 0.02, + "learning_rate": 4.992661980314357e-05, + "loss": 1.2446, + "step": 24492 + }, + { + "epoch": 0.02, + "learning_rate": 4.992661378922273e-05, + "loss": 0.9082, + "step": 24493 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926607775055824e-05, + "loss": 1.2247, + "step": 24494 + }, + { + "epoch": 0.02, + "learning_rate": 4.992660176064286e-05, + "loss": 1.2939, + "step": 24495 + }, + { + "epoch": 0.02, + "learning_rate": 4.992659574598382e-05, + "loss": 1.1958, + "step": 24496 + }, + { + "epoch": 0.02, + "learning_rate": 4.992658973107872e-05, + "loss": 1.2497, + "step": 24497 + }, + { + "epoch": 0.02, + "learning_rate": 4.992658371592755e-05, + "loss": 1.1057, + "step": 24498 + }, + { + "epoch": 0.02, + "learning_rate": 4.992657770053032e-05, + "loss": 1.0389, + "step": 24499 + }, + { + "epoch": 0.02, + "learning_rate": 4.992657168488703e-05, + "loss": 1.1692, + "step": 24500 + }, + { + "epoch": 0.02, + "eval_loss": 1.0119251012802124, + "eval_runtime": 83.4411, + "eval_samples_per_second": 16.599, + "eval_steps_per_second": 4.159, + "step": 24500 + }, + { + "epoch": 0.02, + "learning_rate": 4.992656566899766e-05, + "loss": 1.1565, + "step": 24501 + }, + { + "epoch": 0.02, + "learning_rate": 4.992655965286224e-05, + "loss": 1.0502, + "step": 24502 + }, + { + "epoch": 0.02, + "learning_rate": 4.992655363648075e-05, + "loss": 0.9798, + "step": 24503 + }, + { + "epoch": 0.02, + "learning_rate": 4.99265476198532e-05, + "loss": 0.919, + "step": 24504 + }, + { + "epoch": 0.02, + "learning_rate": 4.992654160297958e-05, + "loss": 1.1145, + "step": 24505 + }, + { + "epoch": 0.02, + "learning_rate": 4.99265355858599e-05, + "loss": 1.0337, + "step": 24506 + }, + { + "epoch": 0.02, + "learning_rate": 4.992652956849415e-05, + "loss": 1.0843, + "step": 24507 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926523550882345e-05, + "loss": 0.9274, + "step": 24508 + }, + { + "epoch": 0.02, + "learning_rate": 4.992651753302446e-05, + "loss": 1.1041, + "step": 24509 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926511514920525e-05, + "loss": 1.0121, + "step": 24510 + }, + { + "epoch": 0.02, + "learning_rate": 4.992650549657052e-05, + "loss": 0.9965, + "step": 24511 + }, + { + "epoch": 0.02, + "learning_rate": 4.992649947797445e-05, + "loss": 0.8199, + "step": 24512 + }, + { + "epoch": 0.02, + "learning_rate": 4.992649345913233e-05, + "loss": 1.2358, + "step": 24513 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926487440044134e-05, + "loss": 1.5196, + "step": 24514 + }, + { + "epoch": 0.02, + "learning_rate": 4.992648142070987e-05, + "loss": 0.8739, + "step": 24515 + }, + { + "epoch": 0.02, + "learning_rate": 4.992647540112955e-05, + "loss": 1.2081, + "step": 24516 + }, + { + "epoch": 0.02, + "learning_rate": 4.992646938130316e-05, + "loss": 1.113, + "step": 24517 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926463361230715e-05, + "loss": 1.2594, + "step": 24518 + }, + { + "epoch": 0.02, + "learning_rate": 4.99264573409122e-05, + "loss": 1.1732, + "step": 24519 + }, + { + "epoch": 0.02, + "learning_rate": 4.992645132034763e-05, + "loss": 1.0771, + "step": 24520 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926445299536986e-05, + "loss": 1.1254, + "step": 24521 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926439278480286e-05, + "loss": 0.9157, + "step": 24522 + }, + { + "epoch": 0.02, + "learning_rate": 4.992643325717752e-05, + "loss": 0.945, + "step": 24523 + }, + { + "epoch": 0.02, + "learning_rate": 4.992642723562869e-05, + "loss": 1.0745, + "step": 24524 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926421213833794e-05, + "loss": 0.8978, + "step": 24525 + }, + { + "epoch": 0.02, + "learning_rate": 4.992641519179284e-05, + "loss": 1.1127, + "step": 24526 + }, + { + "epoch": 0.02, + "learning_rate": 4.992640916950582e-05, + "loss": 1.1984, + "step": 24527 + }, + { + "epoch": 0.02, + "learning_rate": 4.992640314697274e-05, + "loss": 0.9217, + "step": 24528 + }, + { + "epoch": 0.02, + "learning_rate": 4.992639712419359e-05, + "loss": 1.1399, + "step": 24529 + }, + { + "epoch": 0.02, + "learning_rate": 4.992639110116839e-05, + "loss": 0.7392, + "step": 24530 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926385077897117e-05, + "loss": 0.7058, + "step": 24531 + }, + { + "epoch": 0.02, + "learning_rate": 4.992637905437978e-05, + "loss": 0.6144, + "step": 24532 + }, + { + "epoch": 0.02, + "learning_rate": 4.992637303061639e-05, + "loss": 0.5799, + "step": 24533 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926367006606925e-05, + "loss": 0.576, + "step": 24534 + }, + { + "epoch": 0.02, + "learning_rate": 4.992636098235141e-05, + "loss": 0.6727, + "step": 24535 + }, + { + "epoch": 0.02, + "learning_rate": 4.992635495784982e-05, + "loss": 0.6656, + "step": 24536 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926348933102175e-05, + "loss": 0.6454, + "step": 24537 + }, + { + "epoch": 0.02, + "learning_rate": 4.992634290810847e-05, + "loss": 0.8548, + "step": 24538 + }, + { + "epoch": 0.02, + "learning_rate": 4.99263368828687e-05, + "loss": 0.9307, + "step": 24539 + }, + { + "epoch": 0.02, + "learning_rate": 4.992633085738286e-05, + "loss": 1.1607, + "step": 24540 + }, + { + "epoch": 0.02, + "learning_rate": 4.992632483165097e-05, + "loss": 1.2171, + "step": 24541 + }, + { + "epoch": 0.02, + "learning_rate": 4.992631880567301e-05, + "loss": 1.1779, + "step": 24542 + }, + { + "epoch": 0.02, + "learning_rate": 4.992631277944899e-05, + "loss": 1.2085, + "step": 24543 + }, + { + "epoch": 0.02, + "learning_rate": 4.992630675297891e-05, + "loss": 1.1175, + "step": 24544 + }, + { + "epoch": 0.02, + "learning_rate": 4.992630072626276e-05, + "loss": 0.9825, + "step": 24545 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926294699300556e-05, + "loss": 1.4949, + "step": 24546 + }, + { + "epoch": 0.02, + "learning_rate": 4.992628867209229e-05, + "loss": 1.4488, + "step": 24547 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926282644637956e-05, + "loss": 1.0446, + "step": 24548 + }, + { + "epoch": 0.02, + "learning_rate": 4.992627661693757e-05, + "loss": 1.0881, + "step": 24549 + }, + { + "epoch": 0.02, + "learning_rate": 4.992627058899112e-05, + "loss": 1.4002, + "step": 24550 + }, + { + "epoch": 0.02, + "learning_rate": 4.99262645607986e-05, + "loss": 1.203, + "step": 24551 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926258532360026e-05, + "loss": 0.9141, + "step": 24552 + }, + { + "epoch": 0.02, + "learning_rate": 4.992625250367539e-05, + "loss": 1.1793, + "step": 24553 + }, + { + "epoch": 0.02, + "learning_rate": 4.992624647474469e-05, + "loss": 1.056, + "step": 24554 + }, + { + "epoch": 0.02, + "learning_rate": 4.992624044556793e-05, + "loss": 1.0449, + "step": 24555 + }, + { + "epoch": 0.02, + "learning_rate": 4.99262344161451e-05, + "loss": 1.2581, + "step": 24556 + }, + { + "epoch": 0.02, + "learning_rate": 4.992622838647622e-05, + "loss": 1.0222, + "step": 24557 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926222356561274e-05, + "loss": 1.0478, + "step": 24558 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926216326400264e-05, + "loss": 1.0678, + "step": 24559 + }, + { + "epoch": 0.02, + "learning_rate": 4.99262102959932e-05, + "loss": 0.9311, + "step": 24560 + }, + { + "epoch": 0.02, + "learning_rate": 4.992620426534007e-05, + "loss": 1.2125, + "step": 24561 + }, + { + "epoch": 0.02, + "learning_rate": 4.992619823444088e-05, + "loss": 0.8974, + "step": 24562 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926192203295634e-05, + "loss": 1.0543, + "step": 24563 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926186171904325e-05, + "loss": 1.0005, + "step": 24564 + }, + { + "epoch": 0.02, + "learning_rate": 4.992618014026696e-05, + "loss": 1.0912, + "step": 24565 + }, + { + "epoch": 0.02, + "learning_rate": 4.992617410838352e-05, + "loss": 1.1329, + "step": 24566 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926168076254035e-05, + "loss": 1.211, + "step": 24567 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926162043878474e-05, + "loss": 1.0138, + "step": 24568 + }, + { + "epoch": 0.02, + "learning_rate": 4.992615601125686e-05, + "loss": 1.0469, + "step": 24569 + }, + { + "epoch": 0.02, + "learning_rate": 4.992614997838919e-05, + "loss": 0.9207, + "step": 24570 + }, + { + "epoch": 0.02, + "learning_rate": 4.992614394527545e-05, + "loss": 1.1993, + "step": 24571 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926137911915655e-05, + "loss": 1.0654, + "step": 24572 + }, + { + "epoch": 0.02, + "learning_rate": 4.99261318783098e-05, + "loss": 1.504, + "step": 24573 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926125844457884e-05, + "loss": 1.1942, + "step": 24574 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926119810359905e-05, + "loss": 1.1104, + "step": 24575 + }, + { + "epoch": 0.02, + "learning_rate": 4.992611377601587e-05, + "loss": 0.8931, + "step": 24576 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926107741425776e-05, + "loss": 0.9287, + "step": 24577 + }, + { + "epoch": 0.02, + "learning_rate": 4.992610170658962e-05, + "loss": 1.2326, + "step": 24578 + }, + { + "epoch": 0.02, + "learning_rate": 4.99260956715074e-05, + "loss": 1.0249, + "step": 24579 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926089636179124e-05, + "loss": 0.7135, + "step": 24580 + }, + { + "epoch": 0.02, + "learning_rate": 4.992608360060479e-05, + "loss": 0.621, + "step": 24581 + }, + { + "epoch": 0.02, + "learning_rate": 4.992607756478439e-05, + "loss": 0.8204, + "step": 24582 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926071528717935e-05, + "loss": 1.1137, + "step": 24583 + }, + { + "epoch": 0.02, + "learning_rate": 4.992606549240542e-05, + "loss": 0.9056, + "step": 24584 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926059455846844e-05, + "loss": 1.1476, + "step": 24585 + }, + { + "epoch": 0.02, + "learning_rate": 4.992605341904221e-05, + "loss": 0.3556, + "step": 24586 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926047381991515e-05, + "loss": 0.8778, + "step": 24587 + }, + { + "epoch": 0.02, + "learning_rate": 4.992604134469476e-05, + "loss": 0.8891, + "step": 24588 + }, + { + "epoch": 0.02, + "learning_rate": 4.992603530715195e-05, + "loss": 1.1661, + "step": 24589 + }, + { + "epoch": 0.02, + "learning_rate": 4.992602926936308e-05, + "loss": 1.3675, + "step": 24590 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926023231328146e-05, + "loss": 1.1644, + "step": 24591 + }, + { + "epoch": 0.02, + "learning_rate": 4.992601719304716e-05, + "loss": 1.0584, + "step": 24592 + }, + { + "epoch": 0.02, + "learning_rate": 4.99260111545201e-05, + "loss": 1.1671, + "step": 24593 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926005115747e-05, + "loss": 0.9462, + "step": 24594 + }, + { + "epoch": 0.02, + "learning_rate": 4.992599907672783e-05, + "loss": 0.9978, + "step": 24595 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925993037462604e-05, + "loss": 1.2193, + "step": 24596 + }, + { + "epoch": 0.02, + "learning_rate": 4.992598699795132e-05, + "loss": 0.6275, + "step": 24597 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925980958193975e-05, + "loss": 1.0742, + "step": 24598 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925974918190564e-05, + "loss": 1.2055, + "step": 24599 + }, + { + "epoch": 0.02, + "learning_rate": 4.992596887794111e-05, + "loss": 0.8926, + "step": 24600 + }, + { + "epoch": 0.02, + "learning_rate": 4.992596283744558e-05, + "loss": 1.2732, + "step": 24601 + }, + { + "epoch": 0.02, + "learning_rate": 4.992595679670401e-05, + "loss": 1.0264, + "step": 24602 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925950755716364e-05, + "loss": 0.6964, + "step": 24603 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925944714482675e-05, + "loss": 1.2781, + "step": 24604 + }, + { + "epoch": 0.02, + "learning_rate": 4.992593867300291e-05, + "loss": 1.0458, + "step": 24605 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925932631277105e-05, + "loss": 1.132, + "step": 24606 + }, + { + "epoch": 0.02, + "learning_rate": 4.992592658930523e-05, + "loss": 1.0652, + "step": 24607 + }, + { + "epoch": 0.02, + "learning_rate": 4.99259205470873e-05, + "loss": 0.8006, + "step": 24608 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925914504623315e-05, + "loss": 0.9243, + "step": 24609 + }, + { + "epoch": 0.02, + "learning_rate": 4.992590846191327e-05, + "loss": 0.9712, + "step": 24610 + }, + { + "epoch": 0.02, + "learning_rate": 4.992590241895716e-05, + "loss": 0.9978, + "step": 24611 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925896375755e-05, + "loss": 0.8557, + "step": 24612 + }, + { + "epoch": 0.02, + "learning_rate": 4.992589033230678e-05, + "loss": 1.0034, + "step": 24613 + }, + { + "epoch": 0.02, + "learning_rate": 4.99258842886125e-05, + "loss": 0.884, + "step": 24614 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925878244672164e-05, + "loss": 0.839, + "step": 24615 + }, + { + "epoch": 0.02, + "learning_rate": 4.992587220048577e-05, + "loss": 1.0983, + "step": 24616 + }, + { + "epoch": 0.02, + "learning_rate": 4.992586615605333e-05, + "loss": 1.2438, + "step": 24617 + }, + { + "epoch": 0.02, + "learning_rate": 4.992586011137481e-05, + "loss": 0.9623, + "step": 24618 + }, + { + "epoch": 0.02, + "learning_rate": 4.992585406645025e-05, + "loss": 0.9664, + "step": 24619 + }, + { + "epoch": 0.02, + "learning_rate": 4.992584802127962e-05, + "loss": 1.1733, + "step": 24620 + }, + { + "epoch": 0.02, + "learning_rate": 4.992584197586294e-05, + "loss": 1.3532, + "step": 24621 + }, + { + "epoch": 0.02, + "learning_rate": 4.99258359302002e-05, + "loss": 0.9588, + "step": 24622 + }, + { + "epoch": 0.02, + "learning_rate": 4.992582988429141e-05, + "loss": 1.8154, + "step": 24623 + }, + { + "epoch": 0.02, + "learning_rate": 4.992582383813655e-05, + "loss": 0.7109, + "step": 24624 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925817791735644e-05, + "loss": 0.761, + "step": 24625 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925811745088675e-05, + "loss": 0.9894, + "step": 24626 + }, + { + "epoch": 0.02, + "learning_rate": 4.992580569819565e-05, + "loss": 0.8915, + "step": 24627 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925799651056574e-05, + "loss": 0.9638, + "step": 24628 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925793603671426e-05, + "loss": 0.9968, + "step": 24629 + }, + { + "epoch": 0.02, + "learning_rate": 4.992578755604023e-05, + "loss": 0.9739, + "step": 24630 + }, + { + "epoch": 0.02, + "learning_rate": 4.992578150816298e-05, + "loss": 1.0269, + "step": 24631 + }, + { + "epoch": 0.02, + "learning_rate": 4.992577546003967e-05, + "loss": 1.0936, + "step": 24632 + }, + { + "epoch": 0.02, + "learning_rate": 4.99257694116703e-05, + "loss": 1.0479, + "step": 24633 + }, + { + "epoch": 0.02, + "learning_rate": 4.992576336305488e-05, + "loss": 1.2263, + "step": 24634 + }, + { + "epoch": 0.02, + "learning_rate": 4.99257573141934e-05, + "loss": 0.9192, + "step": 24635 + }, + { + "epoch": 0.02, + "learning_rate": 4.992575126508586e-05, + "loss": 1.0719, + "step": 24636 + }, + { + "epoch": 0.02, + "learning_rate": 4.992574521573227e-05, + "loss": 0.8153, + "step": 24637 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925739166132626e-05, + "loss": 1.0202, + "step": 24638 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925733116286916e-05, + "loss": 1.0385, + "step": 24639 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925727066195146e-05, + "loss": 1.1777, + "step": 24640 + }, + { + "epoch": 0.02, + "learning_rate": 4.992572101585734e-05, + "loss": 1.1301, + "step": 24641 + }, + { + "epoch": 0.02, + "learning_rate": 4.992571496527346e-05, + "loss": 0.521, + "step": 24642 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925708914443526e-05, + "loss": 0.3122, + "step": 24643 + }, + { + "epoch": 0.02, + "learning_rate": 4.992570286336754e-05, + "loss": 0.4167, + "step": 24644 + }, + { + "epoch": 0.02, + "learning_rate": 4.99256968120455e-05, + "loss": 0.9854, + "step": 24645 + }, + { + "epoch": 0.02, + "learning_rate": 4.99256907604774e-05, + "loss": 1.1117, + "step": 24646 + }, + { + "epoch": 0.02, + "learning_rate": 4.992568470866324e-05, + "loss": 0.9005, + "step": 24647 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925678656603036e-05, + "loss": 0.6621, + "step": 24648 + }, + { + "epoch": 0.02, + "learning_rate": 4.992567260429677e-05, + "loss": 1.0432, + "step": 24649 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925666551744444e-05, + "loss": 1.05, + "step": 24650 + }, + { + "epoch": 0.02, + "learning_rate": 4.992566049894607e-05, + "loss": 0.9411, + "step": 24651 + }, + { + "epoch": 0.02, + "learning_rate": 4.992565444590164e-05, + "loss": 1.0306, + "step": 24652 + }, + { + "epoch": 0.02, + "learning_rate": 4.992564839261115e-05, + "loss": 1.1315, + "step": 24653 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925642339074604e-05, + "loss": 1.261, + "step": 24654 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925636285292e-05, + "loss": 1.1779, + "step": 24655 + }, + { + "epoch": 0.02, + "learning_rate": 4.992563023126334e-05, + "loss": 0.8318, + "step": 24656 + }, + { + "epoch": 0.02, + "learning_rate": 4.992562417698864e-05, + "loss": 1.0357, + "step": 24657 + }, + { + "epoch": 0.02, + "learning_rate": 4.992561812246787e-05, + "loss": 1.0619, + "step": 24658 + }, + { + "epoch": 0.02, + "learning_rate": 4.992561206770105e-05, + "loss": 1.4076, + "step": 24659 + }, + { + "epoch": 0.02, + "learning_rate": 4.992560601268817e-05, + "loss": 0.951, + "step": 24660 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925599957429244e-05, + "loss": 1.1716, + "step": 24661 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925593901924256e-05, + "loss": 1.1523, + "step": 24662 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925587846173215e-05, + "loss": 0.7192, + "step": 24663 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925581790176115e-05, + "loss": 0.9731, + "step": 24664 + }, + { + "epoch": 0.02, + "learning_rate": 4.992557573393297e-05, + "loss": 1.1622, + "step": 24665 + }, + { + "epoch": 0.02, + "learning_rate": 4.992556967744376e-05, + "loss": 0.9156, + "step": 24666 + }, + { + "epoch": 0.02, + "learning_rate": 4.99255636207085e-05, + "loss": 0.807, + "step": 24667 + }, + { + "epoch": 0.02, + "learning_rate": 4.992555756372719e-05, + "loss": 0.9207, + "step": 24668 + }, + { + "epoch": 0.02, + "learning_rate": 4.992555150649981e-05, + "loss": 0.3062, + "step": 24669 + }, + { + "epoch": 0.02, + "learning_rate": 4.992554544902639e-05, + "loss": 0.2771, + "step": 24670 + }, + { + "epoch": 0.02, + "learning_rate": 4.992553939130691e-05, + "loss": 0.3224, + "step": 24671 + }, + { + "epoch": 0.02, + "learning_rate": 4.992553333334138e-05, + "loss": 0.9651, + "step": 24672 + }, + { + "epoch": 0.02, + "learning_rate": 4.992552727512979e-05, + "loss": 0.8582, + "step": 24673 + }, + { + "epoch": 0.02, + "learning_rate": 4.992552121667215e-05, + "loss": 0.8406, + "step": 24674 + }, + { + "epoch": 0.02, + "learning_rate": 4.992551515796845e-05, + "loss": 1.1696, + "step": 24675 + }, + { + "epoch": 0.02, + "learning_rate": 4.99255090990187e-05, + "loss": 0.9383, + "step": 24676 + }, + { + "epoch": 0.02, + "learning_rate": 4.99255030398229e-05, + "loss": 1.0347, + "step": 24677 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925496980381037e-05, + "loss": 0.9808, + "step": 24678 + }, + { + "epoch": 0.02, + "learning_rate": 4.992549092069313e-05, + "loss": 1.2078, + "step": 24679 + }, + { + "epoch": 0.02, + "learning_rate": 4.992548486075916e-05, + "loss": 1.2208, + "step": 24680 + }, + { + "epoch": 0.02, + "learning_rate": 4.992547880057914e-05, + "loss": 0.4045, + "step": 24681 + }, + { + "epoch": 0.02, + "learning_rate": 4.992547274015307e-05, + "loss": 1.1093, + "step": 24682 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925466679480935e-05, + "loss": 1.2134, + "step": 24683 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925460618562756e-05, + "loss": 0.7896, + "step": 24684 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925454557398524e-05, + "loss": 0.4962, + "step": 24685 + }, + { + "epoch": 0.02, + "learning_rate": 4.992544849598824e-05, + "loss": 0.5568, + "step": 24686 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925442434331895e-05, + "loss": 0.8523, + "step": 24687 + }, + { + "epoch": 0.02, + "learning_rate": 4.992543637242949e-05, + "loss": 1.1279, + "step": 24688 + }, + { + "epoch": 0.02, + "learning_rate": 4.992543031028105e-05, + "loss": 1.1394, + "step": 24689 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925424247886544e-05, + "loss": 1.688, + "step": 24690 + }, + { + "epoch": 0.02, + "learning_rate": 4.992541818524599e-05, + "loss": 1.5875, + "step": 24691 + }, + { + "epoch": 0.02, + "learning_rate": 4.992541212235938e-05, + "loss": 1.1066, + "step": 24692 + }, + { + "epoch": 0.02, + "learning_rate": 4.992540605922672e-05, + "loss": 1.1852, + "step": 24693 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925399995848e-05, + "loss": 1.0187, + "step": 24694 + }, + { + "epoch": 0.02, + "learning_rate": 4.992539393222323e-05, + "loss": 1.1848, + "step": 24695 + }, + { + "epoch": 0.02, + "learning_rate": 4.992538786835241e-05, + "loss": 1.0521, + "step": 24696 + }, + { + "epoch": 0.02, + "learning_rate": 4.992538180423554e-05, + "loss": 1.072, + "step": 24697 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925375739872607e-05, + "loss": 1.0174, + "step": 24698 + }, + { + "epoch": 0.02, + "learning_rate": 4.992536967526363e-05, + "loss": 0.8966, + "step": 24699 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925363610408595e-05, + "loss": 0.576, + "step": 24700 + }, + { + "epoch": 0.02, + "learning_rate": 4.992535754530751e-05, + "loss": 0.0022, + "step": 24701 + }, + { + "epoch": 0.02, + "learning_rate": 4.992535147996037e-05, + "loss": 0.6625, + "step": 24702 + }, + { + "epoch": 0.02, + "learning_rate": 4.992534541436718e-05, + "loss": 0.0012, + "step": 24703 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925339348527937e-05, + "loss": 0.4065, + "step": 24704 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925333282442646e-05, + "loss": 1.0099, + "step": 24705 + }, + { + "epoch": 0.02, + "learning_rate": 4.99253272161113e-05, + "loss": 0.7672, + "step": 24706 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925321149533894e-05, + "loss": 1.1957, + "step": 24707 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925315082710445e-05, + "loss": 1.0584, + "step": 24708 + }, + { + "epoch": 0.02, + "learning_rate": 4.992530901564094e-05, + "loss": 0.949, + "step": 24709 + }, + { + "epoch": 0.02, + "learning_rate": 4.992530294832538e-05, + "loss": 0.9009, + "step": 24710 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925296880763775e-05, + "loss": 1.4002, + "step": 24711 + }, + { + "epoch": 0.02, + "learning_rate": 4.992529081295611e-05, + "loss": 1.1565, + "step": 24712 + }, + { + "epoch": 0.02, + "learning_rate": 4.99252847449024e-05, + "loss": 0.6269, + "step": 24713 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925278676602635e-05, + "loss": 1.0191, + "step": 24714 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925272608056816e-05, + "loss": 1.0224, + "step": 24715 + }, + { + "epoch": 0.02, + "learning_rate": 4.992526653926495e-05, + "loss": 1.3352, + "step": 24716 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925260470227026e-05, + "loss": 1.1393, + "step": 24717 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925254400943054e-05, + "loss": 1.3045, + "step": 24718 + }, + { + "epoch": 0.02, + "learning_rate": 4.992524833141303e-05, + "loss": 1.1809, + "step": 24719 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925242261636954e-05, + "loss": 1.1072, + "step": 24720 + }, + { + "epoch": 0.02, + "learning_rate": 4.992523619161483e-05, + "loss": 0.8779, + "step": 24721 + }, + { + "epoch": 0.02, + "learning_rate": 4.992523012134665e-05, + "loss": 0.9891, + "step": 24722 + }, + { + "epoch": 0.02, + "learning_rate": 4.992522405083242e-05, + "loss": 1.0986, + "step": 24723 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925217980072144e-05, + "loss": 0.815, + "step": 24724 + }, + { + "epoch": 0.02, + "learning_rate": 4.99252119090658e-05, + "loss": 1.2872, + "step": 24725 + }, + { + "epoch": 0.02, + "learning_rate": 4.992520583781343e-05, + "loss": 0.8402, + "step": 24726 + }, + { + "epoch": 0.02, + "learning_rate": 4.992519976631499e-05, + "loss": 1.0987, + "step": 24727 + }, + { + "epoch": 0.02, + "learning_rate": 4.99251936945705e-05, + "loss": 1.0828, + "step": 24728 + }, + { + "epoch": 0.02, + "learning_rate": 4.992518762257997e-05, + "loss": 0.8079, + "step": 24729 + }, + { + "epoch": 0.02, + "learning_rate": 4.992518155034338e-05, + "loss": 0.6275, + "step": 24730 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925175477860745e-05, + "loss": 1.0216, + "step": 24731 + }, + { + "epoch": 0.02, + "learning_rate": 4.992516940513205e-05, + "loss": 0.8008, + "step": 24732 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925163332157316e-05, + "loss": 1.1504, + "step": 24733 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925157258936525e-05, + "loss": 1.0071, + "step": 24734 + }, + { + "epoch": 0.02, + "learning_rate": 4.992515118546969e-05, + "loss": 1.0931, + "step": 24735 + }, + { + "epoch": 0.02, + "learning_rate": 4.992514511175679e-05, + "loss": 1.1209, + "step": 24736 + }, + { + "epoch": 0.02, + "learning_rate": 4.992513903779785e-05, + "loss": 1.419, + "step": 24737 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925132963592855e-05, + "loss": 2.4437, + "step": 24738 + }, + { + "epoch": 0.02, + "learning_rate": 4.992512688914181e-05, + "loss": 2.3806, + "step": 24739 + }, + { + "epoch": 0.02, + "learning_rate": 4.992512081444472e-05, + "loss": 2.2704, + "step": 24740 + }, + { + "epoch": 0.02, + "learning_rate": 4.992511473950158e-05, + "loss": 2.2244, + "step": 24741 + }, + { + "epoch": 0.02, + "learning_rate": 4.992510866431238e-05, + "loss": 2.1305, + "step": 24742 + }, + { + "epoch": 0.02, + "learning_rate": 4.992510258887714e-05, + "loss": 1.9933, + "step": 24743 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925096513195846e-05, + "loss": 1.8875, + "step": 24744 + }, + { + "epoch": 0.02, + "learning_rate": 4.99250904372685e-05, + "loss": 1.69, + "step": 24745 + }, + { + "epoch": 0.02, + "learning_rate": 4.992508436109511e-05, + "loss": 1.5305, + "step": 24746 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925078284675665e-05, + "loss": 1.503, + "step": 24747 + }, + { + "epoch": 0.02, + "learning_rate": 4.992507220801017e-05, + "loss": 1.4654, + "step": 24748 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925066131098624e-05, + "loss": 1.4004, + "step": 24749 + }, + { + "epoch": 0.02, + "learning_rate": 4.992506005394103e-05, + "loss": 1.4038, + "step": 24750 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925053976537386e-05, + "loss": 1.3233, + "step": 24751 + }, + { + "epoch": 0.02, + "learning_rate": 4.99250478988877e-05, + "loss": 1.3287, + "step": 24752 + }, + { + "epoch": 0.02, + "learning_rate": 4.992504182099195e-05, + "loss": 1.289, + "step": 24753 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925035742850154e-05, + "loss": 1.4036, + "step": 24754 + }, + { + "epoch": 0.02, + "learning_rate": 4.992502966446232e-05, + "loss": 1.286, + "step": 24755 + }, + { + "epoch": 0.02, + "learning_rate": 4.992502358582842e-05, + "loss": 1.4152, + "step": 24756 + }, + { + "epoch": 0.02, + "learning_rate": 4.992501750694849e-05, + "loss": 1.322, + "step": 24757 + }, + { + "epoch": 0.02, + "learning_rate": 4.99250114278225e-05, + "loss": 1.2045, + "step": 24758 + }, + { + "epoch": 0.02, + "learning_rate": 4.992500534845046e-05, + "loss": 1.1986, + "step": 24759 + }, + { + "epoch": 0.02, + "learning_rate": 4.992499926883237e-05, + "loss": 1.16, + "step": 24760 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924993188968234e-05, + "loss": 1.1795, + "step": 24761 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924987108858043e-05, + "loss": 1.238, + "step": 24762 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924981028501814e-05, + "loss": 1.1965, + "step": 24763 + }, + { + "epoch": 0.02, + "learning_rate": 4.992497494789953e-05, + "loss": 0.8114, + "step": 24764 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924968867051195e-05, + "loss": 1.0925, + "step": 24765 + }, + { + "epoch": 0.02, + "learning_rate": 4.992496278595681e-05, + "loss": 0.9681, + "step": 24766 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924956704616385e-05, + "loss": 1.2626, + "step": 24767 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924950623029904e-05, + "loss": 1.1054, + "step": 24768 + }, + { + "epoch": 0.02, + "learning_rate": 4.992494454119738e-05, + "loss": 0.8193, + "step": 24769 + }, + { + "epoch": 0.02, + "learning_rate": 4.99249384591188e-05, + "loss": 1.2712, + "step": 24770 + }, + { + "epoch": 0.02, + "learning_rate": 4.992493237679418e-05, + "loss": 1.3307, + "step": 24771 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924926294223506e-05, + "loss": 0.9741, + "step": 24772 + }, + { + "epoch": 0.02, + "learning_rate": 4.992492021140678e-05, + "loss": 1.1622, + "step": 24773 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924914128344016e-05, + "loss": 1.1095, + "step": 24774 + }, + { + "epoch": 0.02, + "learning_rate": 4.992490804503519e-05, + "loss": 1.3685, + "step": 24775 + }, + { + "epoch": 0.02, + "learning_rate": 4.992490196148033e-05, + "loss": 1.0401, + "step": 24776 + }, + { + "epoch": 0.02, + "learning_rate": 4.992489587767941e-05, + "loss": 1.0276, + "step": 24777 + }, + { + "epoch": 0.02, + "learning_rate": 4.992488979363245e-05, + "loss": 0.583, + "step": 24778 + }, + { + "epoch": 0.02, + "learning_rate": 4.992488370933944e-05, + "loss": 0.9058, + "step": 24779 + }, + { + "epoch": 0.02, + "learning_rate": 4.992487762480038e-05, + "loss": 0.9159, + "step": 24780 + }, + { + "epoch": 0.02, + "learning_rate": 4.992487154001528e-05, + "loss": 1.078, + "step": 24781 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924865454984125e-05, + "loss": 1.3363, + "step": 24782 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924859369706925e-05, + "loss": 1.0105, + "step": 24783 + }, + { + "epoch": 0.02, + "learning_rate": 4.992485328418367e-05, + "loss": 0.9579, + "step": 24784 + }, + { + "epoch": 0.02, + "learning_rate": 4.992484719841437e-05, + "loss": 1.2003, + "step": 24785 + }, + { + "epoch": 0.02, + "learning_rate": 4.992484111239903e-05, + "loss": 1.2073, + "step": 24786 + }, + { + "epoch": 0.02, + "learning_rate": 4.992483502613764e-05, + "loss": 1.2123, + "step": 24787 + }, + { + "epoch": 0.02, + "learning_rate": 4.99248289396302e-05, + "loss": 0.9298, + "step": 24788 + }, + { + "epoch": 0.02, + "learning_rate": 4.992482285287671e-05, + "loss": 1.3311, + "step": 24789 + }, + { + "epoch": 0.02, + "learning_rate": 4.992481676587717e-05, + "loss": 1.2886, + "step": 24790 + }, + { + "epoch": 0.02, + "learning_rate": 4.99248106786316e-05, + "loss": 1.2508, + "step": 24791 + }, + { + "epoch": 0.02, + "learning_rate": 4.992480459113996e-05, + "loss": 0.8291, + "step": 24792 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924798503402294e-05, + "loss": 1.194, + "step": 24793 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924792415418566e-05, + "loss": 1.1389, + "step": 24794 + }, + { + "epoch": 0.02, + "learning_rate": 4.99247863271888e-05, + "loss": 1.5175, + "step": 24795 + }, + { + "epoch": 0.02, + "learning_rate": 4.992478023871298e-05, + "loss": 0.9886, + "step": 24796 + }, + { + "epoch": 0.02, + "learning_rate": 4.992477414999111e-05, + "loss": 1.0344, + "step": 24797 + }, + { + "epoch": 0.02, + "learning_rate": 4.99247680610232e-05, + "loss": 1.4679, + "step": 24798 + }, + { + "epoch": 0.02, + "learning_rate": 4.992476197180924e-05, + "loss": 0.8572, + "step": 24799 + }, + { + "epoch": 0.02, + "learning_rate": 4.992475588234924e-05, + "loss": 0.8238, + "step": 24800 + }, + { + "epoch": 0.02, + "learning_rate": 4.992474979264319e-05, + "loss": 1.404, + "step": 24801 + }, + { + "epoch": 0.02, + "learning_rate": 4.992474370269109e-05, + "loss": 1.3466, + "step": 24802 + }, + { + "epoch": 0.02, + "learning_rate": 4.992473761249295e-05, + "loss": 0.5016, + "step": 24803 + }, + { + "epoch": 0.02, + "learning_rate": 4.992473152204876e-05, + "loss": 1.1906, + "step": 24804 + }, + { + "epoch": 0.02, + "learning_rate": 4.992472543135852e-05, + "loss": 0.546, + "step": 24805 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924719340422235e-05, + "loss": 1.372, + "step": 24806 + }, + { + "epoch": 0.02, + "learning_rate": 4.992471324923991e-05, + "loss": 0.6404, + "step": 24807 + }, + { + "epoch": 0.02, + "learning_rate": 4.992470715781153e-05, + "loss": 0.9759, + "step": 24808 + }, + { + "epoch": 0.02, + "learning_rate": 4.99247010661371e-05, + "loss": 0.7342, + "step": 24809 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924694974216635e-05, + "loss": 0.9617, + "step": 24810 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924688882050116e-05, + "loss": 0.8479, + "step": 24811 + }, + { + "epoch": 0.02, + "learning_rate": 4.992468278963756e-05, + "loss": 1.0129, + "step": 24812 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924676696978954e-05, + "loss": 0.9736, + "step": 24813 + }, + { + "epoch": 0.02, + "learning_rate": 4.99246706040743e-05, + "loss": 1.1784, + "step": 24814 + }, + { + "epoch": 0.02, + "learning_rate": 4.99246645109236e-05, + "loss": 1.0536, + "step": 24815 + }, + { + "epoch": 0.02, + "learning_rate": 4.992465841752686e-05, + "loss": 1.1448, + "step": 24816 + }, + { + "epoch": 0.02, + "learning_rate": 4.992465232388407e-05, + "loss": 0.4279, + "step": 24817 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924646229995234e-05, + "loss": 1.0984, + "step": 24818 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924640135860354e-05, + "loss": 1.0264, + "step": 24819 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924634041479426e-05, + "loss": 1.0276, + "step": 24820 + }, + { + "epoch": 0.02, + "learning_rate": 4.992462794685245e-05, + "loss": 1.2264, + "step": 24821 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924621851979434e-05, + "loss": 0.5969, + "step": 24822 + }, + { + "epoch": 0.02, + "learning_rate": 4.992461575686037e-05, + "loss": 0.6712, + "step": 24823 + }, + { + "epoch": 0.02, + "learning_rate": 4.992460966149526e-05, + "loss": 1.2668, + "step": 24824 + }, + { + "epoch": 0.02, + "learning_rate": 4.992460356588411e-05, + "loss": 1.0837, + "step": 24825 + }, + { + "epoch": 0.02, + "learning_rate": 4.992459747002691e-05, + "loss": 0.9508, + "step": 24826 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924591373923665e-05, + "loss": 0.9767, + "step": 24827 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924585277574376e-05, + "loss": 1.109, + "step": 24828 + }, + { + "epoch": 0.02, + "learning_rate": 4.992457918097904e-05, + "loss": 0.8894, + "step": 24829 + }, + { + "epoch": 0.02, + "learning_rate": 4.992457308413766e-05, + "loss": 1.5066, + "step": 24830 + }, + { + "epoch": 0.02, + "learning_rate": 4.992456698705024e-05, + "loss": 1.1599, + "step": 24831 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924560889716764e-05, + "loss": 1.2186, + "step": 24832 + }, + { + "epoch": 0.02, + "learning_rate": 4.992455479213726e-05, + "loss": 1.219, + "step": 24833 + }, + { + "epoch": 0.02, + "learning_rate": 4.99245486943117e-05, + "loss": 1.1151, + "step": 24834 + }, + { + "epoch": 0.02, + "learning_rate": 4.99245425962401e-05, + "loss": 1.089, + "step": 24835 + }, + { + "epoch": 0.02, + "learning_rate": 4.992453649792245e-05, + "loss": 1.0572, + "step": 24836 + }, + { + "epoch": 0.02, + "learning_rate": 4.992453039935876e-05, + "loss": 1.0204, + "step": 24837 + }, + { + "epoch": 0.02, + "learning_rate": 4.992452430054902e-05, + "loss": 1.1187, + "step": 24838 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924518201493236e-05, + "loss": 0.7574, + "step": 24839 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924512102191414e-05, + "loss": 0.2656, + "step": 24840 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924506002643545e-05, + "loss": 0.2236, + "step": 24841 + }, + { + "epoch": 0.02, + "learning_rate": 4.992449990284963e-05, + "loss": 0.2413, + "step": 24842 + }, + { + "epoch": 0.02, + "learning_rate": 4.992449380280967e-05, + "loss": 0.362, + "step": 24843 + }, + { + "epoch": 0.02, + "learning_rate": 4.992448770252367e-05, + "loss": 0.7748, + "step": 24844 + }, + { + "epoch": 0.02, + "learning_rate": 4.992448160199162e-05, + "loss": 1.1207, + "step": 24845 + }, + { + "epoch": 0.02, + "learning_rate": 4.992447550121353e-05, + "loss": 0.9493, + "step": 24846 + }, + { + "epoch": 0.02, + "learning_rate": 4.99244694001894e-05, + "loss": 0.9208, + "step": 24847 + }, + { + "epoch": 0.02, + "learning_rate": 4.992446329891922e-05, + "loss": 0.9672, + "step": 24848 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924457197402995e-05, + "loss": 0.9119, + "step": 24849 + }, + { + "epoch": 0.02, + "learning_rate": 4.992445109564073e-05, + "loss": 1.0942, + "step": 24850 + }, + { + "epoch": 0.02, + "learning_rate": 4.992444499363242e-05, + "loss": 1.1255, + "step": 24851 + }, + { + "epoch": 0.02, + "learning_rate": 4.992443889137807e-05, + "loss": 0.9548, + "step": 24852 + }, + { + "epoch": 0.02, + "learning_rate": 4.992443278887767e-05, + "loss": 0.989, + "step": 24853 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924426686131236e-05, + "loss": 0.9453, + "step": 24854 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924420583138756e-05, + "loss": 1.0301, + "step": 24855 + }, + { + "epoch": 0.02, + "learning_rate": 4.992441447990023e-05, + "loss": 1.357, + "step": 24856 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924408376415656e-05, + "loss": 0.834, + "step": 24857 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924402272685044e-05, + "loss": 0.9754, + "step": 24858 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924396168708385e-05, + "loss": 1.2009, + "step": 24859 + }, + { + "epoch": 0.02, + "learning_rate": 4.992439006448569e-05, + "loss": 0.9311, + "step": 24860 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924383960016944e-05, + "loss": 0.8317, + "step": 24861 + }, + { + "epoch": 0.02, + "learning_rate": 4.992437785530216e-05, + "loss": 1.2131, + "step": 24862 + }, + { + "epoch": 0.02, + "learning_rate": 4.992437175034133e-05, + "loss": 1.0865, + "step": 24863 + }, + { + "epoch": 0.02, + "learning_rate": 4.992436564513446e-05, + "loss": 1.0464, + "step": 24864 + }, + { + "epoch": 0.02, + "learning_rate": 4.992435953968155e-05, + "loss": 1.1109, + "step": 24865 + }, + { + "epoch": 0.02, + "learning_rate": 4.992435343398259e-05, + "loss": 0.9513, + "step": 24866 + }, + { + "epoch": 0.02, + "learning_rate": 4.992434732803759e-05, + "loss": 1.1218, + "step": 24867 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924341221846546e-05, + "loss": 0.9772, + "step": 24868 + }, + { + "epoch": 0.02, + "learning_rate": 4.992433511540946e-05, + "loss": 1.1315, + "step": 24869 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924329008726335e-05, + "loss": 1.028, + "step": 24870 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924322901797164e-05, + "loss": 1.6592, + "step": 24871 + }, + { + "epoch": 0.02, + "learning_rate": 4.992431679462195e-05, + "loss": 1.175, + "step": 24872 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924310687200696e-05, + "loss": 1.2921, + "step": 24873 + }, + { + "epoch": 0.02, + "learning_rate": 4.99243045795334e-05, + "loss": 1.3849, + "step": 24874 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924298471620064e-05, + "loss": 1.0969, + "step": 24875 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924292363460676e-05, + "loss": 0.4001, + "step": 24876 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924286255055255e-05, + "loss": 0.2367, + "step": 24877 + }, + { + "epoch": 0.02, + "learning_rate": 4.992428014640379e-05, + "loss": 0.657, + "step": 24878 + }, + { + "epoch": 0.02, + "learning_rate": 4.992427403750629e-05, + "loss": 1.054, + "step": 24879 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924267928362736e-05, + "loss": 1.1969, + "step": 24880 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924261818973144e-05, + "loss": 0.9004, + "step": 24881 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924255709337505e-05, + "loss": 0.8162, + "step": 24882 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924249599455835e-05, + "loss": 0.9602, + "step": 24883 + }, + { + "epoch": 0.02, + "learning_rate": 4.992424348932812e-05, + "loss": 1.1458, + "step": 24884 + }, + { + "epoch": 0.02, + "learning_rate": 4.992423737895436e-05, + "loss": 1.1507, + "step": 24885 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924231268334566e-05, + "loss": 1.1043, + "step": 24886 + }, + { + "epoch": 0.02, + "learning_rate": 4.992422515746872e-05, + "loss": 0.9218, + "step": 24887 + }, + { + "epoch": 0.02, + "learning_rate": 4.992421904635684e-05, + "loss": 1.115, + "step": 24888 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924212934998916e-05, + "loss": 1.0292, + "step": 24889 + }, + { + "epoch": 0.02, + "learning_rate": 4.992420682339495e-05, + "loss": 1.059, + "step": 24890 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924200711544944e-05, + "loss": 1.1494, + "step": 24891 + }, + { + "epoch": 0.02, + "learning_rate": 4.992419459944889e-05, + "loss": 1.7477, + "step": 24892 + }, + { + "epoch": 0.02, + "learning_rate": 4.992418848710681e-05, + "loss": 1.6383, + "step": 24893 + }, + { + "epoch": 0.02, + "learning_rate": 4.992418237451868e-05, + "loss": 1.7584, + "step": 24894 + }, + { + "epoch": 0.02, + "learning_rate": 4.99241762616845e-05, + "loss": 0.9766, + "step": 24895 + }, + { + "epoch": 0.02, + "learning_rate": 4.992417014860429e-05, + "loss": 1.1291, + "step": 24896 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924164035278035e-05, + "loss": 1.0837, + "step": 24897 + }, + { + "epoch": 0.02, + "learning_rate": 4.992415792170575e-05, + "loss": 1.2559, + "step": 24898 + }, + { + "epoch": 0.02, + "learning_rate": 4.992415180788741e-05, + "loss": 1.5923, + "step": 24899 + }, + { + "epoch": 0.02, + "learning_rate": 4.992414569382303e-05, + "loss": 1.6998, + "step": 24900 + }, + { + "epoch": 0.02, + "learning_rate": 4.992413957951262e-05, + "loss": 1.0448, + "step": 24901 + }, + { + "epoch": 0.02, + "learning_rate": 4.992413346495616e-05, + "loss": 0.9542, + "step": 24902 + }, + { + "epoch": 0.02, + "learning_rate": 4.992412735015366e-05, + "loss": 1.3388, + "step": 24903 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924121235105124e-05, + "loss": 0.9491, + "step": 24904 + }, + { + "epoch": 0.02, + "learning_rate": 4.992411511981055e-05, + "loss": 0.9821, + "step": 24905 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924109004269923e-05, + "loss": 0.6625, + "step": 24906 + }, + { + "epoch": 0.02, + "learning_rate": 4.992410288848327e-05, + "loss": 1.0136, + "step": 24907 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924096772450566e-05, + "loss": 1.0689, + "step": 24908 + }, + { + "epoch": 0.02, + "learning_rate": 4.992409065617183e-05, + "loss": 1.1115, + "step": 24909 + }, + { + "epoch": 0.02, + "learning_rate": 4.992408453964705e-05, + "loss": 1.0123, + "step": 24910 + }, + { + "epoch": 0.02, + "learning_rate": 4.992407842287623e-05, + "loss": 1.1278, + "step": 24911 + }, + { + "epoch": 0.02, + "learning_rate": 4.992407230585937e-05, + "loss": 1.0213, + "step": 24912 + }, + { + "epoch": 0.02, + "learning_rate": 4.992406618859647e-05, + "loss": 1.0596, + "step": 24913 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924060071087534e-05, + "loss": 1.1565, + "step": 24914 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924053953332557e-05, + "loss": 0.9526, + "step": 24915 + }, + { + "epoch": 0.02, + "learning_rate": 4.992404783533153e-05, + "loss": 1.1842, + "step": 24916 + }, + { + "epoch": 0.02, + "learning_rate": 4.992404171708448e-05, + "loss": 1.0103, + "step": 24917 + }, + { + "epoch": 0.02, + "learning_rate": 4.992403559859138e-05, + "loss": 1.273, + "step": 24918 + }, + { + "epoch": 0.02, + "learning_rate": 4.992402947985224e-05, + "loss": 1.1352, + "step": 24919 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924023360867065e-05, + "loss": 1.0047, + "step": 24920 + }, + { + "epoch": 0.02, + "learning_rate": 4.9924017241635845e-05, + "loss": 1.1872, + "step": 24921 + }, + { + "epoch": 0.02, + "learning_rate": 4.992401112215859e-05, + "loss": 1.0912, + "step": 24922 + }, + { + "epoch": 0.02, + "learning_rate": 4.99240050024353e-05, + "loss": 1.0857, + "step": 24923 + }, + { + "epoch": 0.02, + "learning_rate": 4.992399888246596e-05, + "loss": 0.9572, + "step": 24924 + }, + { + "epoch": 0.02, + "learning_rate": 4.992399276225059e-05, + "loss": 1.0034, + "step": 24925 + }, + { + "epoch": 0.02, + "learning_rate": 4.992398664178918e-05, + "loss": 1.0605, + "step": 24926 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923980521081723e-05, + "loss": 0.9983, + "step": 24927 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923974400128234e-05, + "loss": 1.1913, + "step": 24928 + }, + { + "epoch": 0.02, + "learning_rate": 4.99239682789287e-05, + "loss": 1.0678, + "step": 24929 + }, + { + "epoch": 0.02, + "learning_rate": 4.992396215748313e-05, + "loss": 1.0793, + "step": 24930 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923956035791524e-05, + "loss": 0.9825, + "step": 24931 + }, + { + "epoch": 0.02, + "learning_rate": 4.992394991385388e-05, + "loss": 0.868, + "step": 24932 + }, + { + "epoch": 0.02, + "learning_rate": 4.992394379167019e-05, + "loss": 1.0859, + "step": 24933 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923937669240474e-05, + "loss": 1.1075, + "step": 24934 + }, + { + "epoch": 0.02, + "learning_rate": 4.992393154656471e-05, + "loss": 1.2649, + "step": 24935 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923925423642905e-05, + "loss": 0.6111, + "step": 24936 + }, + { + "epoch": 0.02, + "learning_rate": 4.992391930047506e-05, + "loss": 0.4073, + "step": 24937 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923913177061186e-05, + "loss": 0.514, + "step": 24938 + }, + { + "epoch": 0.02, + "learning_rate": 4.992390705340127e-05, + "loss": 0.5093, + "step": 24939 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923900929495316e-05, + "loss": 0.4732, + "step": 24940 + }, + { + "epoch": 0.02, + "learning_rate": 4.992389480534332e-05, + "loss": 0.3586, + "step": 24941 + }, + { + "epoch": 0.02, + "learning_rate": 4.992388868094529e-05, + "loss": 0.4708, + "step": 24942 + }, + { + "epoch": 0.02, + "learning_rate": 4.992388255630122e-05, + "loss": 0.4855, + "step": 24943 + }, + { + "epoch": 0.02, + "learning_rate": 4.992387643141112e-05, + "loss": 0.5097, + "step": 24944 + }, + { + "epoch": 0.02, + "learning_rate": 4.992387030627497e-05, + "loss": 0.4146, + "step": 24945 + }, + { + "epoch": 0.02, + "learning_rate": 4.992386418089279e-05, + "loss": 0.2928, + "step": 24946 + }, + { + "epoch": 0.02, + "learning_rate": 4.992385805526456e-05, + "loss": 0.2805, + "step": 24947 + }, + { + "epoch": 0.02, + "learning_rate": 4.992385192939031e-05, + "loss": 0.301, + "step": 24948 + }, + { + "epoch": 0.02, + "learning_rate": 4.992384580327001e-05, + "loss": 0.2897, + "step": 24949 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923839676903674e-05, + "loss": 0.2696, + "step": 24950 + }, + { + "epoch": 0.02, + "learning_rate": 4.99238335502913e-05, + "loss": 0.2173, + "step": 24951 + }, + { + "epoch": 0.02, + "learning_rate": 4.992382742343289e-05, + "loss": 0.2524, + "step": 24952 + }, + { + "epoch": 0.02, + "learning_rate": 4.992382129632844e-05, + "loss": 0.189, + "step": 24953 + }, + { + "epoch": 0.02, + "learning_rate": 4.992381516897796e-05, + "loss": 0.2358, + "step": 24954 + }, + { + "epoch": 0.02, + "learning_rate": 4.992380904138144e-05, + "loss": 0.2233, + "step": 24955 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923802913538874e-05, + "loss": 0.2514, + "step": 24956 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923796785450285e-05, + "loss": 0.7897, + "step": 24957 + }, + { + "epoch": 0.02, + "learning_rate": 4.992379065711564e-05, + "loss": 1.0508, + "step": 24958 + }, + { + "epoch": 0.02, + "learning_rate": 4.992378452853498e-05, + "loss": 0.9697, + "step": 24959 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923778399708266e-05, + "loss": 0.8024, + "step": 24960 + }, + { + "epoch": 0.02, + "learning_rate": 4.992377227063552e-05, + "loss": 1.1697, + "step": 24961 + }, + { + "epoch": 0.02, + "learning_rate": 4.992376614131674e-05, + "loss": 1.3325, + "step": 24962 + }, + { + "epoch": 0.02, + "learning_rate": 4.992376001175192e-05, + "loss": 1.0017, + "step": 24963 + }, + { + "epoch": 0.02, + "learning_rate": 4.992375388194107e-05, + "loss": 0.8457, + "step": 24964 + }, + { + "epoch": 0.02, + "learning_rate": 4.992374775188417e-05, + "loss": 1.2833, + "step": 24965 + }, + { + "epoch": 0.02, + "learning_rate": 4.992374162158124e-05, + "loss": 1.2185, + "step": 24966 + }, + { + "epoch": 0.02, + "learning_rate": 4.992373549103228e-05, + "loss": 1.0207, + "step": 24967 + }, + { + "epoch": 0.02, + "learning_rate": 4.992372936023727e-05, + "loss": 1.1256, + "step": 24968 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923723229196234e-05, + "loss": 1.2243, + "step": 24969 + }, + { + "epoch": 0.02, + "learning_rate": 4.992371709790916e-05, + "loss": 1.1101, + "step": 24970 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923710966376046e-05, + "loss": 0.9294, + "step": 24971 + }, + { + "epoch": 0.02, + "learning_rate": 4.992370483459689e-05, + "loss": 0.9307, + "step": 24972 + }, + { + "epoch": 0.02, + "learning_rate": 4.992369870257171e-05, + "loss": 0.9323, + "step": 24973 + }, + { + "epoch": 0.02, + "learning_rate": 4.992369257030049e-05, + "loss": 0.9699, + "step": 24974 + }, + { + "epoch": 0.02, + "learning_rate": 4.992368643778323e-05, + "loss": 1.0039, + "step": 24975 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923680305019935e-05, + "loss": 1.1591, + "step": 24976 + }, + { + "epoch": 0.02, + "learning_rate": 4.992367417201061e-05, + "loss": 0.9625, + "step": 24977 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923668038755244e-05, + "loss": 1.2073, + "step": 24978 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923661905253836e-05, + "loss": 1.2866, + "step": 24979 + }, + { + "epoch": 0.02, + "learning_rate": 4.992365577150641e-05, + "loss": 1.1023, + "step": 24980 + }, + { + "epoch": 0.02, + "learning_rate": 4.992364963751293e-05, + "loss": 1.2063, + "step": 24981 + }, + { + "epoch": 0.02, + "learning_rate": 4.992364350327342e-05, + "loss": 1.1871, + "step": 24982 + }, + { + "epoch": 0.02, + "learning_rate": 4.992363736878788e-05, + "loss": 1.288, + "step": 24983 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923631234056293e-05, + "loss": 0.7788, + "step": 24984 + }, + { + "epoch": 0.02, + "learning_rate": 4.992362509907868e-05, + "loss": 0.5713, + "step": 24985 + }, + { + "epoch": 0.02, + "learning_rate": 4.992361896385503e-05, + "loss": 1.1372, + "step": 24986 + }, + { + "epoch": 0.02, + "learning_rate": 4.992361282838534e-05, + "loss": 1.0635, + "step": 24987 + }, + { + "epoch": 0.02, + "learning_rate": 4.992360669266962e-05, + "loss": 1.2737, + "step": 24988 + }, + { + "epoch": 0.02, + "learning_rate": 4.992360055670786e-05, + "loss": 1.0093, + "step": 24989 + }, + { + "epoch": 0.02, + "learning_rate": 4.992359442050006e-05, + "loss": 0.8532, + "step": 24990 + }, + { + "epoch": 0.02, + "learning_rate": 4.992358828404624e-05, + "loss": 1.0843, + "step": 24991 + }, + { + "epoch": 0.02, + "learning_rate": 4.992358214734638e-05, + "loss": 1.3212, + "step": 24992 + }, + { + "epoch": 0.02, + "learning_rate": 4.992357601040047e-05, + "loss": 1.1594, + "step": 24993 + }, + { + "epoch": 0.02, + "learning_rate": 4.992356987320854e-05, + "loss": 0.9182, + "step": 24994 + }, + { + "epoch": 0.02, + "learning_rate": 4.992356373577057e-05, + "loss": 1.2352, + "step": 24995 + }, + { + "epoch": 0.02, + "learning_rate": 4.992355759808657e-05, + "loss": 1.2275, + "step": 24996 + }, + { + "epoch": 0.02, + "learning_rate": 4.9923551460156534e-05, + "loss": 1.3346, + "step": 24997 + }, + { + "epoch": 0.02, + "learning_rate": 4.992354532198046e-05, + "loss": 1.5031, + "step": 24998 + }, + { + "epoch": 0.02, + "learning_rate": 4.992353918355835e-05, + "loss": 1.3621, + "step": 24999 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923533044890206e-05, + "loss": 0.9476, + "step": 25000 + }, + { + "epoch": 0.03, + "eval_loss": 1.0197904109954834, + "eval_runtime": 85.2004, + "eval_samples_per_second": 16.256, + "eval_steps_per_second": 4.073, + "step": 25000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923526905976034e-05, + "loss": 0.8347, + "step": 25001 + }, + { + "epoch": 0.03, + "learning_rate": 4.992352076681582e-05, + "loss": 0.817, + "step": 25002 + }, + { + "epoch": 0.03, + "learning_rate": 4.992351462740957e-05, + "loss": 1.15, + "step": 25003 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923508487757295e-05, + "loss": 0.6978, + "step": 25004 + }, + { + "epoch": 0.03, + "learning_rate": 4.992350234785897e-05, + "loss": 0.5877, + "step": 25005 + }, + { + "epoch": 0.03, + "learning_rate": 4.992349620771462e-05, + "loss": 1.0021, + "step": 25006 + }, + { + "epoch": 0.03, + "learning_rate": 4.992349006732424e-05, + "loss": 0.9068, + "step": 25007 + }, + { + "epoch": 0.03, + "learning_rate": 4.992348392668783e-05, + "loss": 1.0197, + "step": 25008 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923477785805375e-05, + "loss": 0.8391, + "step": 25009 + }, + { + "epoch": 0.03, + "learning_rate": 4.992347164467689e-05, + "loss": 1.0577, + "step": 25010 + }, + { + "epoch": 0.03, + "learning_rate": 4.992346550330237e-05, + "loss": 0.8527, + "step": 25011 + }, + { + "epoch": 0.03, + "learning_rate": 4.992345936168181e-05, + "loss": 0.649, + "step": 25012 + }, + { + "epoch": 0.03, + "learning_rate": 4.992345321981523e-05, + "loss": 0.8938, + "step": 25013 + }, + { + "epoch": 0.03, + "learning_rate": 4.992344707770261e-05, + "loss": 0.7799, + "step": 25014 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923440935343945e-05, + "loss": 0.9586, + "step": 25015 + }, + { + "epoch": 0.03, + "learning_rate": 4.992343479273926e-05, + "loss": 1.0116, + "step": 25016 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923428649888536e-05, + "loss": 1.134, + "step": 25017 + }, + { + "epoch": 0.03, + "learning_rate": 4.992342250679178e-05, + "loss": 1.0014, + "step": 25018 + }, + { + "epoch": 0.03, + "learning_rate": 4.992341636344899e-05, + "loss": 0.7448, + "step": 25019 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923410219860166e-05, + "loss": 0.7103, + "step": 25020 + }, + { + "epoch": 0.03, + "learning_rate": 4.992340407602531e-05, + "loss": 0.8879, + "step": 25021 + }, + { + "epoch": 0.03, + "learning_rate": 4.992339793194442e-05, + "loss": 0.5409, + "step": 25022 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923391787617495e-05, + "loss": 0.7493, + "step": 25023 + }, + { + "epoch": 0.03, + "learning_rate": 4.992338564304454e-05, + "loss": 1.0124, + "step": 25024 + }, + { + "epoch": 0.03, + "learning_rate": 4.992337949822555e-05, + "loss": 1.0126, + "step": 25025 + }, + { + "epoch": 0.03, + "learning_rate": 4.992337335316053e-05, + "loss": 0.9065, + "step": 25026 + }, + { + "epoch": 0.03, + "learning_rate": 4.992336720784947e-05, + "loss": 1.2448, + "step": 25027 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923361062292386e-05, + "loss": 1.074, + "step": 25028 + }, + { + "epoch": 0.03, + "learning_rate": 4.992335491648926e-05, + "loss": 0.7659, + "step": 25029 + }, + { + "epoch": 0.03, + "learning_rate": 4.99233487704401e-05, + "loss": 0.606, + "step": 25030 + }, + { + "epoch": 0.03, + "learning_rate": 4.992334262414492e-05, + "loss": 1.0034, + "step": 25031 + }, + { + "epoch": 0.03, + "learning_rate": 4.99233364776037e-05, + "loss": 0.945, + "step": 25032 + }, + { + "epoch": 0.03, + "learning_rate": 4.992333033081644e-05, + "loss": 1.3674, + "step": 25033 + }, + { + "epoch": 0.03, + "learning_rate": 4.992332418378316e-05, + "loss": 1.0743, + "step": 25034 + }, + { + "epoch": 0.03, + "learning_rate": 4.992331803650384e-05, + "loss": 1.2407, + "step": 25035 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923311888978493e-05, + "loss": 1.0378, + "step": 25036 + }, + { + "epoch": 0.03, + "learning_rate": 4.992330574120711e-05, + "loss": 1.072, + "step": 25037 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923299593189696e-05, + "loss": 0.3395, + "step": 25038 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923293444926246e-05, + "loss": 0.2991, + "step": 25039 + }, + { + "epoch": 0.03, + "learning_rate": 4.992328729641676e-05, + "loss": 0.35, + "step": 25040 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923281147661246e-05, + "loss": 0.3478, + "step": 25041 + }, + { + "epoch": 0.03, + "learning_rate": 4.992327499865971e-05, + "loss": 1.1695, + "step": 25042 + }, + { + "epoch": 0.03, + "learning_rate": 4.992326884941213e-05, + "loss": 0.989, + "step": 25043 + }, + { + "epoch": 0.03, + "learning_rate": 4.992326269991853e-05, + "loss": 1.0827, + "step": 25044 + }, + { + "epoch": 0.03, + "learning_rate": 4.992325655017889e-05, + "loss": 1.0255, + "step": 25045 + }, + { + "epoch": 0.03, + "learning_rate": 4.992325040019321e-05, + "loss": 1.1286, + "step": 25046 + }, + { + "epoch": 0.03, + "learning_rate": 4.992324424996151e-05, + "loss": 0.7944, + "step": 25047 + }, + { + "epoch": 0.03, + "learning_rate": 4.992323809948377e-05, + "loss": 0.8027, + "step": 25048 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923231948760007e-05, + "loss": 1.2326, + "step": 25049 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923225797790204e-05, + "loss": 1.2462, + "step": 25050 + }, + { + "epoch": 0.03, + "learning_rate": 4.992321964657437e-05, + "loss": 0.7767, + "step": 25051 + }, + { + "epoch": 0.03, + "learning_rate": 4.992321349511251e-05, + "loss": 1.1514, + "step": 25052 + }, + { + "epoch": 0.03, + "learning_rate": 4.992320734340462e-05, + "loss": 0.8306, + "step": 25053 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923201191450696e-05, + "loss": 1.2511, + "step": 25054 + }, + { + "epoch": 0.03, + "learning_rate": 4.992319503925074e-05, + "loss": 1.0314, + "step": 25055 + }, + { + "epoch": 0.03, + "learning_rate": 4.992318888680475e-05, + "loss": 1.0569, + "step": 25056 + }, + { + "epoch": 0.03, + "learning_rate": 4.992318273411273e-05, + "loss": 1.1128, + "step": 25057 + }, + { + "epoch": 0.03, + "learning_rate": 4.992317658117468e-05, + "loss": 1.0062, + "step": 25058 + }, + { + "epoch": 0.03, + "learning_rate": 4.99231704279906e-05, + "loss": 0.9499, + "step": 25059 + }, + { + "epoch": 0.03, + "learning_rate": 4.992316427456048e-05, + "loss": 0.8981, + "step": 25060 + }, + { + "epoch": 0.03, + "learning_rate": 4.992315812088434e-05, + "loss": 0.9923, + "step": 25061 + }, + { + "epoch": 0.03, + "learning_rate": 4.992315196696217e-05, + "loss": 0.6737, + "step": 25062 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923145812793964e-05, + "loss": 0.3192, + "step": 25063 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923139658379726e-05, + "loss": 1.0879, + "step": 25064 + }, + { + "epoch": 0.03, + "learning_rate": 4.992313350371947e-05, + "loss": 1.1829, + "step": 25065 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923127348813166e-05, + "loss": 0.9678, + "step": 25066 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923121193660836e-05, + "loss": 0.8177, + "step": 25067 + }, + { + "epoch": 0.03, + "learning_rate": 4.992311503826248e-05, + "loss": 0.7637, + "step": 25068 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923108882618094e-05, + "loss": 1.0813, + "step": 25069 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923102726727674e-05, + "loss": 0.9057, + "step": 25070 + }, + { + "epoch": 0.03, + "learning_rate": 4.992309657059122e-05, + "loss": 0.8536, + "step": 25071 + }, + { + "epoch": 0.03, + "learning_rate": 4.992309041420874e-05, + "loss": 1.1515, + "step": 25072 + }, + { + "epoch": 0.03, + "learning_rate": 4.992308425758023e-05, + "loss": 0.991, + "step": 25073 + }, + { + "epoch": 0.03, + "learning_rate": 4.992307810070569e-05, + "loss": 1.0317, + "step": 25074 + }, + { + "epoch": 0.03, + "learning_rate": 4.992307194358512e-05, + "loss": 0.9014, + "step": 25075 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923065786218523e-05, + "loss": 1.2781, + "step": 25076 + }, + { + "epoch": 0.03, + "learning_rate": 4.992305962860589e-05, + "loss": 0.8905, + "step": 25077 + }, + { + "epoch": 0.03, + "learning_rate": 4.992305347074723e-05, + "loss": 0.8441, + "step": 25078 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923047312642536e-05, + "loss": 0.8423, + "step": 25079 + }, + { + "epoch": 0.03, + "learning_rate": 4.992304115429182e-05, + "loss": 1.0254, + "step": 25080 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923034995695065e-05, + "loss": 1.3564, + "step": 25081 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923028836852295e-05, + "loss": 1.0223, + "step": 25082 + }, + { + "epoch": 0.03, + "learning_rate": 4.992302267776348e-05, + "loss": 0.9651, + "step": 25083 + }, + { + "epoch": 0.03, + "learning_rate": 4.992301651842864e-05, + "loss": 0.6304, + "step": 25084 + }, + { + "epoch": 0.03, + "learning_rate": 4.992301035884777e-05, + "loss": 0.7032, + "step": 25085 + }, + { + "epoch": 0.03, + "learning_rate": 4.9923004199020874e-05, + "loss": 1.0789, + "step": 25086 + }, + { + "epoch": 0.03, + "learning_rate": 4.992299803894794e-05, + "loss": 1.3526, + "step": 25087 + }, + { + "epoch": 0.03, + "learning_rate": 4.992299187862899e-05, + "loss": 1.0528, + "step": 25088 + }, + { + "epoch": 0.03, + "learning_rate": 4.992298571806401e-05, + "loss": 0.5238, + "step": 25089 + }, + { + "epoch": 0.03, + "learning_rate": 4.992297955725299e-05, + "loss": 0.2522, + "step": 25090 + }, + { + "epoch": 0.03, + "learning_rate": 4.992297339619595e-05, + "loss": 0.7892, + "step": 25091 + }, + { + "epoch": 0.03, + "learning_rate": 4.992296723489287e-05, + "loss": 1.1137, + "step": 25092 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922961073343766e-05, + "loss": 1.2157, + "step": 25093 + }, + { + "epoch": 0.03, + "learning_rate": 4.992295491154864e-05, + "loss": 1.3797, + "step": 25094 + }, + { + "epoch": 0.03, + "learning_rate": 4.992294874950748e-05, + "loss": 0.957, + "step": 25095 + }, + { + "epoch": 0.03, + "learning_rate": 4.992294258722029e-05, + "loss": 0.7168, + "step": 25096 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922936424687074e-05, + "loss": 1.0018, + "step": 25097 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922930261907823e-05, + "loss": 0.8612, + "step": 25098 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922924098882553e-05, + "loss": 0.8657, + "step": 25099 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922917935611244e-05, + "loss": 1.0994, + "step": 25100 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922911772093916e-05, + "loss": 0.9604, + "step": 25101 + }, + { + "epoch": 0.03, + "learning_rate": 4.992290560833055e-05, + "loss": 0.9985, + "step": 25102 + }, + { + "epoch": 0.03, + "learning_rate": 4.992289944432116e-05, + "loss": 1.256, + "step": 25103 + }, + { + "epoch": 0.03, + "learning_rate": 4.992289328006575e-05, + "loss": 0.946, + "step": 25104 + }, + { + "epoch": 0.03, + "learning_rate": 4.99228871155643e-05, + "loss": 0.9306, + "step": 25105 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922880950816826e-05, + "loss": 1.3531, + "step": 25106 + }, + { + "epoch": 0.03, + "learning_rate": 4.992287478582333e-05, + "loss": 1.1832, + "step": 25107 + }, + { + "epoch": 0.03, + "learning_rate": 4.992286862058379e-05, + "loss": 1.0711, + "step": 25108 + }, + { + "epoch": 0.03, + "learning_rate": 4.992286245509824e-05, + "loss": 0.7556, + "step": 25109 + }, + { + "epoch": 0.03, + "learning_rate": 4.992285628936665e-05, + "loss": 0.8984, + "step": 25110 + }, + { + "epoch": 0.03, + "learning_rate": 4.992285012338903e-05, + "loss": 0.9954, + "step": 25111 + }, + { + "epoch": 0.03, + "learning_rate": 4.992284395716539e-05, + "loss": 0.5796, + "step": 25112 + }, + { + "epoch": 0.03, + "learning_rate": 4.992283779069572e-05, + "loss": 0.536, + "step": 25113 + }, + { + "epoch": 0.03, + "learning_rate": 4.992283162398003e-05, + "loss": 0.892, + "step": 25114 + }, + { + "epoch": 0.03, + "learning_rate": 4.99228254570183e-05, + "loss": 0.9259, + "step": 25115 + }, + { + "epoch": 0.03, + "learning_rate": 4.992281928981054e-05, + "loss": 0.883, + "step": 25116 + }, + { + "epoch": 0.03, + "learning_rate": 4.992281312235676e-05, + "loss": 0.886, + "step": 25117 + }, + { + "epoch": 0.03, + "learning_rate": 4.992280695465695e-05, + "loss": 1.0124, + "step": 25118 + }, + { + "epoch": 0.03, + "learning_rate": 4.992280078671112e-05, + "loss": 0.9801, + "step": 25119 + }, + { + "epoch": 0.03, + "learning_rate": 4.992279461851925e-05, + "loss": 0.8232, + "step": 25120 + }, + { + "epoch": 0.03, + "learning_rate": 4.992278845008137e-05, + "loss": 0.8465, + "step": 25121 + }, + { + "epoch": 0.03, + "learning_rate": 4.992278228139745e-05, + "loss": 1.2903, + "step": 25122 + }, + { + "epoch": 0.03, + "learning_rate": 4.992277611246751e-05, + "loss": 0.994, + "step": 25123 + }, + { + "epoch": 0.03, + "learning_rate": 4.992276994329153e-05, + "loss": 1.0585, + "step": 25124 + }, + { + "epoch": 0.03, + "learning_rate": 4.992276377386953e-05, + "loss": 0.9633, + "step": 25125 + }, + { + "epoch": 0.03, + "learning_rate": 4.992275760420151e-05, + "loss": 0.9627, + "step": 25126 + }, + { + "epoch": 0.03, + "learning_rate": 4.992275143428745e-05, + "loss": 0.5739, + "step": 25127 + }, + { + "epoch": 0.03, + "learning_rate": 4.992274526412737e-05, + "loss": 1.0005, + "step": 25128 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922739093721264e-05, + "loss": 0.9775, + "step": 25129 + }, + { + "epoch": 0.03, + "learning_rate": 4.992273292306913e-05, + "loss": 1.1425, + "step": 25130 + }, + { + "epoch": 0.03, + "learning_rate": 4.992272675217097e-05, + "loss": 1.1003, + "step": 25131 + }, + { + "epoch": 0.03, + "learning_rate": 4.992272058102678e-05, + "loss": 1.1229, + "step": 25132 + }, + { + "epoch": 0.03, + "learning_rate": 4.992271440963657e-05, + "loss": 1.3476, + "step": 25133 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922708238000325e-05, + "loss": 1.0289, + "step": 25134 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922702066118065e-05, + "loss": 0.832, + "step": 25135 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922695893989766e-05, + "loss": 1.027, + "step": 25136 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922689721615454e-05, + "loss": 0.9691, + "step": 25137 + }, + { + "epoch": 0.03, + "learning_rate": 4.992268354899511e-05, + "loss": 0.4913, + "step": 25138 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922677376128734e-05, + "loss": 0.5484, + "step": 25139 + }, + { + "epoch": 0.03, + "learning_rate": 4.992267120301634e-05, + "loss": 0.3011, + "step": 25140 + }, + { + "epoch": 0.03, + "learning_rate": 4.992266502965791e-05, + "loss": 0.2391, + "step": 25141 + }, + { + "epoch": 0.03, + "learning_rate": 4.992265885605346e-05, + "loss": 0.2796, + "step": 25142 + }, + { + "epoch": 0.03, + "learning_rate": 4.992265268220298e-05, + "loss": 0.2041, + "step": 25143 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922646508106484e-05, + "loss": 0.2022, + "step": 25144 + }, + { + "epoch": 0.03, + "learning_rate": 4.992264033376395e-05, + "loss": 0.2846, + "step": 25145 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922634159175395e-05, + "loss": 0.1546, + "step": 25146 + }, + { + "epoch": 0.03, + "learning_rate": 4.992262798434082e-05, + "loss": 0.1229, + "step": 25147 + }, + { + "epoch": 0.03, + "learning_rate": 4.992262180926022e-05, + "loss": 0.1206, + "step": 25148 + }, + { + "epoch": 0.03, + "learning_rate": 4.992261563393358e-05, + "loss": 0.0549, + "step": 25149 + }, + { + "epoch": 0.03, + "learning_rate": 4.992260945836092e-05, + "loss": 0.0559, + "step": 25150 + }, + { + "epoch": 0.03, + "learning_rate": 4.992260328254225e-05, + "loss": 0.0581, + "step": 25151 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922597106477544e-05, + "loss": 0.056, + "step": 25152 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922590930166807e-05, + "loss": 0.0592, + "step": 25153 + }, + { + "epoch": 0.03, + "learning_rate": 4.992258475361005e-05, + "loss": 0.0559, + "step": 25154 + }, + { + "epoch": 0.03, + "learning_rate": 4.992257857680727e-05, + "loss": 0.0883, + "step": 25155 + }, + { + "epoch": 0.03, + "learning_rate": 4.992257239975845e-05, + "loss": 0.0735, + "step": 25156 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922566222463624e-05, + "loss": 0.0754, + "step": 25157 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922560044922764e-05, + "loss": 0.0941, + "step": 25158 + }, + { + "epoch": 0.03, + "learning_rate": 4.992255386713588e-05, + "loss": 0.0678, + "step": 25159 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922547689102974e-05, + "loss": 0.0667, + "step": 25160 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922541510824036e-05, + "loss": 0.0911, + "step": 25161 + }, + { + "epoch": 0.03, + "learning_rate": 4.992253533229908e-05, + "loss": 0.07, + "step": 25162 + }, + { + "epoch": 0.03, + "learning_rate": 4.99225291535281e-05, + "loss": 0.0411, + "step": 25163 + }, + { + "epoch": 0.03, + "learning_rate": 4.992252297451109e-05, + "loss": 0.0406, + "step": 25164 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922516795248056e-05, + "loss": 0.0354, + "step": 25165 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922510615738996e-05, + "loss": 0.0281, + "step": 25166 + }, + { + "epoch": 0.03, + "learning_rate": 4.992250443598392e-05, + "loss": 0.0319, + "step": 25167 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922498255982806e-05, + "loss": 0.0286, + "step": 25168 + }, + { + "epoch": 0.03, + "learning_rate": 4.992249207573568e-05, + "loss": 0.0182, + "step": 25169 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922485895242526e-05, + "loss": 0.0228, + "step": 25170 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922479714503344e-05, + "loss": 0.0253, + "step": 25171 + }, + { + "epoch": 0.03, + "learning_rate": 4.992247353351814e-05, + "loss": 0.0278, + "step": 25172 + }, + { + "epoch": 0.03, + "learning_rate": 4.992246735228692e-05, + "loss": 0.0333, + "step": 25173 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922461170809664e-05, + "loss": 0.0326, + "step": 25174 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922454989086386e-05, + "loss": 0.0319, + "step": 25175 + }, + { + "epoch": 0.03, + "learning_rate": 4.992244880711709e-05, + "loss": 0.0176, + "step": 25176 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922442624901765e-05, + "loss": 0.0349, + "step": 25177 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922436442440416e-05, + "loss": 0.0359, + "step": 25178 + }, + { + "epoch": 0.03, + "learning_rate": 4.992243025973304e-05, + "loss": 0.0339, + "step": 25179 + }, + { + "epoch": 0.03, + "learning_rate": 4.992242407677965e-05, + "loss": 0.0321, + "step": 25180 + }, + { + "epoch": 0.03, + "learning_rate": 4.992241789358023e-05, + "loss": 0.0295, + "step": 25181 + }, + { + "epoch": 0.03, + "learning_rate": 4.992241171013479e-05, + "loss": 0.0379, + "step": 25182 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922405526443325e-05, + "loss": 0.0321, + "step": 25183 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922399342505835e-05, + "loss": 0.0299, + "step": 25184 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922393158322326e-05, + "loss": 0.0318, + "step": 25185 + }, + { + "epoch": 0.03, + "learning_rate": 4.992238697389279e-05, + "loss": 0.0329, + "step": 25186 + }, + { + "epoch": 0.03, + "learning_rate": 4.992238078921723e-05, + "loss": 0.0286, + "step": 25187 + }, + { + "epoch": 0.03, + "learning_rate": 4.992237460429564e-05, + "loss": 0.0297, + "step": 25188 + }, + { + "epoch": 0.03, + "learning_rate": 4.992236841912804e-05, + "loss": 0.0252, + "step": 25189 + }, + { + "epoch": 0.03, + "learning_rate": 4.992236223371441e-05, + "loss": 0.1481, + "step": 25190 + }, + { + "epoch": 0.03, + "learning_rate": 4.992235604805476e-05, + "loss": 1.1247, + "step": 25191 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922349862149086e-05, + "loss": 0.8362, + "step": 25192 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922343675997384e-05, + "loss": 1.1324, + "step": 25193 + }, + { + "epoch": 0.03, + "learning_rate": 4.992233748959967e-05, + "loss": 1.1421, + "step": 25194 + }, + { + "epoch": 0.03, + "learning_rate": 4.992233130295592e-05, + "loss": 0.9913, + "step": 25195 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922325116066156e-05, + "loss": 1.0372, + "step": 25196 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922318928930365e-05, + "loss": 1.0103, + "step": 25197 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922312741548554e-05, + "loss": 0.7789, + "step": 25198 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922306553920724e-05, + "loss": 0.9592, + "step": 25199 + }, + { + "epoch": 0.03, + "learning_rate": 4.992230036604686e-05, + "loss": 1.1377, + "step": 25200 + }, + { + "epoch": 0.03, + "learning_rate": 4.992229417792699e-05, + "loss": 1.3155, + "step": 25201 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922287989561087e-05, + "loss": 0.858, + "step": 25202 + }, + { + "epoch": 0.03, + "learning_rate": 4.992228180094916e-05, + "loss": 0.8514, + "step": 25203 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922275612091215e-05, + "loss": 0.9601, + "step": 25204 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922269422987244e-05, + "loss": 0.7068, + "step": 25205 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922263233637254e-05, + "loss": 0.6542, + "step": 25206 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922257044041245e-05, + "loss": 0.6096, + "step": 25207 + }, + { + "epoch": 0.03, + "learning_rate": 4.992225085419921e-05, + "loss": 0.6577, + "step": 25208 + }, + { + "epoch": 0.03, + "learning_rate": 4.992224466411115e-05, + "loss": 0.8597, + "step": 25209 + }, + { + "epoch": 0.03, + "learning_rate": 4.992223847377707e-05, + "loss": 0.5305, + "step": 25210 + }, + { + "epoch": 0.03, + "learning_rate": 4.992223228319697e-05, + "loss": 0.5249, + "step": 25211 + }, + { + "epoch": 0.03, + "learning_rate": 4.992222609237085e-05, + "loss": 1.3521, + "step": 25212 + }, + { + "epoch": 0.03, + "learning_rate": 4.99222199012987e-05, + "loss": 1.0136, + "step": 25213 + }, + { + "epoch": 0.03, + "learning_rate": 4.992221370998054e-05, + "loss": 1.115, + "step": 25214 + }, + { + "epoch": 0.03, + "learning_rate": 4.992220751841635e-05, + "loss": 1.0156, + "step": 25215 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922201326606144e-05, + "loss": 1.0157, + "step": 25216 + }, + { + "epoch": 0.03, + "learning_rate": 4.992219513454992e-05, + "loss": 1.0533, + "step": 25217 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922188942247665e-05, + "loss": 1.0771, + "step": 25218 + }, + { + "epoch": 0.03, + "learning_rate": 4.99221827496994e-05, + "loss": 0.9614, + "step": 25219 + }, + { + "epoch": 0.03, + "learning_rate": 4.99221765569051e-05, + "loss": 1.1284, + "step": 25220 + }, + { + "epoch": 0.03, + "learning_rate": 4.992217036386479e-05, + "loss": 1.3159, + "step": 25221 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922164170578445e-05, + "loss": 0.8393, + "step": 25222 + }, + { + "epoch": 0.03, + "learning_rate": 4.992215797704609e-05, + "loss": 0.5453, + "step": 25223 + }, + { + "epoch": 0.03, + "learning_rate": 4.992215178326771e-05, + "loss": 1.0581, + "step": 25224 + }, + { + "epoch": 0.03, + "learning_rate": 4.992214558924331e-05, + "loss": 1.0304, + "step": 25225 + }, + { + "epoch": 0.03, + "learning_rate": 4.992213939497289e-05, + "loss": 1.1248, + "step": 25226 + }, + { + "epoch": 0.03, + "learning_rate": 4.992213320045645e-05, + "loss": 0.6368, + "step": 25227 + }, + { + "epoch": 0.03, + "learning_rate": 4.992212700569399e-05, + "loss": 0.8258, + "step": 25228 + }, + { + "epoch": 0.03, + "learning_rate": 4.99221208106855e-05, + "loss": 1.0213, + "step": 25229 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922114615431005e-05, + "loss": 1.1141, + "step": 25230 + }, + { + "epoch": 0.03, + "learning_rate": 4.992210841993048e-05, + "loss": 1.1015, + "step": 25231 + }, + { + "epoch": 0.03, + "learning_rate": 4.992210222418393e-05, + "loss": 0.5771, + "step": 25232 + }, + { + "epoch": 0.03, + "learning_rate": 4.992209602819137e-05, + "loss": 1.2469, + "step": 25233 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922089831952784e-05, + "loss": 0.7842, + "step": 25234 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922083635468174e-05, + "loss": 1.0711, + "step": 25235 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922077438737545e-05, + "loss": 1.0961, + "step": 25236 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922071241760904e-05, + "loss": 1.0949, + "step": 25237 + }, + { + "epoch": 0.03, + "learning_rate": 4.992206504453824e-05, + "loss": 0.9151, + "step": 25238 + }, + { + "epoch": 0.03, + "learning_rate": 4.992205884706955e-05, + "loss": 1.1718, + "step": 25239 + }, + { + "epoch": 0.03, + "learning_rate": 4.992205264935485e-05, + "loss": 1.1624, + "step": 25240 + }, + { + "epoch": 0.03, + "learning_rate": 4.992204645139412e-05, + "loss": 1.2207, + "step": 25241 + }, + { + "epoch": 0.03, + "learning_rate": 4.992204025318737e-05, + "loss": 1.1188, + "step": 25242 + }, + { + "epoch": 0.03, + "learning_rate": 4.99220340547346e-05, + "loss": 0.7908, + "step": 25243 + }, + { + "epoch": 0.03, + "learning_rate": 4.992202785603581e-05, + "loss": 0.8162, + "step": 25244 + }, + { + "epoch": 0.03, + "learning_rate": 4.992202165709101e-05, + "loss": 0.943, + "step": 25245 + }, + { + "epoch": 0.03, + "learning_rate": 4.9922015457900186e-05, + "loss": 1.1426, + "step": 25246 + }, + { + "epoch": 0.03, + "learning_rate": 4.992200925846334e-05, + "loss": 1.0149, + "step": 25247 + }, + { + "epoch": 0.03, + "learning_rate": 4.992200305878048e-05, + "loss": 1.3231, + "step": 25248 + }, + { + "epoch": 0.03, + "learning_rate": 4.992199685885159e-05, + "loss": 1.0122, + "step": 25249 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921990658676686e-05, + "loss": 0.7944, + "step": 25250 + }, + { + "epoch": 0.03, + "learning_rate": 4.992198445825576e-05, + "loss": 0.9803, + "step": 25251 + }, + { + "epoch": 0.03, + "learning_rate": 4.992197825758882e-05, + "loss": 1.1797, + "step": 25252 + }, + { + "epoch": 0.03, + "learning_rate": 4.992197205667586e-05, + "loss": 1.0542, + "step": 25253 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921965855516875e-05, + "loss": 0.7744, + "step": 25254 + }, + { + "epoch": 0.03, + "learning_rate": 4.992195965411187e-05, + "loss": 1.0356, + "step": 25255 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921953452460855e-05, + "loss": 1.0049, + "step": 25256 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921947250563814e-05, + "loss": 1.2333, + "step": 25257 + }, + { + "epoch": 0.03, + "learning_rate": 4.992194104842076e-05, + "loss": 0.927, + "step": 25258 + }, + { + "epoch": 0.03, + "learning_rate": 4.992193484603168e-05, + "loss": 0.9216, + "step": 25259 + }, + { + "epoch": 0.03, + "learning_rate": 4.992192864339659e-05, + "loss": 0.8418, + "step": 25260 + }, + { + "epoch": 0.03, + "learning_rate": 4.992192244051547e-05, + "loss": 1.0691, + "step": 25261 + }, + { + "epoch": 0.03, + "learning_rate": 4.992191623738835e-05, + "loss": 1.0355, + "step": 25262 + }, + { + "epoch": 0.03, + "learning_rate": 4.992191003401519e-05, + "loss": 1.0515, + "step": 25263 + }, + { + "epoch": 0.03, + "learning_rate": 4.992190383039602e-05, + "loss": 1.0592, + "step": 25264 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921897626530836e-05, + "loss": 1.1024, + "step": 25265 + }, + { + "epoch": 0.03, + "learning_rate": 4.992189142241962e-05, + "loss": 1.0361, + "step": 25266 + }, + { + "epoch": 0.03, + "learning_rate": 4.99218852180624e-05, + "loss": 1.2286, + "step": 25267 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921879013459153e-05, + "loss": 0.8063, + "step": 25268 + }, + { + "epoch": 0.03, + "learning_rate": 4.992187280860989e-05, + "loss": 1.1173, + "step": 25269 + }, + { + "epoch": 0.03, + "learning_rate": 4.992186660351461e-05, + "loss": 1.1498, + "step": 25270 + }, + { + "epoch": 0.03, + "learning_rate": 4.992186039817331e-05, + "loss": 1.1151, + "step": 25271 + }, + { + "epoch": 0.03, + "learning_rate": 4.992185419258599e-05, + "loss": 1.1275, + "step": 25272 + }, + { + "epoch": 0.03, + "learning_rate": 4.992184798675266e-05, + "loss": 1.0718, + "step": 25273 + }, + { + "epoch": 0.03, + "learning_rate": 4.992184178067331e-05, + "loss": 0.5926, + "step": 25274 + }, + { + "epoch": 0.03, + "learning_rate": 4.992183557434794e-05, + "loss": 0.4591, + "step": 25275 + }, + { + "epoch": 0.03, + "learning_rate": 4.992182936777655e-05, + "loss": 0.8349, + "step": 25276 + }, + { + "epoch": 0.03, + "learning_rate": 4.992182316095915e-05, + "loss": 0.9374, + "step": 25277 + }, + { + "epoch": 0.03, + "learning_rate": 4.992181695389572e-05, + "loss": 0.9491, + "step": 25278 + }, + { + "epoch": 0.03, + "learning_rate": 4.992181074658628e-05, + "loss": 0.9518, + "step": 25279 + }, + { + "epoch": 0.03, + "learning_rate": 4.992180453903082e-05, + "loss": 1.1468, + "step": 25280 + }, + { + "epoch": 0.03, + "learning_rate": 4.992179833122934e-05, + "loss": 1.1256, + "step": 25281 + }, + { + "epoch": 0.03, + "learning_rate": 4.992179212318185e-05, + "loss": 1.1141, + "step": 25282 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921785914888336e-05, + "loss": 0.7789, + "step": 25283 + }, + { + "epoch": 0.03, + "learning_rate": 4.992177970634881e-05, + "loss": 1.0126, + "step": 25284 + }, + { + "epoch": 0.03, + "learning_rate": 4.992177349756326e-05, + "loss": 1.0437, + "step": 25285 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921767288531694e-05, + "loss": 1.1684, + "step": 25286 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921761079254115e-05, + "loss": 1.0412, + "step": 25287 + }, + { + "epoch": 0.03, + "learning_rate": 4.992175486973052e-05, + "loss": 1.1102, + "step": 25288 + }, + { + "epoch": 0.03, + "learning_rate": 4.99217486599609e-05, + "loss": 0.7031, + "step": 25289 + }, + { + "epoch": 0.03, + "learning_rate": 4.992174244994527e-05, + "loss": 0.8885, + "step": 25290 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921736239683624e-05, + "loss": 1.207, + "step": 25291 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921730029175964e-05, + "loss": 0.8742, + "step": 25292 + }, + { + "epoch": 0.03, + "learning_rate": 4.992172381842227e-05, + "loss": 0.9332, + "step": 25293 + }, + { + "epoch": 0.03, + "learning_rate": 4.992171760742257e-05, + "loss": 0.8615, + "step": 25294 + }, + { + "epoch": 0.03, + "learning_rate": 4.992171139617686e-05, + "loss": 1.1136, + "step": 25295 + }, + { + "epoch": 0.03, + "learning_rate": 4.992170518468513e-05, + "loss": 1.2426, + "step": 25296 + }, + { + "epoch": 0.03, + "learning_rate": 4.992169897294738e-05, + "loss": 1.0677, + "step": 25297 + }, + { + "epoch": 0.03, + "learning_rate": 4.992169276096361e-05, + "loss": 1.1393, + "step": 25298 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921686548733824e-05, + "loss": 1.0658, + "step": 25299 + }, + { + "epoch": 0.03, + "learning_rate": 4.992168033625803e-05, + "loss": 1.0903, + "step": 25300 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921674123536214e-05, + "loss": 0.9362, + "step": 25301 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921667910568384e-05, + "loss": 0.988, + "step": 25302 + }, + { + "epoch": 0.03, + "learning_rate": 4.992166169735454e-05, + "loss": 1.1563, + "step": 25303 + }, + { + "epoch": 0.03, + "learning_rate": 4.992165548389467e-05, + "loss": 1.131, + "step": 25304 + }, + { + "epoch": 0.03, + "learning_rate": 4.99216492701888e-05, + "loss": 0.942, + "step": 25305 + }, + { + "epoch": 0.03, + "learning_rate": 4.99216430562369e-05, + "loss": 0.8895, + "step": 25306 + }, + { + "epoch": 0.03, + "learning_rate": 4.992163684203899e-05, + "loss": 0.8774, + "step": 25307 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921630627595065e-05, + "loss": 0.8934, + "step": 25308 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921624412905115e-05, + "loss": 1.1833, + "step": 25309 + }, + { + "epoch": 0.03, + "learning_rate": 4.992161819796915e-05, + "loss": 1.1606, + "step": 25310 + }, + { + "epoch": 0.03, + "learning_rate": 4.992161198278718e-05, + "loss": 0.9047, + "step": 25311 + }, + { + "epoch": 0.03, + "learning_rate": 4.992160576735919e-05, + "loss": 1.2609, + "step": 25312 + }, + { + "epoch": 0.03, + "learning_rate": 4.992159955168518e-05, + "loss": 0.7526, + "step": 25313 + }, + { + "epoch": 0.03, + "learning_rate": 4.992159333576516e-05, + "loss": 0.9345, + "step": 25314 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921587119599125e-05, + "loss": 1.1732, + "step": 25315 + }, + { + "epoch": 0.03, + "learning_rate": 4.992158090318707e-05, + "loss": 0.8698, + "step": 25316 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921574686529e-05, + "loss": 1.0294, + "step": 25317 + }, + { + "epoch": 0.03, + "learning_rate": 4.992156846962492e-05, + "loss": 0.7285, + "step": 25318 + }, + { + "epoch": 0.03, + "learning_rate": 4.992156225247482e-05, + "loss": 1.007, + "step": 25319 + }, + { + "epoch": 0.03, + "learning_rate": 4.992155603507871e-05, + "loss": 1.1879, + "step": 25320 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921549817436586e-05, + "loss": 0.8376, + "step": 25321 + }, + { + "epoch": 0.03, + "learning_rate": 4.992154359954844e-05, + "loss": 1.0078, + "step": 25322 + }, + { + "epoch": 0.03, + "learning_rate": 4.992153738141428e-05, + "loss": 0.9619, + "step": 25323 + }, + { + "epoch": 0.03, + "learning_rate": 4.992153116303411e-05, + "loss": 0.8041, + "step": 25324 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921524944407915e-05, + "loss": 1.242, + "step": 25325 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921518725535716e-05, + "loss": 1.06, + "step": 25326 + }, + { + "epoch": 0.03, + "learning_rate": 4.99215125064175e-05, + "loss": 0.8303, + "step": 25327 + }, + { + "epoch": 0.03, + "learning_rate": 4.992150628705327e-05, + "loss": 0.8614, + "step": 25328 + }, + { + "epoch": 0.03, + "learning_rate": 4.992150006744302e-05, + "loss": 1.1408, + "step": 25329 + }, + { + "epoch": 0.03, + "learning_rate": 4.992149384758675e-05, + "loss": 1.0482, + "step": 25330 + }, + { + "epoch": 0.03, + "learning_rate": 4.992148762748448e-05, + "loss": 0.8925, + "step": 25331 + }, + { + "epoch": 0.03, + "learning_rate": 4.992148140713619e-05, + "loss": 0.9556, + "step": 25332 + }, + { + "epoch": 0.03, + "learning_rate": 4.992147518654188e-05, + "loss": 0.7117, + "step": 25333 + }, + { + "epoch": 0.03, + "learning_rate": 4.992146896570157e-05, + "loss": 0.9339, + "step": 25334 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921462744615235e-05, + "loss": 0.9717, + "step": 25335 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921456523282887e-05, + "loss": 0.9243, + "step": 25336 + }, + { + "epoch": 0.03, + "learning_rate": 4.992145030170452e-05, + "loss": 1.0203, + "step": 25337 + }, + { + "epoch": 0.03, + "learning_rate": 4.992144407988014e-05, + "loss": 0.3971, + "step": 25338 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921437857809755e-05, + "loss": 0.578, + "step": 25339 + }, + { + "epoch": 0.03, + "learning_rate": 4.992143163549335e-05, + "loss": 0.4495, + "step": 25340 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921425412930935e-05, + "loss": 0.5035, + "step": 25341 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921419190122506e-05, + "loss": 0.5704, + "step": 25342 + }, + { + "epoch": 0.03, + "learning_rate": 4.992141296706806e-05, + "loss": 0.3427, + "step": 25343 + }, + { + "epoch": 0.03, + "learning_rate": 4.99214067437676e-05, + "loss": 1.1393, + "step": 25344 + }, + { + "epoch": 0.03, + "learning_rate": 4.992140052022113e-05, + "loss": 0.7993, + "step": 25345 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921394296428636e-05, + "loss": 0.8591, + "step": 25346 + }, + { + "epoch": 0.03, + "learning_rate": 4.992138807239014e-05, + "loss": 0.992, + "step": 25347 + }, + { + "epoch": 0.03, + "learning_rate": 4.992138184810563e-05, + "loss": 0.8876, + "step": 25348 + }, + { + "epoch": 0.03, + "learning_rate": 4.99213756235751e-05, + "loss": 1.2374, + "step": 25349 + }, + { + "epoch": 0.03, + "learning_rate": 4.992136939879856e-05, + "loss": 1.3325, + "step": 25350 + }, + { + "epoch": 0.03, + "learning_rate": 4.992136317377601e-05, + "loss": 0.8105, + "step": 25351 + }, + { + "epoch": 0.03, + "learning_rate": 4.992135694850744e-05, + "loss": 0.8143, + "step": 25352 + }, + { + "epoch": 0.03, + "learning_rate": 4.992135072299286e-05, + "loss": 1.0643, + "step": 25353 + }, + { + "epoch": 0.03, + "learning_rate": 4.992134449723227e-05, + "loss": 1.0418, + "step": 25354 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921338271225665e-05, + "loss": 1.0012, + "step": 25355 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921332044973045e-05, + "loss": 0.9529, + "step": 25356 + }, + { + "epoch": 0.03, + "learning_rate": 4.992132581847442e-05, + "loss": 0.7987, + "step": 25357 + }, + { + "epoch": 0.03, + "learning_rate": 4.992131959172977e-05, + "loss": 1.1857, + "step": 25358 + }, + { + "epoch": 0.03, + "learning_rate": 4.992131336473911e-05, + "loss": 1.0198, + "step": 25359 + }, + { + "epoch": 0.03, + "learning_rate": 4.992130713750244e-05, + "loss": 0.8573, + "step": 25360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921300910019767e-05, + "loss": 0.6895, + "step": 25361 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921294682291066e-05, + "loss": 0.4411, + "step": 25362 + }, + { + "epoch": 0.03, + "learning_rate": 4.992128845431636e-05, + "loss": 0.2818, + "step": 25363 + }, + { + "epoch": 0.03, + "learning_rate": 4.992128222609564e-05, + "loss": 0.4128, + "step": 25364 + }, + { + "epoch": 0.03, + "learning_rate": 4.992127599762891e-05, + "loss": 0.5094, + "step": 25365 + }, + { + "epoch": 0.03, + "learning_rate": 4.992126976891616e-05, + "loss": 0.6093, + "step": 25366 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921263539957406e-05, + "loss": 0.448, + "step": 25367 + }, + { + "epoch": 0.03, + "learning_rate": 4.992125731075263e-05, + "loss": 0.7534, + "step": 25368 + }, + { + "epoch": 0.03, + "learning_rate": 4.992125108130186e-05, + "loss": 0.9877, + "step": 25369 + }, + { + "epoch": 0.03, + "learning_rate": 4.992124485160507e-05, + "loss": 1.4394, + "step": 25370 + }, + { + "epoch": 0.03, + "learning_rate": 4.992123862166226e-05, + "loss": 0.6872, + "step": 25371 + }, + { + "epoch": 0.03, + "learning_rate": 4.992123239147344e-05, + "loss": 1.0095, + "step": 25372 + }, + { + "epoch": 0.03, + "learning_rate": 4.992122616103861e-05, + "loss": 1.1616, + "step": 25373 + }, + { + "epoch": 0.03, + "learning_rate": 4.992121993035777e-05, + "loss": 1.0794, + "step": 25374 + }, + { + "epoch": 0.03, + "learning_rate": 4.992121369943092e-05, + "loss": 0.8517, + "step": 25375 + }, + { + "epoch": 0.03, + "learning_rate": 4.992120746825805e-05, + "loss": 1.1044, + "step": 25376 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921201236839176e-05, + "loss": 1.3176, + "step": 25377 + }, + { + "epoch": 0.03, + "learning_rate": 4.992119500517429e-05, + "loss": 1.0207, + "step": 25378 + }, + { + "epoch": 0.03, + "learning_rate": 4.992118877326339e-05, + "loss": 1.0954, + "step": 25379 + }, + { + "epoch": 0.03, + "learning_rate": 4.992118254110648e-05, + "loss": 1.1591, + "step": 25380 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921176308703556e-05, + "loss": 1.0753, + "step": 25381 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921170076054624e-05, + "loss": 0.8058, + "step": 25382 + }, + { + "epoch": 0.03, + "learning_rate": 4.992116384315968e-05, + "loss": 0.9258, + "step": 25383 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921157610018724e-05, + "loss": 1.0384, + "step": 25384 + }, + { + "epoch": 0.03, + "learning_rate": 4.992115137663176e-05, + "loss": 1.3298, + "step": 25385 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921145142998774e-05, + "loss": 1.078, + "step": 25386 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921138909119794e-05, + "loss": 0.657, + "step": 25387 + }, + { + "epoch": 0.03, + "learning_rate": 4.992113267499479e-05, + "loss": 0.6318, + "step": 25388 + }, + { + "epoch": 0.03, + "learning_rate": 4.992112644062378e-05, + "loss": 0.159, + "step": 25389 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921120206006755e-05, + "loss": 0.1065, + "step": 25390 + }, + { + "epoch": 0.03, + "learning_rate": 4.992111397114372e-05, + "loss": 0.0809, + "step": 25391 + }, + { + "epoch": 0.03, + "learning_rate": 4.992110773603468e-05, + "loss": 0.5427, + "step": 25392 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921101500679625e-05, + "loss": 0.2939, + "step": 25393 + }, + { + "epoch": 0.03, + "learning_rate": 4.992109526507855e-05, + "loss": 0.3915, + "step": 25394 + }, + { + "epoch": 0.03, + "learning_rate": 4.992108902923148e-05, + "loss": 0.5509, + "step": 25395 + }, + { + "epoch": 0.03, + "learning_rate": 4.99210827931384e-05, + "loss": 0.2196, + "step": 25396 + }, + { + "epoch": 0.03, + "learning_rate": 4.99210765567993e-05, + "loss": 0.0504, + "step": 25397 + }, + { + "epoch": 0.03, + "learning_rate": 4.992107032021419e-05, + "loss": 0.058, + "step": 25398 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921064083383076e-05, + "loss": 0.4688, + "step": 25399 + }, + { + "epoch": 0.03, + "learning_rate": 4.992105784630595e-05, + "loss": 0.3038, + "step": 25400 + }, + { + "epoch": 0.03, + "learning_rate": 4.992105160898281e-05, + "loss": 0.0728, + "step": 25401 + }, + { + "epoch": 0.03, + "learning_rate": 4.992104537141366e-05, + "loss": 0.0445, + "step": 25402 + }, + { + "epoch": 0.03, + "learning_rate": 4.992103913359851e-05, + "loss": 0.3594, + "step": 25403 + }, + { + "epoch": 0.03, + "learning_rate": 4.992103289553734e-05, + "loss": 0.4276, + "step": 25404 + }, + { + "epoch": 0.03, + "learning_rate": 4.992102665723016e-05, + "loss": 0.5449, + "step": 25405 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921020418676975e-05, + "loss": 0.9461, + "step": 25406 + }, + { + "epoch": 0.03, + "learning_rate": 4.992101417987778e-05, + "loss": 0.9638, + "step": 25407 + }, + { + "epoch": 0.03, + "learning_rate": 4.992100794083257e-05, + "loss": 1.2127, + "step": 25408 + }, + { + "epoch": 0.03, + "learning_rate": 4.992100170154135e-05, + "loss": 1.145, + "step": 25409 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920995462004124e-05, + "loss": 0.9204, + "step": 25410 + }, + { + "epoch": 0.03, + "learning_rate": 4.992098922222089e-05, + "loss": 1.1378, + "step": 25411 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920982982191646e-05, + "loss": 1.0549, + "step": 25412 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920976741916395e-05, + "loss": 1.057, + "step": 25413 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920970501395126e-05, + "loss": 0.9796, + "step": 25414 + }, + { + "epoch": 0.03, + "learning_rate": 4.992096426062786e-05, + "loss": 0.8747, + "step": 25415 + }, + { + "epoch": 0.03, + "learning_rate": 4.992095801961457e-05, + "loss": 0.984, + "step": 25416 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920951778355277e-05, + "loss": 0.627, + "step": 25417 + }, + { + "epoch": 0.03, + "learning_rate": 4.992094553684997e-05, + "loss": 0.7613, + "step": 25418 + }, + { + "epoch": 0.03, + "learning_rate": 4.992093929509867e-05, + "loss": 0.9423, + "step": 25419 + }, + { + "epoch": 0.03, + "learning_rate": 4.992093305310135e-05, + "loss": 0.7867, + "step": 25420 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920926810858016e-05, + "loss": 0.8356, + "step": 25421 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920920568368675e-05, + "loss": 1.2746, + "step": 25422 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920914325633336e-05, + "loss": 1.0376, + "step": 25423 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920908082651984e-05, + "loss": 1.2483, + "step": 25424 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920901839424614e-05, + "loss": 0.8622, + "step": 25425 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920895595951245e-05, + "loss": 1.0395, + "step": 25426 + }, + { + "epoch": 0.03, + "learning_rate": 4.992088935223186e-05, + "loss": 1.1181, + "step": 25427 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920883108266476e-05, + "loss": 0.7554, + "step": 25428 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920876864055077e-05, + "loss": 0.8817, + "step": 25429 + }, + { + "epoch": 0.03, + "learning_rate": 4.992087061959767e-05, + "loss": 0.862, + "step": 25430 + }, + { + "epoch": 0.03, + "learning_rate": 4.992086437489426e-05, + "loss": 0.912, + "step": 25431 + }, + { + "epoch": 0.03, + "learning_rate": 4.992085812994484e-05, + "loss": 0.9013, + "step": 25432 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920851884749404e-05, + "loss": 1.0225, + "step": 25433 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920845639307964e-05, + "loss": 1.1501, + "step": 25434 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920839393620525e-05, + "loss": 0.6082, + "step": 25435 + }, + { + "epoch": 0.03, + "learning_rate": 4.992083314768706e-05, + "loss": 0.9016, + "step": 25436 + }, + { + "epoch": 0.03, + "learning_rate": 4.99208269015076e-05, + "loss": 0.858, + "step": 25437 + }, + { + "epoch": 0.03, + "learning_rate": 4.992082065508213e-05, + "loss": 1.2233, + "step": 25438 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920814408410654e-05, + "loss": 0.6477, + "step": 25439 + }, + { + "epoch": 0.03, + "learning_rate": 4.992080816149316e-05, + "loss": 0.7057, + "step": 25440 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920801914329675e-05, + "loss": 1.3023, + "step": 25441 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920795666920164e-05, + "loss": 1.0188, + "step": 25442 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920789419264654e-05, + "loss": 0.5966, + "step": 25443 + }, + { + "epoch": 0.03, + "learning_rate": 4.992078317136314e-05, + "loss": 0.5925, + "step": 25444 + }, + { + "epoch": 0.03, + "learning_rate": 4.992077692321562e-05, + "loss": 0.6258, + "step": 25445 + }, + { + "epoch": 0.03, + "learning_rate": 4.992077067482208e-05, + "loss": 0.5038, + "step": 25446 + }, + { + "epoch": 0.03, + "learning_rate": 4.992076442618254e-05, + "loss": 0.589, + "step": 25447 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920758177297e-05, + "loss": 0.4842, + "step": 25448 + }, + { + "epoch": 0.03, + "learning_rate": 4.992075192816545e-05, + "loss": 0.4888, + "step": 25449 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920745678787885e-05, + "loss": 0.398, + "step": 25450 + }, + { + "epoch": 0.03, + "learning_rate": 4.992073942916432e-05, + "loss": 0.2454, + "step": 25451 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920733179294745e-05, + "loss": 0.2128, + "step": 25452 + }, + { + "epoch": 0.03, + "learning_rate": 4.992072692917916e-05, + "loss": 0.2167, + "step": 25453 + }, + { + "epoch": 0.03, + "learning_rate": 4.992072067881757e-05, + "loss": 0.745, + "step": 25454 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920714428209974e-05, + "loss": 0.793, + "step": 25455 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920708177356366e-05, + "loss": 0.8001, + "step": 25456 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920701926256766e-05, + "loss": 4.6841, + "step": 25457 + }, + { + "epoch": 0.03, + "learning_rate": 4.992069567491115e-05, + "loss": 1.0981, + "step": 25458 + }, + { + "epoch": 0.03, + "learning_rate": 4.992068942331952e-05, + "loss": 1.1137, + "step": 25459 + }, + { + "epoch": 0.03, + "learning_rate": 4.99206831714819e-05, + "loss": 0.9468, + "step": 25460 + }, + { + "epoch": 0.03, + "learning_rate": 4.992067691939826e-05, + "loss": 0.6921, + "step": 25461 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920670667068616e-05, + "loss": 0.9783, + "step": 25462 + }, + { + "epoch": 0.03, + "learning_rate": 4.992066441449297e-05, + "loss": 0.9143, + "step": 25463 + }, + { + "epoch": 0.03, + "learning_rate": 4.992065816167131e-05, + "loss": 0.9276, + "step": 25464 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920651908603654e-05, + "loss": 1.2974, + "step": 25465 + }, + { + "epoch": 0.03, + "learning_rate": 4.992064565528999e-05, + "loss": 0.9993, + "step": 25466 + }, + { + "epoch": 0.03, + "learning_rate": 4.992063940173031e-05, + "loss": 0.8887, + "step": 25467 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920633147924636e-05, + "loss": 1.1553, + "step": 25468 + }, + { + "epoch": 0.03, + "learning_rate": 4.992062689387294e-05, + "loss": 1.2363, + "step": 25469 + }, + { + "epoch": 0.03, + "learning_rate": 4.992062063957526e-05, + "loss": 1.0574, + "step": 25470 + }, + { + "epoch": 0.03, + "learning_rate": 4.992061438503156e-05, + "loss": 0.8442, + "step": 25471 + }, + { + "epoch": 0.03, + "learning_rate": 4.992060813024185e-05, + "loss": 1.1638, + "step": 25472 + }, + { + "epoch": 0.03, + "learning_rate": 4.992060187520615e-05, + "loss": 1.197, + "step": 25473 + }, + { + "epoch": 0.03, + "learning_rate": 4.992059561992444e-05, + "loss": 1.3049, + "step": 25474 + }, + { + "epoch": 0.03, + "learning_rate": 4.992058936439671e-05, + "loss": 1.3725, + "step": 25475 + }, + { + "epoch": 0.03, + "learning_rate": 4.992058310862299e-05, + "loss": 0.7517, + "step": 25476 + }, + { + "epoch": 0.03, + "learning_rate": 4.992057685260326e-05, + "loss": 1.2874, + "step": 25477 + }, + { + "epoch": 0.03, + "learning_rate": 4.992057059633752e-05, + "loss": 1.1467, + "step": 25478 + }, + { + "epoch": 0.03, + "learning_rate": 4.992056433982578e-05, + "loss": 1.0138, + "step": 25479 + }, + { + "epoch": 0.03, + "learning_rate": 4.992055808306803e-05, + "loss": 1.0434, + "step": 25480 + }, + { + "epoch": 0.03, + "learning_rate": 4.992055182606428e-05, + "loss": 0.7546, + "step": 25481 + }, + { + "epoch": 0.03, + "learning_rate": 4.992054556881453e-05, + "loss": 0.9228, + "step": 25482 + }, + { + "epoch": 0.03, + "learning_rate": 4.992053931131876e-05, + "loss": 0.9298, + "step": 25483 + }, + { + "epoch": 0.03, + "learning_rate": 4.992053305357699e-05, + "loss": 1.0157, + "step": 25484 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920526795589216e-05, + "loss": 0.7477, + "step": 25485 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920520537355444e-05, + "loss": 1.2381, + "step": 25486 + }, + { + "epoch": 0.03, + "learning_rate": 4.992051427887566e-05, + "loss": 0.9527, + "step": 25487 + }, + { + "epoch": 0.03, + "learning_rate": 4.992050802014987e-05, + "loss": 1.1989, + "step": 25488 + }, + { + "epoch": 0.03, + "learning_rate": 4.992050176117808e-05, + "loss": 1.077, + "step": 25489 + }, + { + "epoch": 0.03, + "learning_rate": 4.992049550196029e-05, + "loss": 0.9295, + "step": 25490 + }, + { + "epoch": 0.03, + "learning_rate": 4.992048924249648e-05, + "loss": 1.1007, + "step": 25491 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920482982786675e-05, + "loss": 0.9844, + "step": 25492 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920476722830865e-05, + "loss": 0.5751, + "step": 25493 + }, + { + "epoch": 0.03, + "learning_rate": 4.992047046262905e-05, + "loss": 0.7737, + "step": 25494 + }, + { + "epoch": 0.03, + "learning_rate": 4.992046420218123e-05, + "loss": 0.7894, + "step": 25495 + }, + { + "epoch": 0.03, + "learning_rate": 4.992045794148741e-05, + "loss": 0.9189, + "step": 25496 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920451680547575e-05, + "loss": 0.8127, + "step": 25497 + }, + { + "epoch": 0.03, + "learning_rate": 4.992044541936175e-05, + "loss": 0.7078, + "step": 25498 + }, + { + "epoch": 0.03, + "learning_rate": 4.992043915792991e-05, + "loss": 1.1538, + "step": 25499 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920432896252065e-05, + "loss": 0.8989, + "step": 25500 + }, + { + "epoch": 0.03, + "eval_loss": 1.0479439496994019, + "eval_runtime": 128.7307, + "eval_samples_per_second": 10.759, + "eval_steps_per_second": 5.383, + "step": 25500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920426634328224e-05, + "loss": 0.692, + "step": 25501 + }, + { + "epoch": 0.03, + "learning_rate": 4.992042037215838e-05, + "loss": 0.7568, + "step": 25502 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920414109742526e-05, + "loss": 0.9906, + "step": 25503 + }, + { + "epoch": 0.03, + "learning_rate": 4.992040784708067e-05, + "loss": 1.1287, + "step": 25504 + }, + { + "epoch": 0.03, + "learning_rate": 4.992040158417281e-05, + "loss": 0.8113, + "step": 25505 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920395321018945e-05, + "loss": 0.9368, + "step": 25506 + }, + { + "epoch": 0.03, + "learning_rate": 4.992038905761908e-05, + "loss": 0.6073, + "step": 25507 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920382793973206e-05, + "loss": 0.8541, + "step": 25508 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920376530081335e-05, + "loss": 1.3915, + "step": 25509 + }, + { + "epoch": 0.03, + "learning_rate": 4.992037026594346e-05, + "loss": 1.0817, + "step": 25510 + }, + { + "epoch": 0.03, + "learning_rate": 4.992036400155957e-05, + "loss": 1.0359, + "step": 25511 + }, + { + "epoch": 0.03, + "learning_rate": 4.992035773692969e-05, + "loss": 1.1118, + "step": 25512 + }, + { + "epoch": 0.03, + "learning_rate": 4.99203514720538e-05, + "loss": 1.0692, + "step": 25513 + }, + { + "epoch": 0.03, + "learning_rate": 4.992034520693191e-05, + "loss": 1.0317, + "step": 25514 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920338941564014e-05, + "loss": 1.0124, + "step": 25515 + }, + { + "epoch": 0.03, + "learning_rate": 4.992033267595012e-05, + "loss": 0.8217, + "step": 25516 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920326410090224e-05, + "loss": 0.8563, + "step": 25517 + }, + { + "epoch": 0.03, + "learning_rate": 4.992032014398432e-05, + "loss": 0.8516, + "step": 25518 + }, + { + "epoch": 0.03, + "learning_rate": 4.992031387763241e-05, + "loss": 0.8468, + "step": 25519 + }, + { + "epoch": 0.03, + "learning_rate": 4.99203076110345e-05, + "loss": 1.0821, + "step": 25520 + }, + { + "epoch": 0.03, + "learning_rate": 4.992030134419059e-05, + "loss": 0.6678, + "step": 25521 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920295077100676e-05, + "loss": 0.8539, + "step": 25522 + }, + { + "epoch": 0.03, + "learning_rate": 4.992028880976476e-05, + "loss": 0.8829, + "step": 25523 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920282542182836e-05, + "loss": 1.0373, + "step": 25524 + }, + { + "epoch": 0.03, + "learning_rate": 4.992027627435492e-05, + "loss": 1.1651, + "step": 25525 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920270006280995e-05, + "loss": 0.9403, + "step": 25526 + }, + { + "epoch": 0.03, + "learning_rate": 4.992026373796107e-05, + "loss": 1.0858, + "step": 25527 + }, + { + "epoch": 0.03, + "learning_rate": 4.992025746939514e-05, + "loss": 1.04, + "step": 25528 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920251200583205e-05, + "loss": 0.6519, + "step": 25529 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920244931525274e-05, + "loss": 0.8379, + "step": 25530 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920238662221337e-05, + "loss": 0.929, + "step": 25531 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920232392671394e-05, + "loss": 0.9804, + "step": 25532 + }, + { + "epoch": 0.03, + "learning_rate": 4.992022612287546e-05, + "loss": 1.2467, + "step": 25533 + }, + { + "epoch": 0.03, + "learning_rate": 4.992021985283352e-05, + "loss": 1.1121, + "step": 25534 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920213582545574e-05, + "loss": 0.8523, + "step": 25535 + }, + { + "epoch": 0.03, + "learning_rate": 4.992020731201162e-05, + "loss": 1.1252, + "step": 25536 + }, + { + "epoch": 0.03, + "learning_rate": 4.992020104123168e-05, + "loss": 0.9649, + "step": 25537 + }, + { + "epoch": 0.03, + "learning_rate": 4.992019477020573e-05, + "loss": 1.0461, + "step": 25538 + }, + { + "epoch": 0.03, + "learning_rate": 4.992018849893378e-05, + "loss": 0.8588, + "step": 25539 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920182227415825e-05, + "loss": 0.7761, + "step": 25540 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920175955651874e-05, + "loss": 0.8121, + "step": 25541 + }, + { + "epoch": 0.03, + "learning_rate": 4.992016968364192e-05, + "loss": 1.1637, + "step": 25542 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920163411385955e-05, + "loss": 0.9679, + "step": 25543 + }, + { + "epoch": 0.03, + "learning_rate": 4.992015713888401e-05, + "loss": 0.9611, + "step": 25544 + }, + { + "epoch": 0.03, + "learning_rate": 4.992015086613604e-05, + "loss": 0.9391, + "step": 25545 + }, + { + "epoch": 0.03, + "learning_rate": 4.992014459314208e-05, + "loss": 0.7813, + "step": 25546 + }, + { + "epoch": 0.03, + "learning_rate": 4.992013831990212e-05, + "loss": 0.84, + "step": 25547 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920132046416156e-05, + "loss": 0.8469, + "step": 25548 + }, + { + "epoch": 0.03, + "learning_rate": 4.992012577268419e-05, + "loss": 0.5595, + "step": 25549 + }, + { + "epoch": 0.03, + "learning_rate": 4.992011949870622e-05, + "loss": 0.7381, + "step": 25550 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920113224482256e-05, + "loss": 0.9399, + "step": 25551 + }, + { + "epoch": 0.03, + "learning_rate": 4.992010695001229e-05, + "loss": 0.6042, + "step": 25552 + }, + { + "epoch": 0.03, + "learning_rate": 4.992010067529632e-05, + "loss": 0.824, + "step": 25553 + }, + { + "epoch": 0.03, + "learning_rate": 4.992009440033436e-05, + "loss": 0.7738, + "step": 25554 + }, + { + "epoch": 0.03, + "learning_rate": 4.992008812512639e-05, + "loss": 0.8869, + "step": 25555 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920081849672415e-05, + "loss": 0.9603, + "step": 25556 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920075573972444e-05, + "loss": 0.9571, + "step": 25557 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920069298026474e-05, + "loss": 1.2069, + "step": 25558 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920063021834506e-05, + "loss": 0.2281, + "step": 25559 + }, + { + "epoch": 0.03, + "learning_rate": 4.992005674539654e-05, + "loss": 0.666, + "step": 25560 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920050468712566e-05, + "loss": 0.8789, + "step": 25561 + }, + { + "epoch": 0.03, + "learning_rate": 4.992004419178259e-05, + "loss": 1.0569, + "step": 25562 + }, + { + "epoch": 0.03, + "learning_rate": 4.992003791460662e-05, + "loss": 0.9824, + "step": 25563 + }, + { + "epoch": 0.03, + "learning_rate": 4.992003163718464e-05, + "loss": 0.7829, + "step": 25564 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920025359516675e-05, + "loss": 0.9022, + "step": 25565 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920019081602695e-05, + "loss": 1.1186, + "step": 25566 + }, + { + "epoch": 0.03, + "learning_rate": 4.992001280344273e-05, + "loss": 1.138, + "step": 25567 + }, + { + "epoch": 0.03, + "learning_rate": 4.992000652503675e-05, + "loss": 1.0575, + "step": 25568 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920000246384784e-05, + "loss": 0.7972, + "step": 25569 + }, + { + "epoch": 0.03, + "learning_rate": 4.991999396748681e-05, + "loss": 1.0315, + "step": 25570 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919987688342837e-05, + "loss": 1.0861, + "step": 25571 + }, + { + "epoch": 0.03, + "learning_rate": 4.991998140895287e-05, + "loss": 0.9074, + "step": 25572 + }, + { + "epoch": 0.03, + "learning_rate": 4.99199751293169e-05, + "loss": 1.1712, + "step": 25573 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919968849434925e-05, + "loss": 1.0447, + "step": 25574 + }, + { + "epoch": 0.03, + "learning_rate": 4.991996256930696e-05, + "loss": 0.8542, + "step": 25575 + }, + { + "epoch": 0.03, + "learning_rate": 4.991995628893299e-05, + "loss": 0.9268, + "step": 25576 + }, + { + "epoch": 0.03, + "learning_rate": 4.991995000831302e-05, + "loss": 0.9937, + "step": 25577 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919943727447054e-05, + "loss": 0.9313, + "step": 25578 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919937446335085e-05, + "loss": 1.0095, + "step": 25579 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919931164977116e-05, + "loss": 1.2027, + "step": 25580 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919924883373156e-05, + "loss": 0.9313, + "step": 25581 + }, + { + "epoch": 0.03, + "learning_rate": 4.99199186015232e-05, + "loss": 0.8642, + "step": 25582 + }, + { + "epoch": 0.03, + "learning_rate": 4.991991231942723e-05, + "loss": 0.9955, + "step": 25583 + }, + { + "epoch": 0.03, + "learning_rate": 4.991990603708527e-05, + "loss": 1.305, + "step": 25584 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919899754497315e-05, + "loss": 1.0399, + "step": 25585 + }, + { + "epoch": 0.03, + "learning_rate": 4.991989347166335e-05, + "loss": 1.0497, + "step": 25586 + }, + { + "epoch": 0.03, + "learning_rate": 4.991988718858339e-05, + "loss": 0.7731, + "step": 25587 + }, + { + "epoch": 0.03, + "learning_rate": 4.991988090525743e-05, + "loss": 1.1434, + "step": 25588 + }, + { + "epoch": 0.03, + "learning_rate": 4.991987462168548e-05, + "loss": 1.1553, + "step": 25589 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919868337867526e-05, + "loss": 0.8772, + "step": 25590 + }, + { + "epoch": 0.03, + "learning_rate": 4.991986205380357e-05, + "loss": 1.0083, + "step": 25591 + }, + { + "epoch": 0.03, + "learning_rate": 4.991985576949362e-05, + "loss": 0.9481, + "step": 25592 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919849484937675e-05, + "loss": 0.8152, + "step": 25593 + }, + { + "epoch": 0.03, + "learning_rate": 4.991984320013573e-05, + "loss": 1.0207, + "step": 25594 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919836915087783e-05, + "loss": 1.0346, + "step": 25595 + }, + { + "epoch": 0.03, + "learning_rate": 4.991983062979384e-05, + "loss": 0.9336, + "step": 25596 + }, + { + "epoch": 0.03, + "learning_rate": 4.99198243442539e-05, + "loss": 1.0044, + "step": 25597 + }, + { + "epoch": 0.03, + "learning_rate": 4.991981805846796e-05, + "loss": 0.8974, + "step": 25598 + }, + { + "epoch": 0.03, + "learning_rate": 4.991981177243602e-05, + "loss": 0.9835, + "step": 25599 + }, + { + "epoch": 0.03, + "learning_rate": 4.991980548615809e-05, + "loss": 1.0279, + "step": 25600 + }, + { + "epoch": 0.03, + "learning_rate": 4.991979919963415e-05, + "loss": 1.0142, + "step": 25601 + }, + { + "epoch": 0.03, + "learning_rate": 4.991979291286422e-05, + "loss": 1.5041, + "step": 25602 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919786625848295e-05, + "loss": 0.8546, + "step": 25603 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919780338586365e-05, + "loss": 0.7064, + "step": 25604 + }, + { + "epoch": 0.03, + "learning_rate": 4.991977405107844e-05, + "loss": 0.9156, + "step": 25605 + }, + { + "epoch": 0.03, + "learning_rate": 4.991976776332452e-05, + "loss": 1.0011, + "step": 25606 + }, + { + "epoch": 0.03, + "learning_rate": 4.99197614753246e-05, + "loss": 0.6444, + "step": 25607 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919755187078685e-05, + "loss": 0.6394, + "step": 25608 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919748898586775e-05, + "loss": 0.8335, + "step": 25609 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919742609848866e-05, + "loss": 0.7045, + "step": 25610 + }, + { + "epoch": 0.03, + "learning_rate": 4.991973632086496e-05, + "loss": 0.985, + "step": 25611 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919730031635045e-05, + "loss": 0.7575, + "step": 25612 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919723742159154e-05, + "loss": 1.043, + "step": 25613 + }, + { + "epoch": 0.03, + "learning_rate": 4.991971745243725e-05, + "loss": 0.7751, + "step": 25614 + }, + { + "epoch": 0.03, + "learning_rate": 4.991971116246935e-05, + "loss": 0.6379, + "step": 25615 + }, + { + "epoch": 0.03, + "learning_rate": 4.991970487225546e-05, + "loss": 0.6742, + "step": 25616 + }, + { + "epoch": 0.03, + "learning_rate": 4.991969858179557e-05, + "loss": 0.2412, + "step": 25617 + }, + { + "epoch": 0.03, + "learning_rate": 4.991969229108969e-05, + "loss": 0.1725, + "step": 25618 + }, + { + "epoch": 0.03, + "learning_rate": 4.99196860001378e-05, + "loss": 0.2412, + "step": 25619 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919679708939924e-05, + "loss": 0.5532, + "step": 25620 + }, + { + "epoch": 0.03, + "learning_rate": 4.991967341749605e-05, + "loss": 0.8368, + "step": 25621 + }, + { + "epoch": 0.03, + "learning_rate": 4.991966712580618e-05, + "loss": 0.9496, + "step": 25622 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919660833870306e-05, + "loss": 1.1434, + "step": 25623 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919654541688435e-05, + "loss": 1.1843, + "step": 25624 + }, + { + "epoch": 0.03, + "learning_rate": 4.991964824926057e-05, + "loss": 1.2085, + "step": 25625 + }, + { + "epoch": 0.03, + "learning_rate": 4.991964195658671e-05, + "loss": 0.8527, + "step": 25626 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919635663666867e-05, + "loss": 0.8021, + "step": 25627 + }, + { + "epoch": 0.03, + "learning_rate": 4.991962937050101e-05, + "loss": 0.6328, + "step": 25628 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919623077089165e-05, + "loss": 0.9434, + "step": 25629 + }, + { + "epoch": 0.03, + "learning_rate": 4.991961678343132e-05, + "loss": 0.7023, + "step": 25630 + }, + { + "epoch": 0.03, + "learning_rate": 4.991961048952748e-05, + "loss": 0.968, + "step": 25631 + }, + { + "epoch": 0.03, + "learning_rate": 4.991960419537765e-05, + "loss": 0.4385, + "step": 25632 + }, + { + "epoch": 0.03, + "learning_rate": 4.991959790098182e-05, + "loss": 0.5095, + "step": 25633 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919591606339995e-05, + "loss": 1.0062, + "step": 25634 + }, + { + "epoch": 0.03, + "learning_rate": 4.991958531145217e-05, + "loss": 1.0049, + "step": 25635 + }, + { + "epoch": 0.03, + "learning_rate": 4.991957901631835e-05, + "loss": 0.9749, + "step": 25636 + }, + { + "epoch": 0.03, + "learning_rate": 4.991957272093854e-05, + "loss": 0.8883, + "step": 25637 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919566425312735e-05, + "loss": 1.0521, + "step": 25638 + }, + { + "epoch": 0.03, + "learning_rate": 4.991956012944093e-05, + "loss": 0.79, + "step": 25639 + }, + { + "epoch": 0.03, + "learning_rate": 4.991955383332313e-05, + "loss": 0.8642, + "step": 25640 + }, + { + "epoch": 0.03, + "learning_rate": 4.991954753695933e-05, + "loss": 0.9301, + "step": 25641 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919541240349543e-05, + "loss": 0.4734, + "step": 25642 + }, + { + "epoch": 0.03, + "learning_rate": 4.991953494349376e-05, + "loss": 0.4748, + "step": 25643 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919528646391976e-05, + "loss": 1.0601, + "step": 25644 + }, + { + "epoch": 0.03, + "learning_rate": 4.99195223490442e-05, + "loss": 0.9702, + "step": 25645 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919516051450434e-05, + "loss": 1.0483, + "step": 25646 + }, + { + "epoch": 0.03, + "learning_rate": 4.991950975361067e-05, + "loss": 0.9423, + "step": 25647 + }, + { + "epoch": 0.03, + "learning_rate": 4.991950345552491e-05, + "loss": 0.9092, + "step": 25648 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919497157193154e-05, + "loss": 1.1845, + "step": 25649 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919490858615406e-05, + "loss": 1.1256, + "step": 25650 + }, + { + "epoch": 0.03, + "learning_rate": 4.991948455979165e-05, + "loss": 0.8735, + "step": 25651 + }, + { + "epoch": 0.03, + "learning_rate": 4.991947826072192e-05, + "loss": 0.8359, + "step": 25652 + }, + { + "epoch": 0.03, + "learning_rate": 4.991947196140618e-05, + "loss": 1.1225, + "step": 25653 + }, + { + "epoch": 0.03, + "learning_rate": 4.991946566184445e-05, + "loss": 0.9565, + "step": 25654 + }, + { + "epoch": 0.03, + "learning_rate": 4.991945936203673e-05, + "loss": 0.9137, + "step": 25655 + }, + { + "epoch": 0.03, + "learning_rate": 4.991945306198301e-05, + "loss": 0.8453, + "step": 25656 + }, + { + "epoch": 0.03, + "learning_rate": 4.99194467616833e-05, + "loss": 0.981, + "step": 25657 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919440461137596e-05, + "loss": 0.7872, + "step": 25658 + }, + { + "epoch": 0.03, + "learning_rate": 4.991943416034589e-05, + "loss": 0.7202, + "step": 25659 + }, + { + "epoch": 0.03, + "learning_rate": 4.99194278593082e-05, + "loss": 0.6923, + "step": 25660 + }, + { + "epoch": 0.03, + "learning_rate": 4.991942155802451e-05, + "loss": 0.7891, + "step": 25661 + }, + { + "epoch": 0.03, + "learning_rate": 4.991941525649483e-05, + "loss": 0.7018, + "step": 25662 + }, + { + "epoch": 0.03, + "learning_rate": 4.991940895471915e-05, + "loss": 0.8904, + "step": 25663 + }, + { + "epoch": 0.03, + "learning_rate": 4.991940265269748e-05, + "loss": 1.0819, + "step": 25664 + }, + { + "epoch": 0.03, + "learning_rate": 4.991939635042981e-05, + "loss": 1.227, + "step": 25665 + }, + { + "epoch": 0.03, + "learning_rate": 4.991939004791616e-05, + "loss": 1.1283, + "step": 25666 + }, + { + "epoch": 0.03, + "learning_rate": 4.991938374515651e-05, + "loss": 1.3979, + "step": 25667 + }, + { + "epoch": 0.03, + "learning_rate": 4.991937744215086e-05, + "loss": 1.1434, + "step": 25668 + }, + { + "epoch": 0.03, + "learning_rate": 4.991937113889922e-05, + "loss": 1.0276, + "step": 25669 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919364835401596e-05, + "loss": 0.7966, + "step": 25670 + }, + { + "epoch": 0.03, + "learning_rate": 4.991935853165797e-05, + "loss": 1.2742, + "step": 25671 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919352227668345e-05, + "loss": 0.9509, + "step": 25672 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919345923432735e-05, + "loss": 0.9776, + "step": 25673 + }, + { + "epoch": 0.03, + "learning_rate": 4.991933961895113e-05, + "loss": 1.0581, + "step": 25674 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919333314223526e-05, + "loss": 1.2724, + "step": 25675 + }, + { + "epoch": 0.03, + "learning_rate": 4.991932700924994e-05, + "loss": 1.2349, + "step": 25676 + }, + { + "epoch": 0.03, + "learning_rate": 4.991932070403035e-05, + "loss": 0.8734, + "step": 25677 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919314398564773e-05, + "loss": 0.8264, + "step": 25678 + }, + { + "epoch": 0.03, + "learning_rate": 4.99193080928532e-05, + "loss": 0.8606, + "step": 25679 + }, + { + "epoch": 0.03, + "learning_rate": 4.991930178689563e-05, + "loss": 0.7791, + "step": 25680 + }, + { + "epoch": 0.03, + "learning_rate": 4.991929548069207e-05, + "loss": 1.3218, + "step": 25681 + }, + { + "epoch": 0.03, + "learning_rate": 4.991928917424253e-05, + "loss": 1.1962, + "step": 25682 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919282867546986e-05, + "loss": 1.0835, + "step": 25683 + }, + { + "epoch": 0.03, + "learning_rate": 4.991927656060545e-05, + "loss": 1.2724, + "step": 25684 + }, + { + "epoch": 0.03, + "learning_rate": 4.991927025341792e-05, + "loss": 1.113, + "step": 25685 + }, + { + "epoch": 0.03, + "learning_rate": 4.99192639459844e-05, + "loss": 1.0718, + "step": 25686 + }, + { + "epoch": 0.03, + "learning_rate": 4.991925763830489e-05, + "loss": 1.3142, + "step": 25687 + }, + { + "epoch": 0.03, + "learning_rate": 4.991925133037938e-05, + "loss": 0.9934, + "step": 25688 + }, + { + "epoch": 0.03, + "learning_rate": 4.991924502220788e-05, + "loss": 0.9403, + "step": 25689 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919238713790394e-05, + "loss": 1.0894, + "step": 25690 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919232405126916e-05, + "loss": 0.9014, + "step": 25691 + }, + { + "epoch": 0.03, + "learning_rate": 4.991922609621744e-05, + "loss": 1.1872, + "step": 25692 + }, + { + "epoch": 0.03, + "learning_rate": 4.991921978706197e-05, + "loss": 1.018, + "step": 25693 + }, + { + "epoch": 0.03, + "learning_rate": 4.991921347766051e-05, + "loss": 0.8534, + "step": 25694 + }, + { + "epoch": 0.03, + "learning_rate": 4.991920716801306e-05, + "loss": 1.419, + "step": 25695 + }, + { + "epoch": 0.03, + "learning_rate": 4.991920085811962e-05, + "loss": 1.1202, + "step": 25696 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919194547980184e-05, + "loss": 0.7459, + "step": 25697 + }, + { + "epoch": 0.03, + "learning_rate": 4.991918823759476e-05, + "loss": 1.0924, + "step": 25698 + }, + { + "epoch": 0.03, + "learning_rate": 4.991918192696334e-05, + "loss": 1.0415, + "step": 25699 + }, + { + "epoch": 0.03, + "learning_rate": 4.991917561608593e-05, + "loss": 1.0242, + "step": 25700 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919169304962534e-05, + "loss": 1.2055, + "step": 25701 + }, + { + "epoch": 0.03, + "learning_rate": 4.991916299359314e-05, + "loss": 1.1676, + "step": 25702 + }, + { + "epoch": 0.03, + "learning_rate": 4.991915668197775e-05, + "loss": 0.9963, + "step": 25703 + }, + { + "epoch": 0.03, + "learning_rate": 4.991915037011638e-05, + "loss": 0.7652, + "step": 25704 + }, + { + "epoch": 0.03, + "learning_rate": 4.991914405800901e-05, + "loss": 0.8364, + "step": 25705 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919137745655656e-05, + "loss": 0.9469, + "step": 25706 + }, + { + "epoch": 0.03, + "learning_rate": 4.991913143305631e-05, + "loss": 0.8693, + "step": 25707 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919125120210966e-05, + "loss": 0.9541, + "step": 25708 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919118807119634e-05, + "loss": 0.9792, + "step": 25709 + }, + { + "epoch": 0.03, + "learning_rate": 4.99191124937823e-05, + "loss": 0.9718, + "step": 25710 + }, + { + "epoch": 0.03, + "learning_rate": 4.991910618019899e-05, + "loss": 0.728, + "step": 25711 + }, + { + "epoch": 0.03, + "learning_rate": 4.991909986636969e-05, + "loss": 1.2885, + "step": 25712 + }, + { + "epoch": 0.03, + "learning_rate": 4.991909355229439e-05, + "loss": 0.9237, + "step": 25713 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919087237973106e-05, + "loss": 1.2815, + "step": 25714 + }, + { + "epoch": 0.03, + "learning_rate": 4.991908092340583e-05, + "loss": 1.1122, + "step": 25715 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919074608592566e-05, + "loss": 0.9325, + "step": 25716 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919068293533305e-05, + "loss": 1.1455, + "step": 25717 + }, + { + "epoch": 0.03, + "learning_rate": 4.991906197822805e-05, + "loss": 0.8719, + "step": 25718 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919055662676814e-05, + "loss": 1.1377, + "step": 25719 + }, + { + "epoch": 0.03, + "learning_rate": 4.991904934687959e-05, + "loss": 0.8786, + "step": 25720 + }, + { + "epoch": 0.03, + "learning_rate": 4.991904303083637e-05, + "loss": 1.1421, + "step": 25721 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919036714547155e-05, + "loss": 0.9805, + "step": 25722 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919030398011956e-05, + "loss": 0.8057, + "step": 25723 + }, + { + "epoch": 0.03, + "learning_rate": 4.991902408123077e-05, + "loss": 1.363, + "step": 25724 + }, + { + "epoch": 0.03, + "learning_rate": 4.991901776420359e-05, + "loss": 1.0186, + "step": 25725 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919011446930414e-05, + "loss": 0.7966, + "step": 25726 + }, + { + "epoch": 0.03, + "learning_rate": 4.9919005129411254e-05, + "loss": 1.1076, + "step": 25727 + }, + { + "epoch": 0.03, + "learning_rate": 4.99189988116461e-05, + "loss": 0.9373, + "step": 25728 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918992493634965e-05, + "loss": 0.666, + "step": 25729 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918986175377836e-05, + "loss": 0.8127, + "step": 25730 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918979856874715e-05, + "loss": 0.8135, + "step": 25731 + }, + { + "epoch": 0.03, + "learning_rate": 4.99189735381256e-05, + "loss": 0.93, + "step": 25732 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918967219130504e-05, + "loss": 1.1284, + "step": 25733 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918960899889414e-05, + "loss": 1.3688, + "step": 25734 + }, + { + "epoch": 0.03, + "learning_rate": 4.991895458040233e-05, + "loss": 1.2117, + "step": 25735 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918948260669265e-05, + "loss": 1.0694, + "step": 25736 + }, + { + "epoch": 0.03, + "learning_rate": 4.991894194069021e-05, + "loss": 1.3328, + "step": 25737 + }, + { + "epoch": 0.03, + "learning_rate": 4.991893562046516e-05, + "loss": 1.8201, + "step": 25738 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918929299994127e-05, + "loss": 0.8728, + "step": 25739 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918922979277105e-05, + "loss": 1.2008, + "step": 25740 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918916658314086e-05, + "loss": 1.0079, + "step": 25741 + }, + { + "epoch": 0.03, + "learning_rate": 4.991891033710509e-05, + "loss": 1.0044, + "step": 25742 + }, + { + "epoch": 0.03, + "learning_rate": 4.991890401565009e-05, + "loss": 0.8514, + "step": 25743 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918897693949116e-05, + "loss": 0.5864, + "step": 25744 + }, + { + "epoch": 0.03, + "learning_rate": 4.991889137200214e-05, + "loss": 0.527, + "step": 25745 + }, + { + "epoch": 0.03, + "learning_rate": 4.991888504980918e-05, + "loss": 1.1255, + "step": 25746 + }, + { + "epoch": 0.03, + "learning_rate": 4.991887872737023e-05, + "loss": 1.0253, + "step": 25747 + }, + { + "epoch": 0.03, + "learning_rate": 4.99188724046853e-05, + "loss": 1.0765, + "step": 25748 + }, + { + "epoch": 0.03, + "learning_rate": 4.991886608175438e-05, + "loss": 0.99, + "step": 25749 + }, + { + "epoch": 0.03, + "learning_rate": 4.991885975857746e-05, + "loss": 1.0563, + "step": 25750 + }, + { + "epoch": 0.03, + "learning_rate": 4.991885343515456e-05, + "loss": 2.487, + "step": 25751 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918847111485666e-05, + "loss": 0.9786, + "step": 25752 + }, + { + "epoch": 0.03, + "learning_rate": 4.991884078757079e-05, + "loss": 0.6252, + "step": 25753 + }, + { + "epoch": 0.03, + "learning_rate": 4.991883446340992e-05, + "loss": 0.924, + "step": 25754 + }, + { + "epoch": 0.03, + "learning_rate": 4.991882813900307e-05, + "loss": 1.1609, + "step": 25755 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918821814350226e-05, + "loss": 1.0787, + "step": 25756 + }, + { + "epoch": 0.03, + "learning_rate": 4.99188154894514e-05, + "loss": 1.1311, + "step": 25757 + }, + { + "epoch": 0.03, + "learning_rate": 4.991880916430658e-05, + "loss": 0.9768, + "step": 25758 + }, + { + "epoch": 0.03, + "learning_rate": 4.991880283891577e-05, + "loss": 0.9717, + "step": 25759 + }, + { + "epoch": 0.03, + "learning_rate": 4.991879651327898e-05, + "loss": 1.1451, + "step": 25760 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918790187396194e-05, + "loss": 1.2621, + "step": 25761 + }, + { + "epoch": 0.03, + "learning_rate": 4.991878386126742e-05, + "loss": 1.0463, + "step": 25762 + }, + { + "epoch": 0.03, + "learning_rate": 4.991877753489266e-05, + "loss": 1.0465, + "step": 25763 + }, + { + "epoch": 0.03, + "learning_rate": 4.991877120827191e-05, + "loss": 0.9057, + "step": 25764 + }, + { + "epoch": 0.03, + "learning_rate": 4.991876488140518e-05, + "loss": 0.6581, + "step": 25765 + }, + { + "epoch": 0.03, + "learning_rate": 4.991875855429246e-05, + "loss": 0.7678, + "step": 25766 + }, + { + "epoch": 0.03, + "learning_rate": 4.991875222693375e-05, + "loss": 1.1339, + "step": 25767 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918745899329056e-05, + "loss": 0.9398, + "step": 25768 + }, + { + "epoch": 0.03, + "learning_rate": 4.991873957147837e-05, + "loss": 0.6879, + "step": 25769 + }, + { + "epoch": 0.03, + "learning_rate": 4.99187332433817e-05, + "loss": 0.9775, + "step": 25770 + }, + { + "epoch": 0.03, + "learning_rate": 4.991872691503904e-05, + "loss": 1.0992, + "step": 25771 + }, + { + "epoch": 0.03, + "learning_rate": 4.99187205864504e-05, + "loss": 1.017, + "step": 25772 + }, + { + "epoch": 0.03, + "learning_rate": 4.991871425761576e-05, + "loss": 1.155, + "step": 25773 + }, + { + "epoch": 0.03, + "learning_rate": 4.991870792853515e-05, + "loss": 1.0096, + "step": 25774 + }, + { + "epoch": 0.03, + "learning_rate": 4.991870159920854e-05, + "loss": 1.0484, + "step": 25775 + }, + { + "epoch": 0.03, + "learning_rate": 4.991869526963594e-05, + "loss": 0.9261, + "step": 25776 + }, + { + "epoch": 0.03, + "learning_rate": 4.991868893981736e-05, + "loss": 1.1229, + "step": 25777 + }, + { + "epoch": 0.03, + "learning_rate": 4.99186826097528e-05, + "loss": 1.1667, + "step": 25778 + }, + { + "epoch": 0.03, + "learning_rate": 4.991867627944224e-05, + "loss": 1.3097, + "step": 25779 + }, + { + "epoch": 0.03, + "learning_rate": 4.99186699488857e-05, + "loss": 0.869, + "step": 25780 + }, + { + "epoch": 0.03, + "learning_rate": 4.991866361808317e-05, + "loss": 0.8759, + "step": 25781 + }, + { + "epoch": 0.03, + "learning_rate": 4.991865728703467e-05, + "loss": 1.0684, + "step": 25782 + }, + { + "epoch": 0.03, + "learning_rate": 4.991865095574016e-05, + "loss": 0.915, + "step": 25783 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918644624199674e-05, + "loss": 0.9299, + "step": 25784 + }, + { + "epoch": 0.03, + "learning_rate": 4.99186382924132e-05, + "loss": 0.7329, + "step": 25785 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918631960380746e-05, + "loss": 0.8754, + "step": 25786 + }, + { + "epoch": 0.03, + "learning_rate": 4.99186256281023e-05, + "loss": 0.7077, + "step": 25787 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918619295577865e-05, + "loss": 1.0019, + "step": 25788 + }, + { + "epoch": 0.03, + "learning_rate": 4.991861296280745e-05, + "loss": 0.7738, + "step": 25789 + }, + { + "epoch": 0.03, + "learning_rate": 4.991860662979104e-05, + "loss": 0.6864, + "step": 25790 + }, + { + "epoch": 0.03, + "learning_rate": 4.991860029652865e-05, + "loss": 0.5901, + "step": 25791 + }, + { + "epoch": 0.03, + "learning_rate": 4.991859396302028e-05, + "loss": 0.6398, + "step": 25792 + }, + { + "epoch": 0.03, + "learning_rate": 4.991858762926592e-05, + "loss": 0.9284, + "step": 25793 + }, + { + "epoch": 0.03, + "learning_rate": 4.991858129526556e-05, + "loss": 0.7596, + "step": 25794 + }, + { + "epoch": 0.03, + "learning_rate": 4.991857496101924e-05, + "loss": 0.9285, + "step": 25795 + }, + { + "epoch": 0.03, + "learning_rate": 4.991856862652692e-05, + "loss": 0.8106, + "step": 25796 + }, + { + "epoch": 0.03, + "learning_rate": 4.991856229178861e-05, + "loss": 0.994, + "step": 25797 + }, + { + "epoch": 0.03, + "learning_rate": 4.991855595680433e-05, + "loss": 1.4894, + "step": 25798 + }, + { + "epoch": 0.03, + "learning_rate": 4.991854962157405e-05, + "loss": 1.3267, + "step": 25799 + }, + { + "epoch": 0.03, + "learning_rate": 4.991854328609778e-05, + "loss": 0.6233, + "step": 25800 + }, + { + "epoch": 0.03, + "learning_rate": 4.991853695037554e-05, + "loss": 0.2478, + "step": 25801 + }, + { + "epoch": 0.03, + "learning_rate": 4.991853061440731e-05, + "loss": 0.1664, + "step": 25802 + }, + { + "epoch": 0.03, + "learning_rate": 4.991852427819309e-05, + "loss": 0.1837, + "step": 25803 + }, + { + "epoch": 0.03, + "learning_rate": 4.991851794173289e-05, + "loss": 0.3485, + "step": 25804 + }, + { + "epoch": 0.03, + "learning_rate": 4.99185116050267e-05, + "loss": 0.9135, + "step": 25805 + }, + { + "epoch": 0.03, + "learning_rate": 4.991850526807453e-05, + "loss": 0.8773, + "step": 25806 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918498930876376e-05, + "loss": 1.3027, + "step": 25807 + }, + { + "epoch": 0.03, + "learning_rate": 4.991849259343223e-05, + "loss": 2.0103, + "step": 25808 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918486255742105e-05, + "loss": 1.9137, + "step": 25809 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918479917805995e-05, + "loss": 1.1655, + "step": 25810 + }, + { + "epoch": 0.03, + "learning_rate": 4.99184735796239e-05, + "loss": 1.0848, + "step": 25811 + }, + { + "epoch": 0.03, + "learning_rate": 4.991846724119581e-05, + "loss": 1.3777, + "step": 25812 + }, + { + "epoch": 0.03, + "learning_rate": 4.991846090252175e-05, + "loss": 0.7799, + "step": 25813 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918454563601704e-05, + "loss": 1.4942, + "step": 25814 + }, + { + "epoch": 0.03, + "learning_rate": 4.991844822443567e-05, + "loss": 0.8962, + "step": 25815 + }, + { + "epoch": 0.03, + "learning_rate": 4.991844188502365e-05, + "loss": 0.9722, + "step": 25816 + }, + { + "epoch": 0.03, + "learning_rate": 4.991843554536564e-05, + "loss": 1.0301, + "step": 25817 + }, + { + "epoch": 0.03, + "learning_rate": 4.991842920546166e-05, + "loss": 0.7009, + "step": 25818 + }, + { + "epoch": 0.03, + "learning_rate": 4.991842286531169e-05, + "loss": 1.1413, + "step": 25819 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918416524915726e-05, + "loss": 1.0206, + "step": 25820 + }, + { + "epoch": 0.03, + "learning_rate": 4.991841018427379e-05, + "loss": 0.9912, + "step": 25821 + }, + { + "epoch": 0.03, + "learning_rate": 4.991840384338586e-05, + "loss": 1.1298, + "step": 25822 + }, + { + "epoch": 0.03, + "learning_rate": 4.991839750225196e-05, + "loss": 0.9288, + "step": 25823 + }, + { + "epoch": 0.03, + "learning_rate": 4.991839116087206e-05, + "loss": 0.71, + "step": 25824 + }, + { + "epoch": 0.03, + "learning_rate": 4.991838481924619e-05, + "loss": 0.9027, + "step": 25825 + }, + { + "epoch": 0.03, + "learning_rate": 4.991837847737433e-05, + "loss": 1.3135, + "step": 25826 + }, + { + "epoch": 0.03, + "learning_rate": 4.991837213525649e-05, + "loss": 1.0744, + "step": 25827 + }, + { + "epoch": 0.03, + "learning_rate": 4.991836579289265e-05, + "loss": 1.0782, + "step": 25828 + }, + { + "epoch": 0.03, + "learning_rate": 4.991835945028285e-05, + "loss": 0.4802, + "step": 25829 + }, + { + "epoch": 0.03, + "learning_rate": 4.991835310742705e-05, + "loss": 0.8428, + "step": 25830 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918346764325274e-05, + "loss": 0.9026, + "step": 25831 + }, + { + "epoch": 0.03, + "learning_rate": 4.991834042097751e-05, + "loss": 1.142, + "step": 25832 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918334077383764e-05, + "loss": 0.9227, + "step": 25833 + }, + { + "epoch": 0.03, + "learning_rate": 4.991832773354404e-05, + "loss": 0.7308, + "step": 25834 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918321389458326e-05, + "loss": 0.9323, + "step": 25835 + }, + { + "epoch": 0.03, + "learning_rate": 4.991831504512663e-05, + "loss": 0.9103, + "step": 25836 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918308700548955e-05, + "loss": 0.9816, + "step": 25837 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918302355725294e-05, + "loss": 0.9848, + "step": 25838 + }, + { + "epoch": 0.03, + "learning_rate": 4.991829601065565e-05, + "loss": 0.771, + "step": 25839 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918289665340026e-05, + "loss": 0.7217, + "step": 25840 + }, + { + "epoch": 0.03, + "learning_rate": 4.991828331977842e-05, + "loss": 1.0244, + "step": 25841 + }, + { + "epoch": 0.03, + "learning_rate": 4.991827697397082e-05, + "loss": 1.2314, + "step": 25842 + }, + { + "epoch": 0.03, + "learning_rate": 4.991827062791725e-05, + "loss": 0.8956, + "step": 25843 + }, + { + "epoch": 0.03, + "learning_rate": 4.991826428161769e-05, + "loss": 0.9167, + "step": 25844 + }, + { + "epoch": 0.03, + "learning_rate": 4.991825793507216e-05, + "loss": 0.8992, + "step": 25845 + }, + { + "epoch": 0.03, + "learning_rate": 4.991825158828063e-05, + "loss": 0.9102, + "step": 25846 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918245241243124e-05, + "loss": 1.1132, + "step": 25847 + }, + { + "epoch": 0.03, + "learning_rate": 4.991823889395963e-05, + "loss": 0.9443, + "step": 25848 + }, + { + "epoch": 0.03, + "learning_rate": 4.991823254643016e-05, + "loss": 0.7022, + "step": 25849 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918226198654715e-05, + "loss": 0.8022, + "step": 25850 + }, + { + "epoch": 0.03, + "learning_rate": 4.991821985063328e-05, + "loss": 1.0386, + "step": 25851 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918213502365864e-05, + "loss": 1.1772, + "step": 25852 + }, + { + "epoch": 0.03, + "learning_rate": 4.991820715385247e-05, + "loss": 0.9803, + "step": 25853 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918200805093086e-05, + "loss": 1.204, + "step": 25854 + }, + { + "epoch": 0.03, + "learning_rate": 4.991819445608772e-05, + "loss": 0.7079, + "step": 25855 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918188106836374e-05, + "loss": 0.1484, + "step": 25856 + }, + { + "epoch": 0.03, + "learning_rate": 4.991818175733905e-05, + "loss": 0.3056, + "step": 25857 + }, + { + "epoch": 0.03, + "learning_rate": 4.991817540759574e-05, + "loss": 0.9031, + "step": 25858 + }, + { + "epoch": 0.03, + "learning_rate": 4.991816905760645e-05, + "loss": 1.1465, + "step": 25859 + }, + { + "epoch": 0.03, + "learning_rate": 4.991816270737118e-05, + "loss": 1.2944, + "step": 25860 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918156356889925e-05, + "loss": 0.8954, + "step": 25861 + }, + { + "epoch": 0.03, + "learning_rate": 4.991815000616269e-05, + "loss": 0.9071, + "step": 25862 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918143655189475e-05, + "loss": 1.0322, + "step": 25863 + }, + { + "epoch": 0.03, + "learning_rate": 4.991813730397028e-05, + "loss": 1.1384, + "step": 25864 + }, + { + "epoch": 0.03, + "learning_rate": 4.99181309525051e-05, + "loss": 1.1667, + "step": 25865 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918124600793935e-05, + "loss": 0.9993, + "step": 25866 + }, + { + "epoch": 0.03, + "learning_rate": 4.99181182488368e-05, + "loss": 0.7987, + "step": 25867 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918111896633674e-05, + "loss": 0.7769, + "step": 25868 + }, + { + "epoch": 0.03, + "learning_rate": 4.991810554418457e-05, + "loss": 0.7258, + "step": 25869 + }, + { + "epoch": 0.03, + "learning_rate": 4.991809919148949e-05, + "loss": 0.9819, + "step": 25870 + }, + { + "epoch": 0.03, + "learning_rate": 4.991809283854842e-05, + "loss": 0.9439, + "step": 25871 + }, + { + "epoch": 0.03, + "learning_rate": 4.991808648536137e-05, + "loss": 0.3832, + "step": 25872 + }, + { + "epoch": 0.03, + "learning_rate": 4.991808013192835e-05, + "loss": 1.7812, + "step": 25873 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918073778249344e-05, + "loss": 1.4398, + "step": 25874 + }, + { + "epoch": 0.03, + "learning_rate": 4.991806742432435e-05, + "loss": 1.3273, + "step": 25875 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918061070153375e-05, + "loss": 0.9974, + "step": 25876 + }, + { + "epoch": 0.03, + "learning_rate": 4.991805471573643e-05, + "loss": 0.8572, + "step": 25877 + }, + { + "epoch": 0.03, + "learning_rate": 4.99180483610735e-05, + "loss": 1.1089, + "step": 25878 + }, + { + "epoch": 0.03, + "learning_rate": 4.991804200616459e-05, + "loss": 1.1339, + "step": 25879 + }, + { + "epoch": 0.03, + "learning_rate": 4.99180356510097e-05, + "loss": 1.3066, + "step": 25880 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918029295608825e-05, + "loss": 0.9561, + "step": 25881 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918022939961974e-05, + "loss": 1.0982, + "step": 25882 + }, + { + "epoch": 0.03, + "learning_rate": 4.991801658406914e-05, + "loss": 1.3559, + "step": 25883 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918010227930325e-05, + "loss": 1.2015, + "step": 25884 + }, + { + "epoch": 0.03, + "learning_rate": 4.991800387154553e-05, + "loss": 1.3261, + "step": 25885 + }, + { + "epoch": 0.03, + "learning_rate": 4.991799751491476e-05, + "loss": 1.3883, + "step": 25886 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917991158038005e-05, + "loss": 1.0042, + "step": 25887 + }, + { + "epoch": 0.03, + "learning_rate": 4.991798480091527e-05, + "loss": 0.8556, + "step": 25888 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917978443546565e-05, + "loss": 0.851, + "step": 25889 + }, + { + "epoch": 0.03, + "learning_rate": 4.991797208593187e-05, + "loss": 0.9601, + "step": 25890 + }, + { + "epoch": 0.03, + "learning_rate": 4.991796572807119e-05, + "loss": 1.3565, + "step": 25891 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917959369964543e-05, + "loss": 1.3138, + "step": 25892 + }, + { + "epoch": 0.03, + "learning_rate": 4.991795301161191e-05, + "loss": 1.1688, + "step": 25893 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917946653013306e-05, + "loss": 1.0954, + "step": 25894 + }, + { + "epoch": 0.03, + "learning_rate": 4.991794029416871e-05, + "loss": 0.9156, + "step": 25895 + }, + { + "epoch": 0.03, + "learning_rate": 4.991793393507814e-05, + "loss": 0.8094, + "step": 25896 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917927575741595e-05, + "loss": 1.1329, + "step": 25897 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917921216159064e-05, + "loss": 1.2325, + "step": 25898 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917914856330554e-05, + "loss": 1.2242, + "step": 25899 + }, + { + "epoch": 0.03, + "learning_rate": 4.991790849625607e-05, + "loss": 1.132, + "step": 25900 + }, + { + "epoch": 0.03, + "learning_rate": 4.99179021359356e-05, + "loss": 1.0243, + "step": 25901 + }, + { + "epoch": 0.03, + "learning_rate": 4.991789577536916e-05, + "loss": 0.8993, + "step": 25902 + }, + { + "epoch": 0.03, + "learning_rate": 4.991788941455674e-05, + "loss": 0.7386, + "step": 25903 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917883053498335e-05, + "loss": 0.7659, + "step": 25904 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917876692193955e-05, + "loss": 0.4891, + "step": 25905 + }, + { + "epoch": 0.03, + "learning_rate": 4.991787033064359e-05, + "loss": 0.4536, + "step": 25906 + }, + { + "epoch": 0.03, + "learning_rate": 4.991786396884725e-05, + "loss": 0.3568, + "step": 25907 + }, + { + "epoch": 0.03, + "learning_rate": 4.991785760680493e-05, + "loss": 0.9202, + "step": 25908 + }, + { + "epoch": 0.03, + "learning_rate": 4.991785124451663e-05, + "loss": 0.896, + "step": 25909 + }, + { + "epoch": 0.03, + "learning_rate": 4.991784488198236e-05, + "loss": 1.0383, + "step": 25910 + }, + { + "epoch": 0.03, + "learning_rate": 4.991783851920211e-05, + "loss": 1.0848, + "step": 25911 + }, + { + "epoch": 0.03, + "learning_rate": 4.991783215617588e-05, + "loss": 1.0207, + "step": 25912 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917825792903666e-05, + "loss": 0.8927, + "step": 25913 + }, + { + "epoch": 0.03, + "learning_rate": 4.991781942938548e-05, + "loss": 0.8552, + "step": 25914 + }, + { + "epoch": 0.03, + "learning_rate": 4.991781306562131e-05, + "loss": 0.9227, + "step": 25915 + }, + { + "epoch": 0.03, + "learning_rate": 4.991780670161117e-05, + "loss": 1.1167, + "step": 25916 + }, + { + "epoch": 0.03, + "learning_rate": 4.991780033735505e-05, + "loss": 0.8406, + "step": 25917 + }, + { + "epoch": 0.03, + "learning_rate": 4.991779397285294e-05, + "loss": 0.9079, + "step": 25918 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917787608104864e-05, + "loss": 0.6282, + "step": 25919 + }, + { + "epoch": 0.03, + "learning_rate": 4.991778124311081e-05, + "loss": 0.8382, + "step": 25920 + }, + { + "epoch": 0.03, + "learning_rate": 4.991777487787077e-05, + "loss": 1.0199, + "step": 25921 + }, + { + "epoch": 0.03, + "learning_rate": 4.991776851238476e-05, + "loss": 1.0519, + "step": 25922 + }, + { + "epoch": 0.03, + "learning_rate": 4.991776214665277e-05, + "loss": 1.0039, + "step": 25923 + }, + { + "epoch": 0.03, + "learning_rate": 4.99177557806748e-05, + "loss": 1.2006, + "step": 25924 + }, + { + "epoch": 0.03, + "learning_rate": 4.991774941445086e-05, + "loss": 0.4781, + "step": 25925 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917743047980936e-05, + "loss": 0.2254, + "step": 25926 + }, + { + "epoch": 0.03, + "learning_rate": 4.991773668126504e-05, + "loss": 0.1132, + "step": 25927 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917730314303156e-05, + "loss": 0.1581, + "step": 25928 + }, + { + "epoch": 0.03, + "learning_rate": 4.99177239470953e-05, + "loss": 0.1378, + "step": 25929 + }, + { + "epoch": 0.03, + "learning_rate": 4.991771757964147e-05, + "loss": 0.1471, + "step": 25930 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917711211941665e-05, + "loss": 0.1376, + "step": 25931 + }, + { + "epoch": 0.03, + "learning_rate": 4.991770484399587e-05, + "loss": 0.0799, + "step": 25932 + }, + { + "epoch": 0.03, + "learning_rate": 4.991769847580411e-05, + "loss": 0.1844, + "step": 25933 + }, + { + "epoch": 0.03, + "learning_rate": 4.991769210736637e-05, + "loss": 0.4573, + "step": 25934 + }, + { + "epoch": 0.03, + "learning_rate": 4.991768573868265e-05, + "loss": 0.6143, + "step": 25935 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917679369752955e-05, + "loss": 0.6123, + "step": 25936 + }, + { + "epoch": 0.03, + "learning_rate": 4.991767300057729e-05, + "loss": 0.4783, + "step": 25937 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917666631155635e-05, + "loss": 0.6303, + "step": 25938 + }, + { + "epoch": 0.03, + "learning_rate": 4.991766026148801e-05, + "loss": 0.4937, + "step": 25939 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917653891574415e-05, + "loss": 0.5631, + "step": 25940 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917647521414834e-05, + "loss": 0.5555, + "step": 25941 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917641151009274e-05, + "loss": 0.5362, + "step": 25942 + }, + { + "epoch": 0.03, + "learning_rate": 4.991763478035775e-05, + "loss": 0.5339, + "step": 25943 + }, + { + "epoch": 0.03, + "learning_rate": 4.991762840946024e-05, + "loss": 0.4648, + "step": 25944 + }, + { + "epoch": 0.03, + "learning_rate": 4.991762203831676e-05, + "loss": 0.5782, + "step": 25945 + }, + { + "epoch": 0.03, + "learning_rate": 4.99176156669273e-05, + "loss": 0.5922, + "step": 25946 + }, + { + "epoch": 0.03, + "learning_rate": 4.991760929529186e-05, + "loss": 0.6157, + "step": 25947 + }, + { + "epoch": 0.03, + "learning_rate": 4.991760292341045e-05, + "loss": 0.4761, + "step": 25948 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917596551283066e-05, + "loss": 0.5092, + "step": 25949 + }, + { + "epoch": 0.03, + "learning_rate": 4.991759017890969e-05, + "loss": 0.5757, + "step": 25950 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917583806290356e-05, + "loss": 0.4771, + "step": 25951 + }, + { + "epoch": 0.03, + "learning_rate": 4.991757743342504e-05, + "loss": 0.4629, + "step": 25952 + }, + { + "epoch": 0.03, + "learning_rate": 4.991757106031375e-05, + "loss": 0.5341, + "step": 25953 + }, + { + "epoch": 0.03, + "learning_rate": 4.991756468695647e-05, + "loss": 0.5705, + "step": 25954 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917558313353235e-05, + "loss": 0.5107, + "step": 25955 + }, + { + "epoch": 0.03, + "learning_rate": 4.991755193950401e-05, + "loss": 0.4536, + "step": 25956 + }, + { + "epoch": 0.03, + "learning_rate": 4.991754556540882e-05, + "loss": 0.4818, + "step": 25957 + }, + { + "epoch": 0.03, + "learning_rate": 4.991753919106765e-05, + "loss": 0.5219, + "step": 25958 + }, + { + "epoch": 0.03, + "learning_rate": 4.991753281648051e-05, + "loss": 0.4933, + "step": 25959 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917526441647385e-05, + "loss": 0.5105, + "step": 25960 + }, + { + "epoch": 0.03, + "learning_rate": 4.991752006656829e-05, + "loss": 0.5003, + "step": 25961 + }, + { + "epoch": 0.03, + "learning_rate": 4.991751369124322e-05, + "loss": 0.45, + "step": 25962 + }, + { + "epoch": 0.03, + "learning_rate": 4.991750731567217e-05, + "loss": 0.4207, + "step": 25963 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917500939855145e-05, + "loss": 0.4828, + "step": 25964 + }, + { + "epoch": 0.03, + "learning_rate": 4.991749456379215e-05, + "loss": 0.4453, + "step": 25965 + }, + { + "epoch": 0.03, + "learning_rate": 4.991748818748317e-05, + "loss": 0.4744, + "step": 25966 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917481810928235e-05, + "loss": 0.4235, + "step": 25967 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917475434127305e-05, + "loss": 0.4519, + "step": 25968 + }, + { + "epoch": 0.03, + "learning_rate": 4.991746905708041e-05, + "loss": 0.4124, + "step": 25969 + }, + { + "epoch": 0.03, + "learning_rate": 4.991746267978754e-05, + "loss": 0.3865, + "step": 25970 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917456302248695e-05, + "loss": 0.3994, + "step": 25971 + }, + { + "epoch": 0.03, + "learning_rate": 4.991744992446388e-05, + "loss": 0.3866, + "step": 25972 + }, + { + "epoch": 0.03, + "learning_rate": 4.991744354643308e-05, + "loss": 0.3604, + "step": 25973 + }, + { + "epoch": 0.03, + "learning_rate": 4.991743716815631e-05, + "loss": 0.4321, + "step": 25974 + }, + { + "epoch": 0.03, + "learning_rate": 4.991743078963357e-05, + "loss": 0.462, + "step": 25975 + }, + { + "epoch": 0.03, + "learning_rate": 4.991742441086484e-05, + "loss": 0.4357, + "step": 25976 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917418031850147e-05, + "loss": 0.5157, + "step": 25977 + }, + { + "epoch": 0.03, + "learning_rate": 4.991741165258949e-05, + "loss": 0.5101, + "step": 25978 + }, + { + "epoch": 0.03, + "learning_rate": 4.991740527308284e-05, + "loss": 0.4588, + "step": 25979 + }, + { + "epoch": 0.03, + "learning_rate": 4.991739889333022e-05, + "loss": 0.4946, + "step": 25980 + }, + { + "epoch": 0.03, + "learning_rate": 4.991739251333164e-05, + "loss": 0.4544, + "step": 25981 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917386133087076e-05, + "loss": 0.4413, + "step": 25982 + }, + { + "epoch": 0.03, + "learning_rate": 4.991737975259654e-05, + "loss": 0.4763, + "step": 25983 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917373371860024e-05, + "loss": 0.4482, + "step": 25984 + }, + { + "epoch": 0.03, + "learning_rate": 4.991736699087754e-05, + "loss": 0.4221, + "step": 25985 + }, + { + "epoch": 0.03, + "learning_rate": 4.991736060964908e-05, + "loss": 0.4137, + "step": 25986 + }, + { + "epoch": 0.03, + "learning_rate": 4.991735422817465e-05, + "loss": 0.4145, + "step": 25987 + }, + { + "epoch": 0.03, + "learning_rate": 4.991734784645424e-05, + "loss": 0.3517, + "step": 25988 + }, + { + "epoch": 0.03, + "learning_rate": 4.991734146448787e-05, + "loss": 0.3733, + "step": 25989 + }, + { + "epoch": 0.03, + "learning_rate": 4.991733508227551e-05, + "loss": 0.5152, + "step": 25990 + }, + { + "epoch": 0.03, + "learning_rate": 4.991732869981718e-05, + "loss": 0.5202, + "step": 25991 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917322317112883e-05, + "loss": 0.4793, + "step": 25992 + }, + { + "epoch": 0.03, + "learning_rate": 4.991731593416261e-05, + "loss": 0.471, + "step": 25993 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917309550966365e-05, + "loss": 0.4608, + "step": 25994 + }, + { + "epoch": 0.03, + "learning_rate": 4.991730316752414e-05, + "loss": 0.4078, + "step": 25995 + }, + { + "epoch": 0.03, + "learning_rate": 4.991729678383595e-05, + "loss": 0.4474, + "step": 25996 + }, + { + "epoch": 0.03, + "learning_rate": 4.991729039990178e-05, + "loss": 0.4474, + "step": 25997 + }, + { + "epoch": 0.03, + "learning_rate": 4.991728401572164e-05, + "loss": 0.3836, + "step": 25998 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917277631295534e-05, + "loss": 0.4841, + "step": 25999 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917271246623446e-05, + "loss": 0.4515, + "step": 26000 + }, + { + "epoch": 0.03, + "eval_loss": 1.0860557556152344, + "eval_runtime": 125.5761, + "eval_samples_per_second": 11.029, + "eval_steps_per_second": 5.519, + "step": 26000 + }, + { + "epoch": 0.03, + "learning_rate": 4.991726486170539e-05, + "loss": 0.4444, + "step": 26001 + }, + { + "epoch": 0.03, + "learning_rate": 4.991725847654136e-05, + "loss": 0.5174, + "step": 26002 + }, + { + "epoch": 0.03, + "learning_rate": 4.991725209113135e-05, + "loss": 0.3774, + "step": 26003 + }, + { + "epoch": 0.03, + "learning_rate": 4.991724570547538e-05, + "loss": 0.4213, + "step": 26004 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917239319573426e-05, + "loss": 0.4221, + "step": 26005 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917232933425515e-05, + "loss": 0.4465, + "step": 26006 + }, + { + "epoch": 0.03, + "learning_rate": 4.991722654703161e-05, + "loss": 0.4283, + "step": 26007 + }, + { + "epoch": 0.03, + "learning_rate": 4.991722016039175e-05, + "loss": 0.4563, + "step": 26008 + }, + { + "epoch": 0.03, + "learning_rate": 4.991721377350591e-05, + "loss": 0.4204, + "step": 26009 + }, + { + "epoch": 0.03, + "learning_rate": 4.99172073863741e-05, + "loss": 0.4499, + "step": 26010 + }, + { + "epoch": 0.03, + "learning_rate": 4.991720099899632e-05, + "loss": 0.509, + "step": 26011 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917194611372566e-05, + "loss": 0.4553, + "step": 26012 + }, + { + "epoch": 0.03, + "learning_rate": 4.991718822350284e-05, + "loss": 0.4346, + "step": 26013 + }, + { + "epoch": 0.03, + "learning_rate": 4.991718183538714e-05, + "loss": 0.4909, + "step": 26014 + }, + { + "epoch": 0.03, + "learning_rate": 4.991717544702547e-05, + "loss": 0.5268, + "step": 26015 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917169058417835e-05, + "loss": 0.4683, + "step": 26016 + }, + { + "epoch": 0.03, + "learning_rate": 4.991716266956422e-05, + "loss": 0.4367, + "step": 26017 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917156280464627e-05, + "loss": 0.4877, + "step": 26018 + }, + { + "epoch": 0.03, + "learning_rate": 4.991714989111907e-05, + "loss": 0.6743, + "step": 26019 + }, + { + "epoch": 0.03, + "learning_rate": 4.991714350152754e-05, + "loss": 0.8407, + "step": 26020 + }, + { + "epoch": 0.03, + "learning_rate": 4.991713711169004e-05, + "loss": 1.0402, + "step": 26021 + }, + { + "epoch": 0.03, + "learning_rate": 4.991713072160658e-05, + "loss": 0.8534, + "step": 26022 + }, + { + "epoch": 0.03, + "learning_rate": 4.991712433127713e-05, + "loss": 0.9654, + "step": 26023 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917117940701716e-05, + "loss": 1.0535, + "step": 26024 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917111549880325e-05, + "loss": 0.9986, + "step": 26025 + }, + { + "epoch": 0.03, + "learning_rate": 4.991710515881297e-05, + "loss": 1.1621, + "step": 26026 + }, + { + "epoch": 0.03, + "learning_rate": 4.991709876749964e-05, + "loss": 0.8025, + "step": 26027 + }, + { + "epoch": 0.03, + "learning_rate": 4.991709237594034e-05, + "loss": 1.4118, + "step": 26028 + }, + { + "epoch": 0.03, + "learning_rate": 4.991708598413507e-05, + "loss": 1.323, + "step": 26029 + }, + { + "epoch": 0.03, + "learning_rate": 4.991707959208383e-05, + "loss": 1.419, + "step": 26030 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917073199786615e-05, + "loss": 1.0978, + "step": 26031 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917066807243436e-05, + "loss": 0.8578, + "step": 26032 + }, + { + "epoch": 0.03, + "learning_rate": 4.991706041445428e-05, + "loss": 1.5992, + "step": 26033 + }, + { + "epoch": 0.03, + "learning_rate": 4.991705402141916e-05, + "loss": 0.8286, + "step": 26034 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917047628138057e-05, + "loss": 0.693, + "step": 26035 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917041234611e-05, + "loss": 0.8298, + "step": 26036 + }, + { + "epoch": 0.03, + "learning_rate": 4.991703484083796e-05, + "loss": 1.204, + "step": 26037 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917028446818946e-05, + "loss": 0.9053, + "step": 26038 + }, + { + "epoch": 0.03, + "learning_rate": 4.991702205255397e-05, + "loss": 1.0469, + "step": 26039 + }, + { + "epoch": 0.03, + "learning_rate": 4.991701565804303e-05, + "loss": 0.9925, + "step": 26040 + }, + { + "epoch": 0.03, + "learning_rate": 4.9917009263286105e-05, + "loss": 1.1052, + "step": 26041 + }, + { + "epoch": 0.03, + "learning_rate": 4.991700286828322e-05, + "loss": 1.1548, + "step": 26042 + }, + { + "epoch": 0.03, + "learning_rate": 4.991699647303436e-05, + "loss": 0.9096, + "step": 26043 + }, + { + "epoch": 0.03, + "learning_rate": 4.991699007753953e-05, + "loss": 0.8057, + "step": 26044 + }, + { + "epoch": 0.03, + "learning_rate": 4.991698368179874e-05, + "loss": 0.6651, + "step": 26045 + }, + { + "epoch": 0.03, + "learning_rate": 4.991697728581197e-05, + "loss": 1.1431, + "step": 26046 + }, + { + "epoch": 0.03, + "learning_rate": 4.991697088957923e-05, + "loss": 1.2183, + "step": 26047 + }, + { + "epoch": 0.03, + "learning_rate": 4.991696449310051e-05, + "loss": 1.0361, + "step": 26048 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916958096375844e-05, + "loss": 0.8701, + "step": 26049 + }, + { + "epoch": 0.03, + "learning_rate": 4.991695169940519e-05, + "loss": 0.6644, + "step": 26050 + }, + { + "epoch": 0.03, + "learning_rate": 4.991694530218858e-05, + "loss": 0.5679, + "step": 26051 + }, + { + "epoch": 0.03, + "learning_rate": 4.991693890472599e-05, + "loss": 0.8478, + "step": 26052 + }, + { + "epoch": 0.03, + "learning_rate": 4.991693250701743e-05, + "loss": 0.8303, + "step": 26053 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916926109062906e-05, + "loss": 1.2064, + "step": 26054 + }, + { + "epoch": 0.03, + "learning_rate": 4.991691971086241e-05, + "loss": 1.2902, + "step": 26055 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916913312415945e-05, + "loss": 1.2241, + "step": 26056 + }, + { + "epoch": 0.03, + "learning_rate": 4.991690691372352e-05, + "loss": 1.374, + "step": 26057 + }, + { + "epoch": 0.03, + "learning_rate": 4.991690051478511e-05, + "loss": 1.188, + "step": 26058 + }, + { + "epoch": 0.03, + "learning_rate": 4.991689411560074e-05, + "loss": 0.965, + "step": 26059 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916887716170396e-05, + "loss": 0.986, + "step": 26060 + }, + { + "epoch": 0.03, + "learning_rate": 4.991688131649408e-05, + "loss": 1.0385, + "step": 26061 + }, + { + "epoch": 0.03, + "learning_rate": 4.991687491657181e-05, + "loss": 0.6881, + "step": 26062 + }, + { + "epoch": 0.03, + "learning_rate": 4.991686851640356e-05, + "loss": 0.7574, + "step": 26063 + }, + { + "epoch": 0.03, + "learning_rate": 4.991686211598934e-05, + "loss": 1.0766, + "step": 26064 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916855715329156e-05, + "loss": 1.1716, + "step": 26065 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916849314423e-05, + "loss": 0.9634, + "step": 26066 + }, + { + "epoch": 0.03, + "learning_rate": 4.991684291327088e-05, + "loss": 0.8014, + "step": 26067 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916836511872786e-05, + "loss": 0.9244, + "step": 26068 + }, + { + "epoch": 0.03, + "learning_rate": 4.991683011022873e-05, + "loss": 0.7757, + "step": 26069 + }, + { + "epoch": 0.03, + "learning_rate": 4.99168237083387e-05, + "loss": 0.9133, + "step": 26070 + }, + { + "epoch": 0.03, + "learning_rate": 4.99168173062027e-05, + "loss": 1.134, + "step": 26071 + }, + { + "epoch": 0.03, + "learning_rate": 4.991681090382073e-05, + "loss": 1.1169, + "step": 26072 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916804501192804e-05, + "loss": 1.3285, + "step": 26073 + }, + { + "epoch": 0.03, + "learning_rate": 4.991679809831889e-05, + "loss": 0.7384, + "step": 26074 + }, + { + "epoch": 0.03, + "learning_rate": 4.991679169519903e-05, + "loss": 0.7999, + "step": 26075 + }, + { + "epoch": 0.03, + "learning_rate": 4.991678529183319e-05, + "loss": 0.9455, + "step": 26076 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916778888221384e-05, + "loss": 1.6089, + "step": 26077 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916772484363614e-05, + "loss": 1.5912, + "step": 26078 + }, + { + "epoch": 0.03, + "learning_rate": 4.991676608025987e-05, + "loss": 0.8181, + "step": 26079 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916759675910165e-05, + "loss": 0.7797, + "step": 26080 + }, + { + "epoch": 0.03, + "learning_rate": 4.991675327131448e-05, + "loss": 1.0226, + "step": 26081 + }, + { + "epoch": 0.03, + "learning_rate": 4.991674686647284e-05, + "loss": 0.5923, + "step": 26082 + }, + { + "epoch": 0.03, + "learning_rate": 4.991674046138523e-05, + "loss": 0.953, + "step": 26083 + }, + { + "epoch": 0.03, + "learning_rate": 4.991673405605164e-05, + "loss": 0.8666, + "step": 26084 + }, + { + "epoch": 0.03, + "learning_rate": 4.99167276504721e-05, + "loss": 0.9239, + "step": 26085 + }, + { + "epoch": 0.03, + "learning_rate": 4.991672124464658e-05, + "loss": 1.1436, + "step": 26086 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916714838575094e-05, + "loss": 1.2577, + "step": 26087 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916708432257655e-05, + "loss": 1.3525, + "step": 26088 + }, + { + "epoch": 0.03, + "learning_rate": 4.991670202569423e-05, + "loss": 0.9972, + "step": 26089 + }, + { + "epoch": 0.03, + "learning_rate": 4.991669561888485e-05, + "loss": 0.9677, + "step": 26090 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916689211829495e-05, + "loss": 0.6115, + "step": 26091 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916682804528184e-05, + "loss": 0.8531, + "step": 26092 + }, + { + "epoch": 0.03, + "learning_rate": 4.991667639698089e-05, + "loss": 1.4811, + "step": 26093 + }, + { + "epoch": 0.03, + "learning_rate": 4.991666998918764e-05, + "loss": 3.8289, + "step": 26094 + }, + { + "epoch": 0.03, + "learning_rate": 4.991666358114842e-05, + "loss": 0.4209, + "step": 26095 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916657172863234e-05, + "loss": 0.1446, + "step": 26096 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916650764332085e-05, + "loss": 0.9051, + "step": 26097 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916644355554964e-05, + "loss": 1.2332, + "step": 26098 + }, + { + "epoch": 0.03, + "learning_rate": 4.991663794653187e-05, + "loss": 1.6801, + "step": 26099 + }, + { + "epoch": 0.03, + "learning_rate": 4.991663153726283e-05, + "loss": 0.8838, + "step": 26100 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916625127747806e-05, + "loss": 1.1228, + "step": 26101 + }, + { + "epoch": 0.03, + "learning_rate": 4.991661871798682e-05, + "loss": 1.0108, + "step": 26102 + }, + { + "epoch": 0.03, + "learning_rate": 4.991661230797987e-05, + "loss": 0.9096, + "step": 26103 + }, + { + "epoch": 0.03, + "learning_rate": 4.991660589772695e-05, + "loss": 1.0328, + "step": 26104 + }, + { + "epoch": 0.03, + "learning_rate": 4.991659948722807e-05, + "loss": 0.9394, + "step": 26105 + }, + { + "epoch": 0.03, + "learning_rate": 4.991659307648321e-05, + "loss": 1.033, + "step": 26106 + }, + { + "epoch": 0.03, + "learning_rate": 4.991658666549239e-05, + "loss": 1.1274, + "step": 26107 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916580254255616e-05, + "loss": 0.9261, + "step": 26108 + }, + { + "epoch": 0.03, + "learning_rate": 4.991657384277286e-05, + "loss": 0.9871, + "step": 26109 + }, + { + "epoch": 0.03, + "learning_rate": 4.991656743104415e-05, + "loss": 1.8349, + "step": 26110 + }, + { + "epoch": 0.03, + "learning_rate": 4.991656101906947e-05, + "loss": 1.0934, + "step": 26111 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916554606848817e-05, + "loss": 1.6266, + "step": 26112 + }, + { + "epoch": 0.03, + "learning_rate": 4.991654819438221e-05, + "loss": 1.6194, + "step": 26113 + }, + { + "epoch": 0.03, + "learning_rate": 4.991654178166963e-05, + "loss": 1.3072, + "step": 26114 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916535368711095e-05, + "loss": 0.8548, + "step": 26115 + }, + { + "epoch": 0.03, + "learning_rate": 4.991652895550658e-05, + "loss": 0.9472, + "step": 26116 + }, + { + "epoch": 0.03, + "learning_rate": 4.991652254205611e-05, + "loss": 1.2554, + "step": 26117 + }, + { + "epoch": 0.03, + "learning_rate": 4.991651612835967e-05, + "loss": 1.2452, + "step": 26118 + }, + { + "epoch": 0.03, + "learning_rate": 4.991650971441726e-05, + "loss": 0.9404, + "step": 26119 + }, + { + "epoch": 0.03, + "learning_rate": 4.991650330022889e-05, + "loss": 0.9088, + "step": 26120 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916496885794555e-05, + "loss": 1.1325, + "step": 26121 + }, + { + "epoch": 0.03, + "learning_rate": 4.991649047111425e-05, + "loss": 1.0911, + "step": 26122 + }, + { + "epoch": 0.03, + "learning_rate": 4.991648405618799e-05, + "loss": 1.2145, + "step": 26123 + }, + { + "epoch": 0.03, + "learning_rate": 4.991647764101576e-05, + "loss": 1.4028, + "step": 26124 + }, + { + "epoch": 0.03, + "learning_rate": 4.991647122559756e-05, + "loss": 1.0197, + "step": 26125 + }, + { + "epoch": 0.03, + "learning_rate": 4.99164648099334e-05, + "loss": 1.0037, + "step": 26126 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916458394023275e-05, + "loss": 0.673, + "step": 26127 + }, + { + "epoch": 0.03, + "learning_rate": 4.991645197786718e-05, + "loss": 1.0895, + "step": 26128 + }, + { + "epoch": 0.03, + "learning_rate": 4.991644556146513e-05, + "loss": 0.9901, + "step": 26129 + }, + { + "epoch": 0.03, + "learning_rate": 4.991643914481711e-05, + "loss": 1.0335, + "step": 26130 + }, + { + "epoch": 0.03, + "learning_rate": 4.991643272792312e-05, + "loss": 1.6946, + "step": 26131 + }, + { + "epoch": 0.03, + "learning_rate": 4.991642631078318e-05, + "loss": 1.2949, + "step": 26132 + }, + { + "epoch": 0.03, + "learning_rate": 4.991641989339727e-05, + "loss": 0.8925, + "step": 26133 + }, + { + "epoch": 0.03, + "learning_rate": 4.991641347576539e-05, + "loss": 0.8859, + "step": 26134 + }, + { + "epoch": 0.03, + "learning_rate": 4.991640705788754e-05, + "loss": 0.8471, + "step": 26135 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916400639763736e-05, + "loss": 0.5979, + "step": 26136 + }, + { + "epoch": 0.03, + "learning_rate": 4.991639422139397e-05, + "loss": 0.6681, + "step": 26137 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916387802778233e-05, + "loss": 0.8831, + "step": 26138 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916381383916535e-05, + "loss": 1.0922, + "step": 26139 + }, + { + "epoch": 0.03, + "learning_rate": 4.991637496480887e-05, + "loss": 1.1613, + "step": 26140 + }, + { + "epoch": 0.03, + "learning_rate": 4.991636854545524e-05, + "loss": 1.1967, + "step": 26141 + }, + { + "epoch": 0.03, + "learning_rate": 4.991636212585566e-05, + "loss": 0.8537, + "step": 26142 + }, + { + "epoch": 0.03, + "learning_rate": 4.99163557060101e-05, + "loss": 1.047, + "step": 26143 + }, + { + "epoch": 0.03, + "learning_rate": 4.991634928591858e-05, + "loss": 0.94, + "step": 26144 + }, + { + "epoch": 0.03, + "learning_rate": 4.99163428655811e-05, + "loss": 1.0826, + "step": 26145 + }, + { + "epoch": 0.03, + "learning_rate": 4.991633644499765e-05, + "loss": 1.0241, + "step": 26146 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916330024168246e-05, + "loss": 1.318, + "step": 26147 + }, + { + "epoch": 0.03, + "learning_rate": 4.991632360309287e-05, + "loss": 1.1236, + "step": 26148 + }, + { + "epoch": 0.03, + "learning_rate": 4.991631718177153e-05, + "loss": 0.5673, + "step": 26149 + }, + { + "epoch": 0.03, + "learning_rate": 4.991631076020423e-05, + "loss": 0.7606, + "step": 26150 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916304338390976e-05, + "loss": 1.5247, + "step": 26151 + }, + { + "epoch": 0.03, + "learning_rate": 4.991629791633174e-05, + "loss": 1.0264, + "step": 26152 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916291494026556e-05, + "loss": 1.2371, + "step": 26153 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916285071475406e-05, + "loss": 0.4817, + "step": 26154 + }, + { + "epoch": 0.03, + "learning_rate": 4.991627864867829e-05, + "loss": 0.7484, + "step": 26155 + }, + { + "epoch": 0.03, + "learning_rate": 4.991627222563521e-05, + "loss": 0.8763, + "step": 26156 + }, + { + "epoch": 0.03, + "learning_rate": 4.991626580234617e-05, + "loss": 0.8567, + "step": 26157 + }, + { + "epoch": 0.03, + "learning_rate": 4.991625937881116e-05, + "loss": 1.2165, + "step": 26158 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916252955030194e-05, + "loss": 1.0351, + "step": 26159 + }, + { + "epoch": 0.03, + "learning_rate": 4.991624653100327e-05, + "loss": 0.5446, + "step": 26160 + }, + { + "epoch": 0.03, + "learning_rate": 4.991624010673037e-05, + "loss": 0.7159, + "step": 26161 + }, + { + "epoch": 0.03, + "learning_rate": 4.991623368221152e-05, + "loss": 0.6068, + "step": 26162 + }, + { + "epoch": 0.03, + "learning_rate": 4.99162272574467e-05, + "loss": 0.4778, + "step": 26163 + }, + { + "epoch": 0.03, + "learning_rate": 4.991622083243592e-05, + "loss": 0.9843, + "step": 26164 + }, + { + "epoch": 0.03, + "learning_rate": 4.991621440717918e-05, + "loss": 0.8191, + "step": 26165 + }, + { + "epoch": 0.03, + "learning_rate": 4.991620798167648e-05, + "loss": 1.2769, + "step": 26166 + }, + { + "epoch": 0.03, + "learning_rate": 4.991620155592781e-05, + "loss": 0.8247, + "step": 26167 + }, + { + "epoch": 0.03, + "learning_rate": 4.991619512993318e-05, + "loss": 0.4417, + "step": 26168 + }, + { + "epoch": 0.03, + "learning_rate": 4.991618870369259e-05, + "loss": 0.2118, + "step": 26169 + }, + { + "epoch": 0.03, + "learning_rate": 4.991618227720604e-05, + "loss": 0.1124, + "step": 26170 + }, + { + "epoch": 0.03, + "learning_rate": 4.991617585047352e-05, + "loss": 0.1774, + "step": 26171 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916169423495044e-05, + "loss": 0.133, + "step": 26172 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916162996270596e-05, + "loss": 0.1695, + "step": 26173 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916156568800196e-05, + "loss": 0.1485, + "step": 26174 + }, + { + "epoch": 0.03, + "learning_rate": 4.991615014108383e-05, + "loss": 0.0928, + "step": 26175 + }, + { + "epoch": 0.03, + "learning_rate": 4.991614371312151e-05, + "loss": 0.1705, + "step": 26176 + }, + { + "epoch": 0.03, + "learning_rate": 4.991613728491322e-05, + "loss": 0.4077, + "step": 26177 + }, + { + "epoch": 0.03, + "learning_rate": 4.991613085645898e-05, + "loss": 0.4945, + "step": 26178 + }, + { + "epoch": 0.03, + "learning_rate": 4.991612442775877e-05, + "loss": 0.5179, + "step": 26179 + }, + { + "epoch": 0.03, + "learning_rate": 4.991611799881259e-05, + "loss": 0.3656, + "step": 26180 + }, + { + "epoch": 0.03, + "learning_rate": 4.991611156962046e-05, + "loss": 0.6605, + "step": 26181 + }, + { + "epoch": 0.03, + "learning_rate": 4.991610514018237e-05, + "loss": 0.5359, + "step": 26182 + }, + { + "epoch": 0.03, + "learning_rate": 4.991609871049831e-05, + "loss": 0.6405, + "step": 26183 + }, + { + "epoch": 0.03, + "learning_rate": 4.991609228056829e-05, + "loss": 0.6068, + "step": 26184 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916085850392314e-05, + "loss": 0.5701, + "step": 26185 + }, + { + "epoch": 0.03, + "learning_rate": 4.991607941997037e-05, + "loss": 0.5832, + "step": 26186 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916072989302476e-05, + "loss": 0.4512, + "step": 26187 + }, + { + "epoch": 0.03, + "learning_rate": 4.991606655838861e-05, + "loss": 0.5943, + "step": 26188 + }, + { + "epoch": 0.03, + "learning_rate": 4.991606012722879e-05, + "loss": 0.4802, + "step": 26189 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916053695823003e-05, + "loss": 0.4161, + "step": 26190 + }, + { + "epoch": 0.03, + "learning_rate": 4.991604726417126e-05, + "loss": 0.3822, + "step": 26191 + }, + { + "epoch": 0.03, + "learning_rate": 4.991604083227356e-05, + "loss": 0.381, + "step": 26192 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916034400129895e-05, + "loss": 0.5058, + "step": 26193 + }, + { + "epoch": 0.03, + "learning_rate": 4.991602796774027e-05, + "loss": 0.5226, + "step": 26194 + }, + { + "epoch": 0.03, + "learning_rate": 4.991602153510469e-05, + "loss": 0.4985, + "step": 26195 + }, + { + "epoch": 0.03, + "learning_rate": 4.991601510222314e-05, + "loss": 0.5016, + "step": 26196 + }, + { + "epoch": 0.03, + "learning_rate": 4.9916008669095635e-05, + "loss": 0.4556, + "step": 26197 + }, + { + "epoch": 0.03, + "learning_rate": 4.991600223572217e-05, + "loss": 0.4792, + "step": 26198 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915995802102736e-05, + "loss": 0.3886, + "step": 26199 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915989368237346e-05, + "loss": 0.4247, + "step": 26200 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915982934126005e-05, + "loss": 0.4942, + "step": 26201 + }, + { + "epoch": 0.03, + "learning_rate": 4.991597649976869e-05, + "loss": 0.4914, + "step": 26202 + }, + { + "epoch": 0.03, + "learning_rate": 4.991597006516542e-05, + "loss": 0.5582, + "step": 26203 + }, + { + "epoch": 0.03, + "learning_rate": 4.99159636303162e-05, + "loss": 0.5923, + "step": 26204 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915957195221006e-05, + "loss": 0.4462, + "step": 26205 + }, + { + "epoch": 0.03, + "learning_rate": 4.991595075987986e-05, + "loss": 0.4232, + "step": 26206 + }, + { + "epoch": 0.03, + "learning_rate": 4.991594432429275e-05, + "loss": 0.4507, + "step": 26207 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915937888459684e-05, + "loss": 0.4294, + "step": 26208 + }, + { + "epoch": 0.03, + "learning_rate": 4.991593145238066e-05, + "loss": 0.4226, + "step": 26209 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915925016055675e-05, + "loss": 0.4418, + "step": 26210 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915918579484726e-05, + "loss": 0.4319, + "step": 26211 + }, + { + "epoch": 0.03, + "learning_rate": 4.991591214266782e-05, + "loss": 0.4723, + "step": 26212 + }, + { + "epoch": 0.03, + "learning_rate": 4.991590570560496e-05, + "loss": 0.3979, + "step": 26213 + }, + { + "epoch": 0.03, + "learning_rate": 4.991589926829613e-05, + "loss": 0.363, + "step": 26214 + }, + { + "epoch": 0.03, + "learning_rate": 4.991589283074135e-05, + "loss": 0.3988, + "step": 26215 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915886392940606e-05, + "loss": 0.3467, + "step": 26216 + }, + { + "epoch": 0.03, + "learning_rate": 4.991587995489391e-05, + "loss": 0.3542, + "step": 26217 + }, + { + "epoch": 0.03, + "learning_rate": 4.991587351660124e-05, + "loss": 0.3993, + "step": 26218 + }, + { + "epoch": 0.03, + "learning_rate": 4.991586707806263e-05, + "loss": 0.4435, + "step": 26219 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915860639278044e-05, + "loss": 0.436, + "step": 26220 + }, + { + "epoch": 0.03, + "learning_rate": 4.991585420024751e-05, + "loss": 0.47, + "step": 26221 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915847760971014e-05, + "loss": 0.5567, + "step": 26222 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915841321448556e-05, + "loss": 0.5489, + "step": 26223 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915834881680146e-05, + "loss": 0.5758, + "step": 26224 + }, + { + "epoch": 0.03, + "learning_rate": 4.991582844166577e-05, + "loss": 0.4574, + "step": 26225 + }, + { + "epoch": 0.03, + "learning_rate": 4.991582200140544e-05, + "loss": 0.4563, + "step": 26226 + }, + { + "epoch": 0.03, + "learning_rate": 4.991581556089915e-05, + "loss": 0.4314, + "step": 26227 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915809120146906e-05, + "loss": 0.43, + "step": 26228 + }, + { + "epoch": 0.03, + "learning_rate": 4.991580267914869e-05, + "loss": 0.3926, + "step": 26229 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915796237904534e-05, + "loss": 0.3889, + "step": 26230 + }, + { + "epoch": 0.03, + "learning_rate": 4.991578979641441e-05, + "loss": 0.4197, + "step": 26231 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915783354678324e-05, + "loss": 0.3731, + "step": 26232 + }, + { + "epoch": 0.03, + "learning_rate": 4.991577691269629e-05, + "loss": 0.3427, + "step": 26233 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915770470468295e-05, + "loss": 0.4582, + "step": 26234 + }, + { + "epoch": 0.03, + "learning_rate": 4.991576402799434e-05, + "loss": 0.5256, + "step": 26235 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915757585274426e-05, + "loss": 0.5136, + "step": 26236 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915751142308555e-05, + "loss": 0.5019, + "step": 26237 + }, + { + "epoch": 0.03, + "learning_rate": 4.991574469909672e-05, + "loss": 0.5632, + "step": 26238 + }, + { + "epoch": 0.03, + "learning_rate": 4.991573825563893e-05, + "loss": 0.5141, + "step": 26239 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915731811935194e-05, + "loss": 0.3972, + "step": 26240 + }, + { + "epoch": 0.03, + "learning_rate": 4.991572536798549e-05, + "loss": 0.4635, + "step": 26241 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915718923789836e-05, + "loss": 0.473, + "step": 26242 + }, + { + "epoch": 0.03, + "learning_rate": 4.991571247934822e-05, + "loss": 0.3901, + "step": 26243 + }, + { + "epoch": 0.03, + "learning_rate": 4.991570603466064e-05, + "loss": 0.4911, + "step": 26244 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915699589727104e-05, + "loss": 0.5677, + "step": 26245 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915693144547624e-05, + "loss": 0.5609, + "step": 26246 + }, + { + "epoch": 0.03, + "learning_rate": 4.991568669912218e-05, + "loss": 0.4634, + "step": 26247 + }, + { + "epoch": 0.03, + "learning_rate": 4.991568025345077e-05, + "loss": 0.3629, + "step": 26248 + }, + { + "epoch": 0.03, + "learning_rate": 4.991567380753341e-05, + "loss": 0.4125, + "step": 26249 + }, + { + "epoch": 0.03, + "learning_rate": 4.991566736137009e-05, + "loss": 0.4555, + "step": 26250 + }, + { + "epoch": 0.03, + "learning_rate": 4.991566091496081e-05, + "loss": 0.4548, + "step": 26251 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915654468305585e-05, + "loss": 0.4108, + "step": 26252 + }, + { + "epoch": 0.03, + "learning_rate": 4.99156480214044e-05, + "loss": 0.4031, + "step": 26253 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915641574257255e-05, + "loss": 0.3731, + "step": 26254 + }, + { + "epoch": 0.03, + "learning_rate": 4.991563512686415e-05, + "loss": 0.3806, + "step": 26255 + }, + { + "epoch": 0.03, + "learning_rate": 4.991562867922509e-05, + "loss": 0.3758, + "step": 26256 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915622231340074e-05, + "loss": 0.4648, + "step": 26257 + }, + { + "epoch": 0.03, + "learning_rate": 4.991561578320911e-05, + "loss": 0.4271, + "step": 26258 + }, + { + "epoch": 0.03, + "learning_rate": 4.991560933483218e-05, + "loss": 0.4682, + "step": 26259 + }, + { + "epoch": 0.03, + "learning_rate": 4.99156028862093e-05, + "loss": 0.5311, + "step": 26260 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915596437340454e-05, + "loss": 0.5002, + "step": 26261 + }, + { + "epoch": 0.03, + "learning_rate": 4.991558998822566e-05, + "loss": 0.4195, + "step": 26262 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915583538864906e-05, + "loss": 0.455, + "step": 26263 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915577089258195e-05, + "loss": 0.5333, + "step": 26264 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915570639405526e-05, + "loss": 0.5352, + "step": 26265 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915564189306905e-05, + "loss": 0.8874, + "step": 26266 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915557738962334e-05, + "loss": 1.104, + "step": 26267 + }, + { + "epoch": 0.03, + "learning_rate": 4.99155512883718e-05, + "loss": 0.9953, + "step": 26268 + }, + { + "epoch": 0.03, + "learning_rate": 4.991554483753531e-05, + "loss": 1.0556, + "step": 26269 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915538386452864e-05, + "loss": 1.2879, + "step": 26270 + }, + { + "epoch": 0.03, + "learning_rate": 4.991553193512447e-05, + "loss": 1.0084, + "step": 26271 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915525483550105e-05, + "loss": 0.7354, + "step": 26272 + }, + { + "epoch": 0.03, + "learning_rate": 4.99155190317298e-05, + "loss": 0.9757, + "step": 26273 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915512579663534e-05, + "loss": 0.886, + "step": 26274 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915506127351305e-05, + "loss": 1.0123, + "step": 26275 + }, + { + "epoch": 0.03, + "learning_rate": 4.991549967479313e-05, + "loss": 1.3076, + "step": 26276 + }, + { + "epoch": 0.03, + "learning_rate": 4.991549322198899e-05, + "loss": 1.104, + "step": 26277 + }, + { + "epoch": 0.03, + "learning_rate": 4.991548676893891e-05, + "loss": 1.0424, + "step": 26278 + }, + { + "epoch": 0.03, + "learning_rate": 4.991548031564286e-05, + "loss": 1.1174, + "step": 26279 + }, + { + "epoch": 0.03, + "learning_rate": 4.991547386210087e-05, + "loss": 1.2435, + "step": 26280 + }, + { + "epoch": 0.03, + "learning_rate": 4.991546740831291e-05, + "loss": 1.1675, + "step": 26281 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915460954279005e-05, + "loss": 1.0732, + "step": 26282 + }, + { + "epoch": 0.03, + "learning_rate": 4.991545449999914e-05, + "loss": 0.8396, + "step": 26283 + }, + { + "epoch": 0.03, + "learning_rate": 4.991544804547332e-05, + "loss": 1.4425, + "step": 26284 + }, + { + "epoch": 0.03, + "learning_rate": 4.991544159070155e-05, + "loss": 0.9062, + "step": 26285 + }, + { + "epoch": 0.03, + "learning_rate": 4.991543513568382e-05, + "loss": 1.0825, + "step": 26286 + }, + { + "epoch": 0.03, + "learning_rate": 4.991542868042014e-05, + "loss": 1.0097, + "step": 26287 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915422224910504e-05, + "loss": 0.9317, + "step": 26288 + }, + { + "epoch": 0.03, + "learning_rate": 4.99154157691549e-05, + "loss": 1.035, + "step": 26289 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915409313153364e-05, + "loss": 1.0449, + "step": 26290 + }, + { + "epoch": 0.03, + "learning_rate": 4.991540285690586e-05, + "loss": 0.9418, + "step": 26291 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915396400412405e-05, + "loss": 0.8218, + "step": 26292 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915389943673e-05, + "loss": 0.9577, + "step": 26293 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915383486687635e-05, + "loss": 0.8339, + "step": 26294 + }, + { + "epoch": 0.03, + "learning_rate": 4.991537702945631e-05, + "loss": 1.0096, + "step": 26295 + }, + { + "epoch": 0.03, + "learning_rate": 4.991537057197904e-05, + "loss": 1.1229, + "step": 26296 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915364114255815e-05, + "loss": 0.9962, + "step": 26297 + }, + { + "epoch": 0.03, + "learning_rate": 4.991535765628664e-05, + "loss": 0.3849, + "step": 26298 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915351198071505e-05, + "loss": 0.2717, + "step": 26299 + }, + { + "epoch": 0.03, + "learning_rate": 4.991534473961041e-05, + "loss": 0.4351, + "step": 26300 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915338280903376e-05, + "loss": 0.2628, + "step": 26301 + }, + { + "epoch": 0.03, + "learning_rate": 4.991533182195038e-05, + "loss": 0.2272, + "step": 26302 + }, + { + "epoch": 0.03, + "learning_rate": 4.991532536275143e-05, + "loss": 0.2606, + "step": 26303 + }, + { + "epoch": 0.03, + "learning_rate": 4.991531890330653e-05, + "loss": 0.1835, + "step": 26304 + }, + { + "epoch": 0.03, + "learning_rate": 4.991531244361567e-05, + "loss": 0.2243, + "step": 26305 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915305983678864e-05, + "loss": 0.1572, + "step": 26306 + }, + { + "epoch": 0.03, + "learning_rate": 4.991529952349609e-05, + "loss": 0.171, + "step": 26307 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915293063067384e-05, + "loss": 0.1675, + "step": 26308 + }, + { + "epoch": 0.03, + "learning_rate": 4.991528660239271e-05, + "loss": 0.8312, + "step": 26309 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915280141472085e-05, + "loss": 1.09, + "step": 26310 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915273680305515e-05, + "loss": 1.16, + "step": 26311 + }, + { + "epoch": 0.03, + "learning_rate": 4.991526721889298e-05, + "loss": 0.7803, + "step": 26312 + }, + { + "epoch": 0.03, + "learning_rate": 4.99152607572345e-05, + "loss": 1.4041, + "step": 26313 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915254295330065e-05, + "loss": 0.9313, + "step": 26314 + }, + { + "epoch": 0.03, + "learning_rate": 4.991524783317968e-05, + "loss": 0.7128, + "step": 26315 + }, + { + "epoch": 0.03, + "learning_rate": 4.991524137078334e-05, + "loss": 0.6648, + "step": 26316 + }, + { + "epoch": 0.03, + "learning_rate": 4.991523490814105e-05, + "loss": 2.4841, + "step": 26317 + }, + { + "epoch": 0.03, + "learning_rate": 4.99152284452528e-05, + "loss": 1.3497, + "step": 26318 + }, + { + "epoch": 0.03, + "learning_rate": 4.99152219821186e-05, + "loss": 0.99, + "step": 26319 + }, + { + "epoch": 0.03, + "learning_rate": 4.991521551873845e-05, + "loss": 0.8246, + "step": 26320 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915209055112345e-05, + "loss": 1.4009, + "step": 26321 + }, + { + "epoch": 0.03, + "learning_rate": 4.991520259124029e-05, + "loss": 0.8341, + "step": 26322 + }, + { + "epoch": 0.03, + "learning_rate": 4.991519612712228e-05, + "loss": 0.9569, + "step": 26323 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915189662758323e-05, + "loss": 0.7435, + "step": 26324 + }, + { + "epoch": 0.03, + "learning_rate": 4.991518319814841e-05, + "loss": 0.615, + "step": 26325 + }, + { + "epoch": 0.03, + "learning_rate": 4.991517673329254e-05, + "loss": 0.9217, + "step": 26326 + }, + { + "epoch": 0.03, + "learning_rate": 4.991517026819073e-05, + "loss": 0.8297, + "step": 26327 + }, + { + "epoch": 0.03, + "learning_rate": 4.991516380284296e-05, + "loss": 0.697, + "step": 26328 + }, + { + "epoch": 0.03, + "learning_rate": 4.991515733724923e-05, + "loss": 0.5306, + "step": 26329 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915150871409555e-05, + "loss": 0.2699, + "step": 26330 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915144405323933e-05, + "loss": 0.6386, + "step": 26331 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915137938992354e-05, + "loss": 0.8911, + "step": 26332 + }, + { + "epoch": 0.03, + "learning_rate": 4.991513147241482e-05, + "loss": 1.2351, + "step": 26333 + }, + { + "epoch": 0.03, + "learning_rate": 4.991512500559135e-05, + "loss": 1.0743, + "step": 26334 + }, + { + "epoch": 0.03, + "learning_rate": 4.991511853852191e-05, + "loss": 0.9627, + "step": 26335 + }, + { + "epoch": 0.03, + "learning_rate": 4.991511207120653e-05, + "loss": 1.0674, + "step": 26336 + }, + { + "epoch": 0.03, + "learning_rate": 4.99151056036452e-05, + "loss": 1.0499, + "step": 26337 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915099135837904e-05, + "loss": 0.5073, + "step": 26338 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915092667784666e-05, + "loss": 0.9845, + "step": 26339 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915086199485476e-05, + "loss": 1.1443, + "step": 26340 + }, + { + "epoch": 0.03, + "learning_rate": 4.991507973094034e-05, + "loss": 0.7054, + "step": 26341 + }, + { + "epoch": 0.03, + "learning_rate": 4.991507326214925e-05, + "loss": 1.1952, + "step": 26342 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915066793112205e-05, + "loss": 1.0224, + "step": 26343 + }, + { + "epoch": 0.03, + "learning_rate": 4.991506032382921e-05, + "loss": 0.9133, + "step": 26344 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915053854300264e-05, + "loss": 1.0111, + "step": 26345 + }, + { + "epoch": 0.03, + "learning_rate": 4.991504738452537e-05, + "loss": 0.9544, + "step": 26346 + }, + { + "epoch": 0.03, + "learning_rate": 4.991504091450452e-05, + "loss": 0.7952, + "step": 26347 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915034444237724e-05, + "loss": 1.3111, + "step": 26348 + }, + { + "epoch": 0.03, + "learning_rate": 4.991502797372498e-05, + "loss": 0.9001, + "step": 26349 + }, + { + "epoch": 0.03, + "learning_rate": 4.991502150296628e-05, + "loss": 1.0702, + "step": 26350 + }, + { + "epoch": 0.03, + "learning_rate": 4.991501503196163e-05, + "loss": 0.8875, + "step": 26351 + }, + { + "epoch": 0.03, + "learning_rate": 4.991500856071103e-05, + "loss": 1.0312, + "step": 26352 + }, + { + "epoch": 0.03, + "learning_rate": 4.991500208921448e-05, + "loss": 0.9989, + "step": 26353 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914995617471974e-05, + "loss": 0.8885, + "step": 26354 + }, + { + "epoch": 0.03, + "learning_rate": 4.991498914548353e-05, + "loss": 0.8667, + "step": 26355 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914982673249125e-05, + "loss": 0.6203, + "step": 26356 + }, + { + "epoch": 0.03, + "learning_rate": 4.991497620076877e-05, + "loss": 0.2184, + "step": 26357 + }, + { + "epoch": 0.03, + "learning_rate": 4.991496972804247e-05, + "loss": 0.2155, + "step": 26358 + }, + { + "epoch": 0.03, + "learning_rate": 4.991496325507022e-05, + "loss": 0.2879, + "step": 26359 + }, + { + "epoch": 0.03, + "learning_rate": 4.991495678185202e-05, + "loss": 0.9102, + "step": 26360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914950308387866e-05, + "loss": 0.5272, + "step": 26361 + }, + { + "epoch": 0.03, + "learning_rate": 4.991494383467776e-05, + "loss": 0.547, + "step": 26362 + }, + { + "epoch": 0.03, + "learning_rate": 4.991493736072171e-05, + "loss": 0.9327, + "step": 26363 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914930886519706e-05, + "loss": 1.3136, + "step": 26364 + }, + { + "epoch": 0.03, + "learning_rate": 4.991492441207176e-05, + "loss": 0.9932, + "step": 26365 + }, + { + "epoch": 0.03, + "learning_rate": 4.991491793737786e-05, + "loss": 1.0904, + "step": 26366 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914911462438006e-05, + "loss": 1.0791, + "step": 26367 + }, + { + "epoch": 0.03, + "learning_rate": 4.991490498725221e-05, + "loss": 1.0038, + "step": 26368 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914898511820465e-05, + "loss": 0.5293, + "step": 26369 + }, + { + "epoch": 0.03, + "learning_rate": 4.991489203614276e-05, + "loss": 0.9861, + "step": 26370 + }, + { + "epoch": 0.03, + "learning_rate": 4.991488556021912e-05, + "loss": 0.8856, + "step": 26371 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914879084049515e-05, + "loss": 0.9358, + "step": 26372 + }, + { + "epoch": 0.03, + "learning_rate": 4.991487260763397e-05, + "loss": 1.2281, + "step": 26373 + }, + { + "epoch": 0.03, + "learning_rate": 4.991486613097247e-05, + "loss": 1.2161, + "step": 26374 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914859654065025e-05, + "loss": 0.9964, + "step": 26375 + }, + { + "epoch": 0.03, + "learning_rate": 4.991485317691164e-05, + "loss": 0.9988, + "step": 26376 + }, + { + "epoch": 0.03, + "learning_rate": 4.99148466995123e-05, + "loss": 0.9529, + "step": 26377 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914840221867e-05, + "loss": 1.1156, + "step": 26378 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914833743975765e-05, + "loss": 0.9664, + "step": 26379 + }, + { + "epoch": 0.03, + "learning_rate": 4.991482726583857e-05, + "loss": 0.9096, + "step": 26380 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914820787455435e-05, + "loss": 0.8037, + "step": 26381 + }, + { + "epoch": 0.03, + "learning_rate": 4.991481430882635e-05, + "loss": 1.1242, + "step": 26382 + }, + { + "epoch": 0.03, + "learning_rate": 4.991480782995131e-05, + "loss": 0.8462, + "step": 26383 + }, + { + "epoch": 0.03, + "learning_rate": 4.991480135083033e-05, + "loss": 1.0812, + "step": 26384 + }, + { + "epoch": 0.03, + "learning_rate": 4.99147948714634e-05, + "loss": 0.8729, + "step": 26385 + }, + { + "epoch": 0.03, + "learning_rate": 4.991478839185052e-05, + "loss": 0.9842, + "step": 26386 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914781911991694e-05, + "loss": 0.6173, + "step": 26387 + }, + { + "epoch": 0.03, + "learning_rate": 4.991477543188692e-05, + "loss": 0.9025, + "step": 26388 + }, + { + "epoch": 0.03, + "learning_rate": 4.991476895153619e-05, + "loss": 1.1055, + "step": 26389 + }, + { + "epoch": 0.03, + "learning_rate": 4.991476247093951e-05, + "loss": 1.3985, + "step": 26390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914755990096896e-05, + "loss": 1.0423, + "step": 26391 + }, + { + "epoch": 0.03, + "learning_rate": 4.991474950900833e-05, + "loss": 1.0021, + "step": 26392 + }, + { + "epoch": 0.03, + "learning_rate": 4.991474302767381e-05, + "loss": 1.264, + "step": 26393 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914736546093346e-05, + "loss": 1.1002, + "step": 26394 + }, + { + "epoch": 0.03, + "learning_rate": 4.991473006426694e-05, + "loss": 1.0632, + "step": 26395 + }, + { + "epoch": 0.03, + "learning_rate": 4.991472358219457e-05, + "loss": 0.9114, + "step": 26396 + }, + { + "epoch": 0.03, + "learning_rate": 4.991471709987626e-05, + "loss": 1.0777, + "step": 26397 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914710617312014e-05, + "loss": 0.7861, + "step": 26398 + }, + { + "epoch": 0.03, + "learning_rate": 4.991470413450181e-05, + "loss": 1.4321, + "step": 26399 + }, + { + "epoch": 0.03, + "learning_rate": 4.991469765144566e-05, + "loss": 0.8867, + "step": 26400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914691168143555e-05, + "loss": 0.9937, + "step": 26401 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914684684595516e-05, + "loss": 0.6895, + "step": 26402 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914678200801525e-05, + "loss": 0.5806, + "step": 26403 + }, + { + "epoch": 0.03, + "learning_rate": 4.991467171676158e-05, + "loss": 0.6155, + "step": 26404 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914665232475696e-05, + "loss": 0.9866, + "step": 26405 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914658747943865e-05, + "loss": 0.768, + "step": 26406 + }, + { + "epoch": 0.03, + "learning_rate": 4.991465226316608e-05, + "loss": 1.1548, + "step": 26407 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914645778142356e-05, + "loss": 1.0235, + "step": 26408 + }, + { + "epoch": 0.03, + "learning_rate": 4.991463929287268e-05, + "loss": 1.4975, + "step": 26409 + }, + { + "epoch": 0.03, + "learning_rate": 4.991463280735706e-05, + "loss": 0.9725, + "step": 26410 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914626321595495e-05, + "loss": 0.7657, + "step": 26411 + }, + { + "epoch": 0.03, + "learning_rate": 4.991461983558797e-05, + "loss": 0.9486, + "step": 26412 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914613349334514e-05, + "loss": 1.0024, + "step": 26413 + }, + { + "epoch": 0.03, + "learning_rate": 4.991460686283511e-05, + "loss": 0.7183, + "step": 26414 + }, + { + "epoch": 0.03, + "learning_rate": 4.991460037608975e-05, + "loss": 0.7768, + "step": 26415 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914593889098446e-05, + "loss": 0.9217, + "step": 26416 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914587401861205e-05, + "loss": 0.8991, + "step": 26417 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914580914378006e-05, + "loss": 0.8451, + "step": 26418 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914574426648876e-05, + "loss": 1.1295, + "step": 26419 + }, + { + "epoch": 0.03, + "learning_rate": 4.991456793867378e-05, + "loss": 0.6534, + "step": 26420 + }, + { + "epoch": 0.03, + "learning_rate": 4.991456145045275e-05, + "loss": 0.8426, + "step": 26421 + }, + { + "epoch": 0.03, + "learning_rate": 4.991455496198577e-05, + "loss": 1.4677, + "step": 26422 + }, + { + "epoch": 0.03, + "learning_rate": 4.991454847327285e-05, + "loss": 1.0968, + "step": 26423 + }, + { + "epoch": 0.03, + "learning_rate": 4.991454198431398e-05, + "loss": 1.138, + "step": 26424 + }, + { + "epoch": 0.03, + "learning_rate": 4.991453549510916e-05, + "loss": 1.223, + "step": 26425 + }, + { + "epoch": 0.03, + "learning_rate": 4.991452900565841e-05, + "loss": 0.9099, + "step": 26426 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914522515961695e-05, + "loss": 0.9461, + "step": 26427 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914516026019045e-05, + "loss": 0.9793, + "step": 26428 + }, + { + "epoch": 0.03, + "learning_rate": 4.991450953583044e-05, + "loss": 1.0168, + "step": 26429 + }, + { + "epoch": 0.03, + "learning_rate": 4.99145030453959e-05, + "loss": 0.9299, + "step": 26430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914496554715406e-05, + "loss": 1.1653, + "step": 26431 + }, + { + "epoch": 0.03, + "learning_rate": 4.991449006378898e-05, + "loss": 1.3132, + "step": 26432 + }, + { + "epoch": 0.03, + "learning_rate": 4.991448357261659e-05, + "loss": 1.004, + "step": 26433 + }, + { + "epoch": 0.03, + "learning_rate": 4.991447708119826e-05, + "loss": 0.6788, + "step": 26434 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914470589534e-05, + "loss": 1.3737, + "step": 26435 + }, + { + "epoch": 0.03, + "learning_rate": 4.991446409762378e-05, + "loss": 1.2934, + "step": 26436 + }, + { + "epoch": 0.03, + "learning_rate": 4.991445760546762e-05, + "loss": 1.1547, + "step": 26437 + }, + { + "epoch": 0.03, + "learning_rate": 4.991445111306551e-05, + "loss": 0.6727, + "step": 26438 + }, + { + "epoch": 0.03, + "learning_rate": 4.991444462041746e-05, + "loss": 1.095, + "step": 26439 + }, + { + "epoch": 0.03, + "learning_rate": 4.991443812752347e-05, + "loss": 0.7684, + "step": 26440 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914431634383526e-05, + "loss": 1.1079, + "step": 26441 + }, + { + "epoch": 0.03, + "learning_rate": 4.991442514099764e-05, + "loss": 0.9026, + "step": 26442 + }, + { + "epoch": 0.03, + "learning_rate": 4.991441864736581e-05, + "loss": 0.9026, + "step": 26443 + }, + { + "epoch": 0.03, + "learning_rate": 4.991441215348803e-05, + "loss": 1.2051, + "step": 26444 + }, + { + "epoch": 0.03, + "learning_rate": 4.991440565936432e-05, + "loss": 0.9586, + "step": 26445 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914399164994654e-05, + "loss": 0.7224, + "step": 26446 + }, + { + "epoch": 0.03, + "learning_rate": 4.991439267037904e-05, + "loss": 0.6418, + "step": 26447 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914386175517484e-05, + "loss": 0.9031, + "step": 26448 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914379680409986e-05, + "loss": 0.7415, + "step": 26449 + }, + { + "epoch": 0.03, + "learning_rate": 4.991437318505655e-05, + "loss": 1.1432, + "step": 26450 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914366689457156e-05, + "loss": 0.8822, + "step": 26451 + }, + { + "epoch": 0.03, + "learning_rate": 4.991436019361183e-05, + "loss": 1.0814, + "step": 26452 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914353697520555e-05, + "loss": 1.0252, + "step": 26453 + }, + { + "epoch": 0.03, + "learning_rate": 4.991434720118334e-05, + "loss": 1.0778, + "step": 26454 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914340704600176e-05, + "loss": 1.1994, + "step": 26455 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914334207771074e-05, + "loss": 1.0815, + "step": 26456 + }, + { + "epoch": 0.03, + "learning_rate": 4.991432771069602e-05, + "loss": 1.1064, + "step": 26457 + }, + { + "epoch": 0.03, + "learning_rate": 4.991432121337503e-05, + "loss": 0.8877, + "step": 26458 + }, + { + "epoch": 0.03, + "learning_rate": 4.991431471580809e-05, + "loss": 0.6061, + "step": 26459 + }, + { + "epoch": 0.03, + "learning_rate": 4.991430821799521e-05, + "loss": 0.9613, + "step": 26460 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914301719936386e-05, + "loss": 1.0432, + "step": 26461 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914295221631616e-05, + "loss": 1.0025, + "step": 26462 + }, + { + "epoch": 0.03, + "learning_rate": 4.99142887230809e-05, + "loss": 0.6768, + "step": 26463 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914282224284257e-05, + "loss": 1.5083, + "step": 26464 + }, + { + "epoch": 0.03, + "learning_rate": 4.991427572524165e-05, + "loss": 1.1781, + "step": 26465 + }, + { + "epoch": 0.03, + "learning_rate": 4.991426922595311e-05, + "loss": 1.2936, + "step": 26466 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914262726418634e-05, + "loss": 1.0814, + "step": 26467 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914256226638204e-05, + "loss": 1.0378, + "step": 26468 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914249726611836e-05, + "loss": 1.0801, + "step": 26469 + }, + { + "epoch": 0.03, + "learning_rate": 4.991424322633952e-05, + "loss": 0.9379, + "step": 26470 + }, + { + "epoch": 0.03, + "learning_rate": 4.991423672582126e-05, + "loss": 0.8743, + "step": 26471 + }, + { + "epoch": 0.03, + "learning_rate": 4.991423022505707e-05, + "loss": 0.9897, + "step": 26472 + }, + { + "epoch": 0.03, + "learning_rate": 4.991422372404692e-05, + "loss": 0.8483, + "step": 26473 + }, + { + "epoch": 0.03, + "learning_rate": 4.991421722279084e-05, + "loss": 0.8323, + "step": 26474 + }, + { + "epoch": 0.03, + "learning_rate": 4.991421072128881e-05, + "loss": 1.0691, + "step": 26475 + }, + { + "epoch": 0.03, + "learning_rate": 4.991420421954084e-05, + "loss": 0.6878, + "step": 26476 + }, + { + "epoch": 0.03, + "learning_rate": 4.991419771754693e-05, + "loss": 0.8261, + "step": 26477 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914191215307074e-05, + "loss": 0.6114, + "step": 26478 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914184712821275e-05, + "loss": 0.5325, + "step": 26479 + }, + { + "epoch": 0.03, + "learning_rate": 4.991417821008954e-05, + "loss": 0.3695, + "step": 26480 + }, + { + "epoch": 0.03, + "learning_rate": 4.991417170711186e-05, + "loss": 1.0279, + "step": 26481 + }, + { + "epoch": 0.03, + "learning_rate": 4.991416520388823e-05, + "loss": 0.9288, + "step": 26482 + }, + { + "epoch": 0.03, + "learning_rate": 4.991415870041867e-05, + "loss": 1.0958, + "step": 26483 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914152196703154e-05, + "loss": 0.9976, + "step": 26484 + }, + { + "epoch": 0.03, + "learning_rate": 4.991414569274171e-05, + "loss": 0.9787, + "step": 26485 + }, + { + "epoch": 0.03, + "learning_rate": 4.991413918853431e-05, + "loss": 1.2108, + "step": 26486 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914132684080984e-05, + "loss": 1.0233, + "step": 26487 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914126179381705e-05, + "loss": 0.864, + "step": 26488 + }, + { + "epoch": 0.03, + "learning_rate": 4.991411967443649e-05, + "loss": 0.917, + "step": 26489 + }, + { + "epoch": 0.03, + "learning_rate": 4.991411316924533e-05, + "loss": 1.0737, + "step": 26490 + }, + { + "epoch": 0.03, + "learning_rate": 4.991410666380822e-05, + "loss": 0.8926, + "step": 26491 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914100158125185e-05, + "loss": 1.0618, + "step": 26492 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914093652196204e-05, + "loss": 0.8548, + "step": 26493 + }, + { + "epoch": 0.03, + "learning_rate": 4.991408714602127e-05, + "loss": 0.9595, + "step": 26494 + }, + { + "epoch": 0.03, + "learning_rate": 4.991408063960041e-05, + "loss": 0.9115, + "step": 26495 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914074132933594e-05, + "loss": 1.0515, + "step": 26496 + }, + { + "epoch": 0.03, + "learning_rate": 4.991406762602085e-05, + "loss": 0.8174, + "step": 26497 + }, + { + "epoch": 0.03, + "learning_rate": 4.991406111886216e-05, + "loss": 0.9635, + "step": 26498 + }, + { + "epoch": 0.03, + "learning_rate": 4.991405461145753e-05, + "loss": 1.0582, + "step": 26499 + }, + { + "epoch": 0.03, + "learning_rate": 4.991404810380695e-05, + "loss": 1.3509, + "step": 26500 + }, + { + "epoch": 0.03, + "eval_loss": 1.059253454208374, + "eval_runtime": 126.5882, + "eval_samples_per_second": 10.941, + "eval_steps_per_second": 5.474, + "step": 26500 + }, + { + "epoch": 0.03, + "learning_rate": 4.991404159591044e-05, + "loss": 1.3138, + "step": 26501 + }, + { + "epoch": 0.03, + "learning_rate": 4.991403508776799e-05, + "loss": 1.3593, + "step": 26502 + }, + { + "epoch": 0.03, + "learning_rate": 4.991402857937959e-05, + "loss": 1.3163, + "step": 26503 + }, + { + "epoch": 0.03, + "learning_rate": 4.991402207074526e-05, + "loss": 1.1691, + "step": 26504 + }, + { + "epoch": 0.03, + "learning_rate": 4.991401556186498e-05, + "loss": 1.0829, + "step": 26505 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914009052738766e-05, + "loss": 0.9203, + "step": 26506 + }, + { + "epoch": 0.03, + "learning_rate": 4.99140025433666e-05, + "loss": 1.3035, + "step": 26507 + }, + { + "epoch": 0.03, + "learning_rate": 4.99139960337485e-05, + "loss": 1.0156, + "step": 26508 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913989523884464e-05, + "loss": 0.7801, + "step": 26509 + }, + { + "epoch": 0.03, + "learning_rate": 4.991398301377449e-05, + "loss": 1.0427, + "step": 26510 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913976503418567e-05, + "loss": 0.6046, + "step": 26511 + }, + { + "epoch": 0.03, + "learning_rate": 4.99139699928167e-05, + "loss": 1.2639, + "step": 26512 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913963481968905e-05, + "loss": 0.8549, + "step": 26513 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913956970875164e-05, + "loss": 0.8926, + "step": 26514 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913950459535486e-05, + "loss": 1.1237, + "step": 26515 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913943947949856e-05, + "loss": 1.329, + "step": 26516 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913937436118296e-05, + "loss": 1.0363, + "step": 26517 + }, + { + "epoch": 0.03, + "learning_rate": 4.99139309240408e-05, + "loss": 0.9725, + "step": 26518 + }, + { + "epoch": 0.03, + "learning_rate": 4.991392441171736e-05, + "loss": 1.0001, + "step": 26519 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913917899147974e-05, + "loss": 1.0782, + "step": 26520 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913911386332656e-05, + "loss": 1.1654, + "step": 26521 + }, + { + "epoch": 0.03, + "learning_rate": 4.99139048732714e-05, + "loss": 0.8433, + "step": 26522 + }, + { + "epoch": 0.03, + "learning_rate": 4.99138983599642e-05, + "loss": 0.8733, + "step": 26523 + }, + { + "epoch": 0.03, + "learning_rate": 4.991389184641107e-05, + "loss": 0.6547, + "step": 26524 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913885332611985e-05, + "loss": 0.8079, + "step": 26525 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913878818566965e-05, + "loss": 0.6542, + "step": 26526 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913872304276014e-05, + "loss": 0.5413, + "step": 26527 + }, + { + "epoch": 0.03, + "learning_rate": 4.991386578973911e-05, + "loss": 1.0732, + "step": 26528 + }, + { + "epoch": 0.03, + "learning_rate": 4.991385927495628e-05, + "loss": 0.6916, + "step": 26529 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913852759927506e-05, + "loss": 0.7581, + "step": 26530 + }, + { + "epoch": 0.03, + "learning_rate": 4.991384624465279e-05, + "loss": 0.5425, + "step": 26531 + }, + { + "epoch": 0.03, + "learning_rate": 4.991383972913214e-05, + "loss": 0.8617, + "step": 26532 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913833213365546e-05, + "loss": 0.9565, + "step": 26533 + }, + { + "epoch": 0.03, + "learning_rate": 4.991382669735302e-05, + "loss": 1.348, + "step": 26534 + }, + { + "epoch": 0.03, + "learning_rate": 4.991382018109455e-05, + "loss": 1.1681, + "step": 26535 + }, + { + "epoch": 0.03, + "learning_rate": 4.991381366459014e-05, + "loss": 0.9598, + "step": 26536 + }, + { + "epoch": 0.03, + "learning_rate": 4.99138071478398e-05, + "loss": 0.9269, + "step": 26537 + }, + { + "epoch": 0.03, + "learning_rate": 4.991380063084351e-05, + "loss": 0.6442, + "step": 26538 + }, + { + "epoch": 0.03, + "learning_rate": 4.991379411360129e-05, + "loss": 0.5665, + "step": 26539 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913787596113126e-05, + "loss": 0.7593, + "step": 26540 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913781078379026e-05, + "loss": 0.6642, + "step": 26541 + }, + { + "epoch": 0.03, + "learning_rate": 4.991377456039898e-05, + "loss": 0.8075, + "step": 26542 + }, + { + "epoch": 0.03, + "learning_rate": 4.991376804217301e-05, + "loss": 0.9759, + "step": 26543 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913761523701094e-05, + "loss": 1.0537, + "step": 26544 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913755004983244e-05, + "loss": 0.8617, + "step": 26545 + }, + { + "epoch": 0.03, + "learning_rate": 4.991374848601945e-05, + "loss": 0.9665, + "step": 26546 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913741966809716e-05, + "loss": 1.0809, + "step": 26547 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913735447354046e-05, + "loss": 1.2464, + "step": 26548 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913728927652445e-05, + "loss": 1.1587, + "step": 26549 + }, + { + "epoch": 0.03, + "learning_rate": 4.99137224077049e-05, + "loss": 1.0142, + "step": 26550 + }, + { + "epoch": 0.03, + "learning_rate": 4.991371588751142e-05, + "loss": 1.3479, + "step": 26551 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913709367072e-05, + "loss": 1.1529, + "step": 26552 + }, + { + "epoch": 0.03, + "learning_rate": 4.991370284638664e-05, + "loss": 0.8997, + "step": 26553 + }, + { + "epoch": 0.03, + "learning_rate": 4.991369632545535e-05, + "loss": 0.9531, + "step": 26554 + }, + { + "epoch": 0.03, + "learning_rate": 4.991368980427812e-05, + "loss": 0.9492, + "step": 26555 + }, + { + "epoch": 0.03, + "learning_rate": 4.991368328285495e-05, + "loss": 0.6766, + "step": 26556 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913676761185843e-05, + "loss": 1.1456, + "step": 26557 + }, + { + "epoch": 0.03, + "learning_rate": 4.99136702392708e-05, + "loss": 1.0605, + "step": 26558 + }, + { + "epoch": 0.03, + "learning_rate": 4.991366371710982e-05, + "loss": 0.7864, + "step": 26559 + }, + { + "epoch": 0.03, + "learning_rate": 4.99136571947029e-05, + "loss": 1.0016, + "step": 26560 + }, + { + "epoch": 0.03, + "learning_rate": 4.991365067205005e-05, + "loss": 0.6241, + "step": 26561 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913644149151256e-05, + "loss": 0.8148, + "step": 26562 + }, + { + "epoch": 0.03, + "learning_rate": 4.991363762600653e-05, + "loss": 1.8407, + "step": 26563 + }, + { + "epoch": 0.03, + "learning_rate": 4.991363110261586e-05, + "loss": 1.0944, + "step": 26564 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913624578979256e-05, + "loss": 0.8532, + "step": 26565 + }, + { + "epoch": 0.03, + "learning_rate": 4.991361805509672e-05, + "loss": 1.024, + "step": 26566 + }, + { + "epoch": 0.03, + "learning_rate": 4.991361153096824e-05, + "loss": 0.8708, + "step": 26567 + }, + { + "epoch": 0.03, + "learning_rate": 4.991360500659383e-05, + "loss": 0.738, + "step": 26568 + }, + { + "epoch": 0.03, + "learning_rate": 4.991359848197348e-05, + "loss": 1.0545, + "step": 26569 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913591957107194e-05, + "loss": 0.931, + "step": 26570 + }, + { + "epoch": 0.03, + "learning_rate": 4.991358543199497e-05, + "loss": 0.8857, + "step": 26571 + }, + { + "epoch": 0.03, + "learning_rate": 4.991357890663681e-05, + "loss": 1.04, + "step": 26572 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913572381032716e-05, + "loss": 1.0223, + "step": 26573 + }, + { + "epoch": 0.03, + "learning_rate": 4.991356585518268e-05, + "loss": 0.71, + "step": 26574 + }, + { + "epoch": 0.03, + "learning_rate": 4.991355932908671e-05, + "loss": 1.1412, + "step": 26575 + }, + { + "epoch": 0.03, + "learning_rate": 4.991355280274481e-05, + "loss": 0.8846, + "step": 26576 + }, + { + "epoch": 0.03, + "learning_rate": 4.991354627615697e-05, + "loss": 0.9287, + "step": 26577 + }, + { + "epoch": 0.03, + "learning_rate": 4.991353974932319e-05, + "loss": 0.8072, + "step": 26578 + }, + { + "epoch": 0.03, + "learning_rate": 4.991353322224348e-05, + "loss": 1.0013, + "step": 26579 + }, + { + "epoch": 0.03, + "learning_rate": 4.991352669491783e-05, + "loss": 0.8616, + "step": 26580 + }, + { + "epoch": 0.03, + "learning_rate": 4.991352016734625e-05, + "loss": 1.2211, + "step": 26581 + }, + { + "epoch": 0.03, + "learning_rate": 4.991351363952873e-05, + "loss": 0.8255, + "step": 26582 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913507111465275e-05, + "loss": 0.9199, + "step": 26583 + }, + { + "epoch": 0.03, + "learning_rate": 4.991350058315588e-05, + "loss": 1.0177, + "step": 26584 + }, + { + "epoch": 0.03, + "learning_rate": 4.991349405460055e-05, + "loss": 0.8242, + "step": 26585 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913487525799294e-05, + "loss": 0.9365, + "step": 26586 + }, + { + "epoch": 0.03, + "learning_rate": 4.991348099675209e-05, + "loss": 0.8892, + "step": 26587 + }, + { + "epoch": 0.03, + "learning_rate": 4.991347446745896e-05, + "loss": 1.1601, + "step": 26588 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913467937919886e-05, + "loss": 1.0765, + "step": 26589 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913461408134886e-05, + "loss": 1.088, + "step": 26590 + }, + { + "epoch": 0.03, + "learning_rate": 4.991345487810395e-05, + "loss": 0.626, + "step": 26591 + }, + { + "epoch": 0.03, + "learning_rate": 4.991344834782707e-05, + "loss": 0.9148, + "step": 26592 + }, + { + "epoch": 0.03, + "learning_rate": 4.991344181730426e-05, + "loss": 1.3591, + "step": 26593 + }, + { + "epoch": 0.03, + "learning_rate": 4.991343528653552e-05, + "loss": 1.1861, + "step": 26594 + }, + { + "epoch": 0.03, + "learning_rate": 4.991342875552084e-05, + "loss": 1.084, + "step": 26595 + }, + { + "epoch": 0.03, + "learning_rate": 4.991342222426023e-05, + "loss": 0.7684, + "step": 26596 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913415692753685e-05, + "loss": 0.5724, + "step": 26597 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913409161001204e-05, + "loss": 0.9024, + "step": 26598 + }, + { + "epoch": 0.03, + "learning_rate": 4.991340262900278e-05, + "loss": 0.9817, + "step": 26599 + }, + { + "epoch": 0.03, + "learning_rate": 4.991339609675842e-05, + "loss": 1.0247, + "step": 26600 + }, + { + "epoch": 0.03, + "learning_rate": 4.991338956426814e-05, + "loss": 1.11, + "step": 26601 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913383031531915e-05, + "loss": 1.07, + "step": 26602 + }, + { + "epoch": 0.03, + "learning_rate": 4.991337649854976e-05, + "loss": 0.4502, + "step": 26603 + }, + { + "epoch": 0.03, + "learning_rate": 4.991336996532167e-05, + "loss": 1.0268, + "step": 26604 + }, + { + "epoch": 0.03, + "learning_rate": 4.991336343184765e-05, + "loss": 0.9461, + "step": 26605 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913356898127684e-05, + "loss": 1.0942, + "step": 26606 + }, + { + "epoch": 0.03, + "learning_rate": 4.991335036416179e-05, + "loss": 1.1404, + "step": 26607 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913343829949966e-05, + "loss": 1.3175, + "step": 26608 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913337295492204e-05, + "loss": 1.0565, + "step": 26609 + }, + { + "epoch": 0.03, + "learning_rate": 4.991333076078851e-05, + "loss": 1.235, + "step": 26610 + }, + { + "epoch": 0.03, + "learning_rate": 4.991332422583888e-05, + "loss": 1.1696, + "step": 26611 + }, + { + "epoch": 0.03, + "learning_rate": 4.991331769064331e-05, + "loss": 1.0902, + "step": 26612 + }, + { + "epoch": 0.03, + "learning_rate": 4.991331115520182e-05, + "loss": 0.9244, + "step": 26613 + }, + { + "epoch": 0.03, + "learning_rate": 4.991330461951439e-05, + "loss": 0.8775, + "step": 26614 + }, + { + "epoch": 0.03, + "learning_rate": 4.991329808358103e-05, + "loss": 1.6401, + "step": 26615 + }, + { + "epoch": 0.03, + "learning_rate": 4.991329154740173e-05, + "loss": 1.6216, + "step": 26616 + }, + { + "epoch": 0.03, + "learning_rate": 4.99132850109765e-05, + "loss": 1.0087, + "step": 26617 + }, + { + "epoch": 0.03, + "learning_rate": 4.991327847430533e-05, + "loss": 0.8009, + "step": 26618 + }, + { + "epoch": 0.03, + "learning_rate": 4.991327193738823e-05, + "loss": 0.9941, + "step": 26619 + }, + { + "epoch": 0.03, + "learning_rate": 4.99132654002252e-05, + "loss": 0.9688, + "step": 26620 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913258862816234e-05, + "loss": 0.739, + "step": 26621 + }, + { + "epoch": 0.03, + "learning_rate": 4.991325232516134e-05, + "loss": 1.0117, + "step": 26622 + }, + { + "epoch": 0.03, + "learning_rate": 4.991324578726051e-05, + "loss": 1.0001, + "step": 26623 + }, + { + "epoch": 0.03, + "learning_rate": 4.991323924911374e-05, + "loss": 0.961, + "step": 26624 + }, + { + "epoch": 0.03, + "learning_rate": 4.991323271072105e-05, + "loss": 0.9612, + "step": 26625 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913226172082414e-05, + "loss": 0.9412, + "step": 26626 + }, + { + "epoch": 0.03, + "learning_rate": 4.991321963319785e-05, + "loss": 0.9391, + "step": 26627 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913213094067356e-05, + "loss": 0.9277, + "step": 26628 + }, + { + "epoch": 0.03, + "learning_rate": 4.991320655469093e-05, + "loss": 0.9147, + "step": 26629 + }, + { + "epoch": 0.03, + "learning_rate": 4.991320001506857e-05, + "loss": 1.047, + "step": 26630 + }, + { + "epoch": 0.03, + "learning_rate": 4.991319347520027e-05, + "loss": 0.9517, + "step": 26631 + }, + { + "epoch": 0.03, + "learning_rate": 4.991318693508605e-05, + "loss": 0.9245, + "step": 26632 + }, + { + "epoch": 0.03, + "learning_rate": 4.991318039472589e-05, + "loss": 1.1425, + "step": 26633 + }, + { + "epoch": 0.03, + "learning_rate": 4.99131738541198e-05, + "loss": 1.1568, + "step": 26634 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913167313267775e-05, + "loss": 0.9222, + "step": 26635 + }, + { + "epoch": 0.03, + "learning_rate": 4.991316077216982e-05, + "loss": 0.9955, + "step": 26636 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913154230825934e-05, + "loss": 1.0849, + "step": 26637 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913147689236107e-05, + "loss": 0.8867, + "step": 26638 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913141147400355e-05, + "loss": 0.6078, + "step": 26639 + }, + { + "epoch": 0.03, + "learning_rate": 4.991313460531867e-05, + "loss": 0.6905, + "step": 26640 + }, + { + "epoch": 0.03, + "learning_rate": 4.991312806299105e-05, + "loss": 1.0155, + "step": 26641 + }, + { + "epoch": 0.03, + "learning_rate": 4.99131215204175e-05, + "loss": 0.7934, + "step": 26642 + }, + { + "epoch": 0.03, + "learning_rate": 4.991311497759803e-05, + "loss": 1.1019, + "step": 26643 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913108434532615e-05, + "loss": 0.9067, + "step": 26644 + }, + { + "epoch": 0.03, + "learning_rate": 4.991310189122127e-05, + "loss": 0.7873, + "step": 26645 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913095347664e-05, + "loss": 0.8926, + "step": 26646 + }, + { + "epoch": 0.03, + "learning_rate": 4.991308880386079e-05, + "loss": 0.9431, + "step": 26647 + }, + { + "epoch": 0.03, + "learning_rate": 4.991308225981165e-05, + "loss": 0.7589, + "step": 26648 + }, + { + "epoch": 0.03, + "learning_rate": 4.991307571551658e-05, + "loss": 0.8291, + "step": 26649 + }, + { + "epoch": 0.03, + "learning_rate": 4.991306917097558e-05, + "loss": 0.9818, + "step": 26650 + }, + { + "epoch": 0.03, + "learning_rate": 4.991306262618865e-05, + "loss": 1.209, + "step": 26651 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913056081155786e-05, + "loss": 0.9561, + "step": 26652 + }, + { + "epoch": 0.03, + "learning_rate": 4.991304953587699e-05, + "loss": 0.9088, + "step": 26653 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913042990352267e-05, + "loss": 0.6468, + "step": 26654 + }, + { + "epoch": 0.03, + "learning_rate": 4.991303644458161e-05, + "loss": 1.0286, + "step": 26655 + }, + { + "epoch": 0.03, + "learning_rate": 4.991302989856502e-05, + "loss": 1.1467, + "step": 26656 + }, + { + "epoch": 0.03, + "learning_rate": 4.99130233523025e-05, + "loss": 1.0052, + "step": 26657 + }, + { + "epoch": 0.03, + "learning_rate": 4.991301680579405e-05, + "loss": 1.3352, + "step": 26658 + }, + { + "epoch": 0.03, + "learning_rate": 4.991301025903967e-05, + "loss": 1.2541, + "step": 26659 + }, + { + "epoch": 0.03, + "learning_rate": 4.991300371203936e-05, + "loss": 1.2058, + "step": 26660 + }, + { + "epoch": 0.03, + "learning_rate": 4.991299716479312e-05, + "loss": 1.0115, + "step": 26661 + }, + { + "epoch": 0.03, + "learning_rate": 4.991299061730095e-05, + "loss": 0.9362, + "step": 26662 + }, + { + "epoch": 0.03, + "learning_rate": 4.991298406956284e-05, + "loss": 0.834, + "step": 26663 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912977521578804e-05, + "loss": 1.0199, + "step": 26664 + }, + { + "epoch": 0.03, + "learning_rate": 4.991297097334884e-05, + "loss": 1.1484, + "step": 26665 + }, + { + "epoch": 0.03, + "learning_rate": 4.991296442487295e-05, + "loss": 0.9857, + "step": 26666 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912957876151125e-05, + "loss": 1.0808, + "step": 26667 + }, + { + "epoch": 0.03, + "learning_rate": 4.991295132718336e-05, + "loss": 1.1592, + "step": 26668 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912944777969684e-05, + "loss": 0.9563, + "step": 26669 + }, + { + "epoch": 0.03, + "learning_rate": 4.991293822851007e-05, + "loss": 0.5057, + "step": 26670 + }, + { + "epoch": 0.03, + "learning_rate": 4.991293167880452e-05, + "loss": 1.091, + "step": 26671 + }, + { + "epoch": 0.03, + "learning_rate": 4.991292512885305e-05, + "loss": 0.9267, + "step": 26672 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912918578655644e-05, + "loss": 1.0513, + "step": 26673 + }, + { + "epoch": 0.03, + "learning_rate": 4.991291202821231e-05, + "loss": 0.8995, + "step": 26674 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912905477523045e-05, + "loss": 0.9566, + "step": 26675 + }, + { + "epoch": 0.03, + "learning_rate": 4.991289892658785e-05, + "loss": 0.8715, + "step": 26676 + }, + { + "epoch": 0.03, + "learning_rate": 4.991289237540672e-05, + "loss": 0.9222, + "step": 26677 + }, + { + "epoch": 0.03, + "learning_rate": 4.991288582397967e-05, + "loss": 0.9443, + "step": 26678 + }, + { + "epoch": 0.03, + "learning_rate": 4.991287927230669e-05, + "loss": 1.0669, + "step": 26679 + }, + { + "epoch": 0.03, + "learning_rate": 4.991287272038777e-05, + "loss": 1.1623, + "step": 26680 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912866168222934e-05, + "loss": 1.1085, + "step": 26681 + }, + { + "epoch": 0.03, + "learning_rate": 4.991285961581216e-05, + "loss": 0.6031, + "step": 26682 + }, + { + "epoch": 0.03, + "learning_rate": 4.991285306315546e-05, + "loss": 0.8515, + "step": 26683 + }, + { + "epoch": 0.03, + "learning_rate": 4.991284651025283e-05, + "loss": 1.0193, + "step": 26684 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912839957104277e-05, + "loss": 1.5152, + "step": 26685 + }, + { + "epoch": 0.03, + "learning_rate": 4.991283340370978e-05, + "loss": 1.2933, + "step": 26686 + }, + { + "epoch": 0.03, + "learning_rate": 4.991282685006937e-05, + "loss": 0.8884, + "step": 26687 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912820296183016e-05, + "loss": 0.9091, + "step": 26688 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912813742050745e-05, + "loss": 0.7794, + "step": 26689 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912807187672537e-05, + "loss": 1.0308, + "step": 26690 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912800633048404e-05, + "loss": 1.0187, + "step": 26691 + }, + { + "epoch": 0.03, + "learning_rate": 4.991279407817835e-05, + "loss": 0.8742, + "step": 26692 + }, + { + "epoch": 0.03, + "learning_rate": 4.991278752306236e-05, + "loss": 0.8315, + "step": 26693 + }, + { + "epoch": 0.03, + "learning_rate": 4.991278096770044e-05, + "loss": 1.3056, + "step": 26694 + }, + { + "epoch": 0.03, + "learning_rate": 4.99127744120926e-05, + "loss": 0.8068, + "step": 26695 + }, + { + "epoch": 0.03, + "learning_rate": 4.991276785623882e-05, + "loss": 0.7784, + "step": 26696 + }, + { + "epoch": 0.03, + "learning_rate": 4.991276130013912e-05, + "loss": 0.6242, + "step": 26697 + }, + { + "epoch": 0.03, + "learning_rate": 4.991275474379349e-05, + "loss": 0.9305, + "step": 26698 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912748187201924e-05, + "loss": 1.1838, + "step": 26699 + }, + { + "epoch": 0.03, + "learning_rate": 4.991274163036443e-05, + "loss": 1.0683, + "step": 26700 + }, + { + "epoch": 0.03, + "learning_rate": 4.991273507328102e-05, + "loss": 0.8339, + "step": 26701 + }, + { + "epoch": 0.03, + "learning_rate": 4.991272851595168e-05, + "loss": 0.9989, + "step": 26702 + }, + { + "epoch": 0.03, + "learning_rate": 4.991272195837641e-05, + "loss": 1.0248, + "step": 26703 + }, + { + "epoch": 0.03, + "learning_rate": 4.99127154005552e-05, + "loss": 0.9769, + "step": 26704 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912708842488076e-05, + "loss": 0.51, + "step": 26705 + }, + { + "epoch": 0.03, + "learning_rate": 4.991270228417502e-05, + "loss": 0.7487, + "step": 26706 + }, + { + "epoch": 0.03, + "learning_rate": 4.991269572561604e-05, + "loss": 1.1073, + "step": 26707 + }, + { + "epoch": 0.03, + "learning_rate": 4.991268916681113e-05, + "loss": 0.9207, + "step": 26708 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912682607760287e-05, + "loss": 0.6842, + "step": 26709 + }, + { + "epoch": 0.03, + "learning_rate": 4.991267604846353e-05, + "loss": 1.3604, + "step": 26710 + }, + { + "epoch": 0.03, + "learning_rate": 4.991266948892083e-05, + "loss": 0.9541, + "step": 26711 + }, + { + "epoch": 0.03, + "learning_rate": 4.991266292913221e-05, + "loss": 0.9235, + "step": 26712 + }, + { + "epoch": 0.03, + "learning_rate": 4.991265636909766e-05, + "loss": 0.808, + "step": 26713 + }, + { + "epoch": 0.03, + "learning_rate": 4.991264980881719e-05, + "loss": 0.8906, + "step": 26714 + }, + { + "epoch": 0.03, + "learning_rate": 4.991264324829078e-05, + "loss": 0.9615, + "step": 26715 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912636687518455e-05, + "loss": 0.9409, + "step": 26716 + }, + { + "epoch": 0.03, + "learning_rate": 4.991263012650019e-05, + "loss": 0.8495, + "step": 26717 + }, + { + "epoch": 0.03, + "learning_rate": 4.991262356523601e-05, + "loss": 1.1694, + "step": 26718 + }, + { + "epoch": 0.03, + "learning_rate": 4.99126170037259e-05, + "loss": 0.6641, + "step": 26719 + }, + { + "epoch": 0.03, + "learning_rate": 4.991261044196987e-05, + "loss": 0.8522, + "step": 26720 + }, + { + "epoch": 0.03, + "learning_rate": 4.99126038799679e-05, + "loss": 1.358, + "step": 26721 + }, + { + "epoch": 0.03, + "learning_rate": 4.991259731772001e-05, + "loss": 1.1143, + "step": 26722 + }, + { + "epoch": 0.03, + "learning_rate": 4.991259075522619e-05, + "loss": 1.0975, + "step": 26723 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912584192486445e-05, + "loss": 1.1385, + "step": 26724 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912577629500776e-05, + "loss": 1.267, + "step": 26725 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912571066269176e-05, + "loss": 1.2471, + "step": 26726 + }, + { + "epoch": 0.03, + "learning_rate": 4.991256450279166e-05, + "loss": 0.7951, + "step": 26727 + }, + { + "epoch": 0.03, + "learning_rate": 4.99125579390682e-05, + "loss": 0.8791, + "step": 26728 + }, + { + "epoch": 0.03, + "learning_rate": 4.991255137509882e-05, + "loss": 0.9081, + "step": 26729 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912544810883526e-05, + "loss": 0.9096, + "step": 26730 + }, + { + "epoch": 0.03, + "learning_rate": 4.99125382464223e-05, + "loss": 1.2326, + "step": 26731 + }, + { + "epoch": 0.03, + "learning_rate": 4.991253168171515e-05, + "loss": 1.0025, + "step": 26732 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912525116762064e-05, + "loss": 1.1107, + "step": 26733 + }, + { + "epoch": 0.03, + "learning_rate": 4.991251855156306e-05, + "loss": 0.9335, + "step": 26734 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912511986118126e-05, + "loss": 0.9886, + "step": 26735 + }, + { + "epoch": 0.03, + "learning_rate": 4.991250542042727e-05, + "loss": 0.8375, + "step": 26736 + }, + { + "epoch": 0.03, + "learning_rate": 4.991249885449048e-05, + "loss": 0.9776, + "step": 26737 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912492288307774e-05, + "loss": 1.0044, + "step": 26738 + }, + { + "epoch": 0.03, + "learning_rate": 4.991248572187913e-05, + "loss": 1.0527, + "step": 26739 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912479155204574e-05, + "loss": 1.5724, + "step": 26740 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912472588284084e-05, + "loss": 1.2569, + "step": 26741 + }, + { + "epoch": 0.03, + "learning_rate": 4.991246602111768e-05, + "loss": 1.2253, + "step": 26742 + }, + { + "epoch": 0.03, + "learning_rate": 4.991245945370534e-05, + "loss": 1.0364, + "step": 26743 + }, + { + "epoch": 0.03, + "learning_rate": 4.991245288604708e-05, + "loss": 1.1396, + "step": 26744 + }, + { + "epoch": 0.03, + "learning_rate": 4.991244631814289e-05, + "loss": 0.9474, + "step": 26745 + }, + { + "epoch": 0.03, + "learning_rate": 4.991243974999278e-05, + "loss": 1.3108, + "step": 26746 + }, + { + "epoch": 0.03, + "learning_rate": 4.991243318159674e-05, + "loss": 1.1646, + "step": 26747 + }, + { + "epoch": 0.03, + "learning_rate": 4.991242661295478e-05, + "loss": 0.8901, + "step": 26748 + }, + { + "epoch": 0.03, + "learning_rate": 4.991242004406689e-05, + "loss": 0.9603, + "step": 26749 + }, + { + "epoch": 0.03, + "learning_rate": 4.991241347493307e-05, + "loss": 1.125, + "step": 26750 + }, + { + "epoch": 0.03, + "learning_rate": 4.991240690555333e-05, + "loss": 0.8842, + "step": 26751 + }, + { + "epoch": 0.03, + "learning_rate": 4.991240033592767e-05, + "loss": 0.2774, + "step": 26752 + }, + { + "epoch": 0.03, + "learning_rate": 4.991239376605609e-05, + "loss": 0.6267, + "step": 26753 + }, + { + "epoch": 0.03, + "learning_rate": 4.991238719593857e-05, + "loss": 0.9615, + "step": 26754 + }, + { + "epoch": 0.03, + "learning_rate": 4.991238062557514e-05, + "loss": 0.7919, + "step": 26755 + }, + { + "epoch": 0.03, + "learning_rate": 4.991237405496577e-05, + "loss": 0.9506, + "step": 26756 + }, + { + "epoch": 0.03, + "learning_rate": 4.991236748411049e-05, + "loss": 1.3704, + "step": 26757 + }, + { + "epoch": 0.03, + "learning_rate": 4.991236091300928e-05, + "loss": 0.5962, + "step": 26758 + }, + { + "epoch": 0.03, + "learning_rate": 4.991235434166215e-05, + "loss": 1.0961, + "step": 26759 + }, + { + "epoch": 0.03, + "learning_rate": 4.991234777006909e-05, + "loss": 0.9104, + "step": 26760 + }, + { + "epoch": 0.03, + "learning_rate": 4.99123411982301e-05, + "loss": 0.9005, + "step": 26761 + }, + { + "epoch": 0.03, + "learning_rate": 4.99123346261452e-05, + "loss": 0.9287, + "step": 26762 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912328053814364e-05, + "loss": 1.0366, + "step": 26763 + }, + { + "epoch": 0.03, + "learning_rate": 4.991232148123761e-05, + "loss": 0.8067, + "step": 26764 + }, + { + "epoch": 0.03, + "learning_rate": 4.991231490841493e-05, + "loss": 0.8784, + "step": 26765 + }, + { + "epoch": 0.03, + "learning_rate": 4.991230833534632e-05, + "loss": 1.165, + "step": 26766 + }, + { + "epoch": 0.03, + "learning_rate": 4.99123017620318e-05, + "loss": 1.1018, + "step": 26767 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912295188471356e-05, + "loss": 1.1175, + "step": 26768 + }, + { + "epoch": 0.03, + "learning_rate": 4.991228861466498e-05, + "loss": 1.15, + "step": 26769 + }, + { + "epoch": 0.03, + "learning_rate": 4.991228204061268e-05, + "loss": 1.0813, + "step": 26770 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912275466314465e-05, + "loss": 1.2602, + "step": 26771 + }, + { + "epoch": 0.03, + "learning_rate": 4.991226889177031e-05, + "loss": 0.9275, + "step": 26772 + }, + { + "epoch": 0.03, + "learning_rate": 4.991226231698024e-05, + "loss": 0.8742, + "step": 26773 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912255741944256e-05, + "loss": 0.7898, + "step": 26774 + }, + { + "epoch": 0.03, + "learning_rate": 4.991224916666234e-05, + "loss": 0.9416, + "step": 26775 + }, + { + "epoch": 0.03, + "learning_rate": 4.99122425911345e-05, + "loss": 1.0406, + "step": 26776 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912236015360744e-05, + "loss": 0.7723, + "step": 26777 + }, + { + "epoch": 0.03, + "learning_rate": 4.991222943934106e-05, + "loss": 1.1298, + "step": 26778 + }, + { + "epoch": 0.03, + "learning_rate": 4.991222286307545e-05, + "loss": 0.8745, + "step": 26779 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912216286563914e-05, + "loss": 0.7528, + "step": 26780 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912209709806465e-05, + "loss": 0.7908, + "step": 26781 + }, + { + "epoch": 0.03, + "learning_rate": 4.991220313280309e-05, + "loss": 1.2125, + "step": 26782 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912196555553794e-05, + "loss": 0.8367, + "step": 26783 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912189978058566e-05, + "loss": 0.6942, + "step": 26784 + }, + { + "epoch": 0.03, + "learning_rate": 4.991218340031743e-05, + "loss": 0.8158, + "step": 26785 + }, + { + "epoch": 0.03, + "learning_rate": 4.991217682233036e-05, + "loss": 0.8163, + "step": 26786 + }, + { + "epoch": 0.03, + "learning_rate": 4.991217024409737e-05, + "loss": 0.7397, + "step": 26787 + }, + { + "epoch": 0.03, + "learning_rate": 4.991216366561846e-05, + "loss": 1.2894, + "step": 26788 + }, + { + "epoch": 0.03, + "learning_rate": 4.991215708689362e-05, + "loss": 0.9577, + "step": 26789 + }, + { + "epoch": 0.03, + "learning_rate": 4.991215050792287e-05, + "loss": 1.1177, + "step": 26790 + }, + { + "epoch": 0.03, + "learning_rate": 4.991214392870619e-05, + "loss": 1.2259, + "step": 26791 + }, + { + "epoch": 0.03, + "learning_rate": 4.991213734924359e-05, + "loss": 0.9205, + "step": 26792 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912130769535065e-05, + "loss": 0.5936, + "step": 26793 + }, + { + "epoch": 0.03, + "learning_rate": 4.991212418958062e-05, + "loss": 0.3901, + "step": 26794 + }, + { + "epoch": 0.03, + "learning_rate": 4.991211760938025e-05, + "loss": 1.0342, + "step": 26795 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912111028933964e-05, + "loss": 1.1426, + "step": 26796 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912104448241755e-05, + "loss": 1.0071, + "step": 26797 + }, + { + "epoch": 0.03, + "learning_rate": 4.991209786730362e-05, + "loss": 0.8949, + "step": 26798 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912091286119566e-05, + "loss": 0.6756, + "step": 26799 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912084704689585e-05, + "loss": 1.0531, + "step": 26800 + }, + { + "epoch": 0.03, + "learning_rate": 4.991207812301369e-05, + "loss": 1.006, + "step": 26801 + }, + { + "epoch": 0.03, + "learning_rate": 4.991207154109188e-05, + "loss": 0.8581, + "step": 26802 + }, + { + "epoch": 0.03, + "learning_rate": 4.991206495892413e-05, + "loss": 0.9681, + "step": 26803 + }, + { + "epoch": 0.03, + "learning_rate": 4.991205837651047e-05, + "loss": 1.0809, + "step": 26804 + }, + { + "epoch": 0.03, + "learning_rate": 4.991205179385089e-05, + "loss": 1.1244, + "step": 26805 + }, + { + "epoch": 0.03, + "learning_rate": 4.991204521094538e-05, + "loss": 0.9817, + "step": 26806 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912038627793956e-05, + "loss": 1.3168, + "step": 26807 + }, + { + "epoch": 0.03, + "learning_rate": 4.991203204439661e-05, + "loss": 1.3964, + "step": 26808 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912025460753344e-05, + "loss": 1.0439, + "step": 26809 + }, + { + "epoch": 0.03, + "learning_rate": 4.9912018876864155e-05, + "loss": 1.1306, + "step": 26810 + }, + { + "epoch": 0.03, + "learning_rate": 4.991201229272904e-05, + "loss": 1.3033, + "step": 26811 + }, + { + "epoch": 0.03, + "learning_rate": 4.991200570834801e-05, + "loss": 1.3888, + "step": 26812 + }, + { + "epoch": 0.03, + "learning_rate": 4.991199912372106e-05, + "loss": 1.2217, + "step": 26813 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911992538848186e-05, + "loss": 0.8826, + "step": 26814 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911985953729396e-05, + "loss": 0.6732, + "step": 26815 + }, + { + "epoch": 0.03, + "learning_rate": 4.991197936836468e-05, + "loss": 0.93, + "step": 26816 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911972782754045e-05, + "loss": 0.9521, + "step": 26817 + }, + { + "epoch": 0.03, + "learning_rate": 4.991196619689749e-05, + "loss": 0.9679, + "step": 26818 + }, + { + "epoch": 0.03, + "learning_rate": 4.991195961079502e-05, + "loss": 0.8592, + "step": 26819 + }, + { + "epoch": 0.03, + "learning_rate": 4.991195302444662e-05, + "loss": 1.4616, + "step": 26820 + }, + { + "epoch": 0.03, + "learning_rate": 4.991194643785231e-05, + "loss": 1.1019, + "step": 26821 + }, + { + "epoch": 0.03, + "learning_rate": 4.991193985101207e-05, + "loss": 1.008, + "step": 26822 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911933263925914e-05, + "loss": 1.0432, + "step": 26823 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911926676593835e-05, + "loss": 1.125, + "step": 26824 + }, + { + "epoch": 0.03, + "learning_rate": 4.991192008901584e-05, + "loss": 0.9843, + "step": 26825 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911913501191924e-05, + "loss": 0.9426, + "step": 26826 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911906913122085e-05, + "loss": 0.9161, + "step": 26827 + }, + { + "epoch": 0.03, + "learning_rate": 4.991190032480633e-05, + "loss": 1.0644, + "step": 26828 + }, + { + "epoch": 0.03, + "learning_rate": 4.991189373624465e-05, + "loss": 1.4092, + "step": 26829 + }, + { + "epoch": 0.03, + "learning_rate": 4.991188714743706e-05, + "loss": 1.0138, + "step": 26830 + }, + { + "epoch": 0.03, + "learning_rate": 4.991188055838354e-05, + "loss": 1.1918, + "step": 26831 + }, + { + "epoch": 0.03, + "learning_rate": 4.991187396908411e-05, + "loss": 1.2013, + "step": 26832 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911867379538755e-05, + "loss": 1.0369, + "step": 26833 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911860789747475e-05, + "loss": 1.2653, + "step": 26834 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911854199710284e-05, + "loss": 1.2287, + "step": 26835 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911847609427175e-05, + "loss": 0.9346, + "step": 26836 + }, + { + "epoch": 0.03, + "learning_rate": 4.991184101889814e-05, + "loss": 0.8314, + "step": 26837 + }, + { + "epoch": 0.03, + "learning_rate": 4.991183442812319e-05, + "loss": 0.8721, + "step": 26838 + }, + { + "epoch": 0.03, + "learning_rate": 4.991182783710232e-05, + "loss": 1.0569, + "step": 26839 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911821245835534e-05, + "loss": 1.2035, + "step": 26840 + }, + { + "epoch": 0.03, + "learning_rate": 4.991181465432282e-05, + "loss": 1.5467, + "step": 26841 + }, + { + "epoch": 0.03, + "learning_rate": 4.991180806256419e-05, + "loss": 1.0387, + "step": 26842 + }, + { + "epoch": 0.03, + "learning_rate": 4.991180147055965e-05, + "loss": 0.8754, + "step": 26843 + }, + { + "epoch": 0.03, + "learning_rate": 4.991179487830918e-05, + "loss": 0.6551, + "step": 26844 + }, + { + "epoch": 0.03, + "learning_rate": 4.99117882858128e-05, + "loss": 0.9056, + "step": 26845 + }, + { + "epoch": 0.03, + "learning_rate": 4.99117816930705e-05, + "loss": 1.2566, + "step": 26846 + }, + { + "epoch": 0.03, + "learning_rate": 4.991177510008227e-05, + "loss": 1.1585, + "step": 26847 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911768506848135e-05, + "loss": 1.1097, + "step": 26848 + }, + { + "epoch": 0.03, + "learning_rate": 4.991176191336808e-05, + "loss": 0.9204, + "step": 26849 + }, + { + "epoch": 0.03, + "learning_rate": 4.991175531964211e-05, + "loss": 0.854, + "step": 26850 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911748725670205e-05, + "loss": 1.2184, + "step": 26851 + }, + { + "epoch": 0.03, + "learning_rate": 4.991174213145239e-05, + "loss": 1.29, + "step": 26852 + }, + { + "epoch": 0.03, + "learning_rate": 4.991173553698866e-05, + "loss": 1.1112, + "step": 26853 + }, + { + "epoch": 0.03, + "learning_rate": 4.991172894227901e-05, + "loss": 1.1708, + "step": 26854 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911722347323446e-05, + "loss": 1.2395, + "step": 26855 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911715752121956e-05, + "loss": 1.1501, + "step": 26856 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911709156674556e-05, + "loss": 0.8496, + "step": 26857 + }, + { + "epoch": 0.03, + "learning_rate": 4.991170256098123e-05, + "loss": 0.9385, + "step": 26858 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911695965041996e-05, + "loss": 1.0147, + "step": 26859 + }, + { + "epoch": 0.03, + "learning_rate": 4.991168936885684e-05, + "loss": 1.1267, + "step": 26860 + }, + { + "epoch": 0.03, + "learning_rate": 4.991168277242576e-05, + "loss": 1.0359, + "step": 26861 + }, + { + "epoch": 0.03, + "learning_rate": 4.991167617574877e-05, + "loss": 0.9257, + "step": 26862 + }, + { + "epoch": 0.03, + "learning_rate": 4.991166957882586e-05, + "loss": 0.8573, + "step": 26863 + }, + { + "epoch": 0.03, + "learning_rate": 4.991166298165704e-05, + "loss": 0.9707, + "step": 26864 + }, + { + "epoch": 0.03, + "learning_rate": 4.991165638424229e-05, + "loss": 1.1, + "step": 26865 + }, + { + "epoch": 0.03, + "learning_rate": 4.991164978658163e-05, + "loss": 0.95, + "step": 26866 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911643188675053e-05, + "loss": 0.5659, + "step": 26867 + }, + { + "epoch": 0.03, + "learning_rate": 4.991163659052255e-05, + "loss": 1.2734, + "step": 26868 + }, + { + "epoch": 0.03, + "learning_rate": 4.991162999212414e-05, + "loss": 0.8616, + "step": 26869 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911623393479806e-05, + "loss": 1.0021, + "step": 26870 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911616794589555e-05, + "loss": 0.8867, + "step": 26871 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911610195453394e-05, + "loss": 1.1057, + "step": 26872 + }, + { + "epoch": 0.03, + "learning_rate": 4.991160359607131e-05, + "loss": 0.9041, + "step": 26873 + }, + { + "epoch": 0.03, + "learning_rate": 4.991159699644331e-05, + "loss": 1.2288, + "step": 26874 + }, + { + "epoch": 0.03, + "learning_rate": 4.99115903965694e-05, + "loss": 0.9301, + "step": 26875 + }, + { + "epoch": 0.03, + "learning_rate": 4.991158379644956e-05, + "loss": 1.2943, + "step": 26876 + }, + { + "epoch": 0.03, + "learning_rate": 4.991157719608381e-05, + "loss": 1.1801, + "step": 26877 + }, + { + "epoch": 0.03, + "learning_rate": 4.991157059547215e-05, + "loss": 0.8251, + "step": 26878 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911563994614564e-05, + "loss": 0.9208, + "step": 26879 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911557393511064e-05, + "loss": 1.1103, + "step": 26880 + }, + { + "epoch": 0.03, + "learning_rate": 4.991155079216165e-05, + "loss": 0.972, + "step": 26881 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911544190566325e-05, + "loss": 0.9369, + "step": 26882 + }, + { + "epoch": 0.03, + "learning_rate": 4.991153758872508e-05, + "loss": 0.7588, + "step": 26883 + }, + { + "epoch": 0.03, + "learning_rate": 4.991153098663791e-05, + "loss": 0.8079, + "step": 26884 + }, + { + "epoch": 0.03, + "learning_rate": 4.991152438430483e-05, + "loss": 0.98, + "step": 26885 + }, + { + "epoch": 0.03, + "learning_rate": 4.991151778172584e-05, + "loss": 0.7687, + "step": 26886 + }, + { + "epoch": 0.03, + "learning_rate": 4.991151117890092e-05, + "loss": 0.7565, + "step": 26887 + }, + { + "epoch": 0.03, + "learning_rate": 4.991150457583009e-05, + "loss": 1.194, + "step": 26888 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911497972513346e-05, + "loss": 0.8899, + "step": 26889 + }, + { + "epoch": 0.03, + "learning_rate": 4.991149136895069e-05, + "loss": 1.291, + "step": 26890 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911484765142115e-05, + "loss": 0.7324, + "step": 26891 + }, + { + "epoch": 0.03, + "learning_rate": 4.991147816108762e-05, + "loss": 0.883, + "step": 26892 + }, + { + "epoch": 0.03, + "learning_rate": 4.991147155678721e-05, + "loss": 1.1096, + "step": 26893 + }, + { + "epoch": 0.03, + "learning_rate": 4.99114649522409e-05, + "loss": 0.7795, + "step": 26894 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911458347448656e-05, + "loss": 0.8717, + "step": 26895 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911451742410505e-05, + "loss": 0.9188, + "step": 26896 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911445137126436e-05, + "loss": 1.046, + "step": 26897 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911438531596457e-05, + "loss": 0.8801, + "step": 26898 + }, + { + "epoch": 0.03, + "learning_rate": 4.991143192582055e-05, + "loss": 0.9049, + "step": 26899 + }, + { + "epoch": 0.03, + "learning_rate": 4.991142531979874e-05, + "loss": 1.2029, + "step": 26900 + }, + { + "epoch": 0.03, + "learning_rate": 4.991141871353101e-05, + "loss": 1.2144, + "step": 26901 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911412107017365e-05, + "loss": 1.3744, + "step": 26902 + }, + { + "epoch": 0.03, + "learning_rate": 4.99114055002578e-05, + "loss": 1.3262, + "step": 26903 + }, + { + "epoch": 0.03, + "learning_rate": 4.991139889325233e-05, + "loss": 1.5156, + "step": 26904 + }, + { + "epoch": 0.03, + "learning_rate": 4.991139228600094e-05, + "loss": 1.0484, + "step": 26905 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911385678503644e-05, + "loss": 1.0517, + "step": 26906 + }, + { + "epoch": 0.03, + "learning_rate": 4.991137907076042e-05, + "loss": 0.8222, + "step": 26907 + }, + { + "epoch": 0.03, + "learning_rate": 4.991137246277129e-05, + "loss": 1.0472, + "step": 26908 + }, + { + "epoch": 0.03, + "learning_rate": 4.991136585453624e-05, + "loss": 1.04, + "step": 26909 + }, + { + "epoch": 0.03, + "learning_rate": 4.991135924605528e-05, + "loss": 1.0255, + "step": 26910 + }, + { + "epoch": 0.03, + "learning_rate": 4.99113526373284e-05, + "loss": 0.9476, + "step": 26911 + }, + { + "epoch": 0.03, + "learning_rate": 4.991134602835561e-05, + "loss": 0.5468, + "step": 26912 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911339419136904e-05, + "loss": 0.8107, + "step": 26913 + }, + { + "epoch": 0.03, + "learning_rate": 4.991133280967229e-05, + "loss": 0.9159, + "step": 26914 + }, + { + "epoch": 0.03, + "learning_rate": 4.991132619996175e-05, + "loss": 1.296, + "step": 26915 + }, + { + "epoch": 0.03, + "learning_rate": 4.99113195900053e-05, + "loss": 0.9748, + "step": 26916 + }, + { + "epoch": 0.03, + "learning_rate": 4.991131297980294e-05, + "loss": 1.0405, + "step": 26917 + }, + { + "epoch": 0.03, + "learning_rate": 4.991130636935466e-05, + "loss": 1.2153, + "step": 26918 + }, + { + "epoch": 0.03, + "learning_rate": 4.991129975866048e-05, + "loss": 1.0682, + "step": 26919 + }, + { + "epoch": 0.03, + "learning_rate": 4.991129314772037e-05, + "loss": 0.6975, + "step": 26920 + }, + { + "epoch": 0.03, + "learning_rate": 4.991128653653435e-05, + "loss": 0.7133, + "step": 26921 + }, + { + "epoch": 0.03, + "learning_rate": 4.991127992510242e-05, + "loss": 0.8922, + "step": 26922 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911273313424576e-05, + "loss": 0.9832, + "step": 26923 + }, + { + "epoch": 0.03, + "learning_rate": 4.991126670150081e-05, + "loss": 0.8909, + "step": 26924 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911260089331146e-05, + "loss": 0.8099, + "step": 26925 + }, + { + "epoch": 0.03, + "learning_rate": 4.991125347691556e-05, + "loss": 0.4222, + "step": 26926 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911246864254066e-05, + "loss": 1.0042, + "step": 26927 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911240251346646e-05, + "loss": 0.7435, + "step": 26928 + }, + { + "epoch": 0.03, + "learning_rate": 4.991123363819332e-05, + "loss": 1.5602, + "step": 26929 + }, + { + "epoch": 0.03, + "learning_rate": 4.991122702479408e-05, + "loss": 0.908, + "step": 26930 + }, + { + "epoch": 0.03, + "learning_rate": 4.991122041114894e-05, + "loss": 1.1267, + "step": 26931 + }, + { + "epoch": 0.03, + "learning_rate": 4.991121379725787e-05, + "loss": 1.353, + "step": 26932 + }, + { + "epoch": 0.03, + "learning_rate": 4.991120718312089e-05, + "loss": 1.1698, + "step": 26933 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911200568738007e-05, + "loss": 1.0144, + "step": 26934 + }, + { + "epoch": 0.03, + "learning_rate": 4.99111939541092e-05, + "loss": 1.0006, + "step": 26935 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911187339234475e-05, + "loss": 1.057, + "step": 26936 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911180724113846e-05, + "loss": 1.0498, + "step": 26937 + }, + { + "epoch": 0.03, + "learning_rate": 4.991117410874731e-05, + "loss": 0.7731, + "step": 26938 + }, + { + "epoch": 0.03, + "learning_rate": 4.991116749313486e-05, + "loss": 1.0751, + "step": 26939 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911160877276484e-05, + "loss": 1.0736, + "step": 26940 + }, + { + "epoch": 0.03, + "learning_rate": 4.991115426117221e-05, + "loss": 0.9844, + "step": 26941 + }, + { + "epoch": 0.03, + "learning_rate": 4.991114764482202e-05, + "loss": 0.8554, + "step": 26942 + }, + { + "epoch": 0.03, + "learning_rate": 4.991114102822591e-05, + "loss": 0.9061, + "step": 26943 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911134411383896e-05, + "loss": 0.4937, + "step": 26944 + }, + { + "epoch": 0.03, + "learning_rate": 4.991112779429596e-05, + "loss": 0.5691, + "step": 26945 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911121176962125e-05, + "loss": 0.1923, + "step": 26946 + }, + { + "epoch": 0.03, + "learning_rate": 4.991111455938237e-05, + "loss": 0.138, + "step": 26947 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911107941556704e-05, + "loss": 0.1559, + "step": 26948 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911101323485135e-05, + "loss": 0.1733, + "step": 26949 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911094705167635e-05, + "loss": 0.4034, + "step": 26950 + }, + { + "epoch": 0.03, + "learning_rate": 4.991108808660424e-05, + "loss": 1.093, + "step": 26951 + }, + { + "epoch": 0.03, + "learning_rate": 4.991108146779493e-05, + "loss": 1.2736, + "step": 26952 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911074848739705e-05, + "loss": 1.1731, + "step": 26953 + }, + { + "epoch": 0.03, + "learning_rate": 4.991106822943857e-05, + "loss": 0.9419, + "step": 26954 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911061609891515e-05, + "loss": 0.4126, + "step": 26955 + }, + { + "epoch": 0.03, + "learning_rate": 4.991105499009856e-05, + "loss": 0.736, + "step": 26956 + }, + { + "epoch": 0.03, + "learning_rate": 4.991104837005969e-05, + "loss": 1.1068, + "step": 26957 + }, + { + "epoch": 0.03, + "learning_rate": 4.991104174977491e-05, + "loss": 0.9218, + "step": 26958 + }, + { + "epoch": 0.03, + "learning_rate": 4.991103512924422e-05, + "loss": 1.0234, + "step": 26959 + }, + { + "epoch": 0.03, + "learning_rate": 4.991102850846761e-05, + "loss": 0.6468, + "step": 26960 + }, + { + "epoch": 0.03, + "learning_rate": 4.99110218874451e-05, + "loss": 0.6965, + "step": 26961 + }, + { + "epoch": 0.03, + "learning_rate": 4.9911015266176666e-05, + "loss": 0.748, + "step": 26962 + }, + { + "epoch": 0.03, + "learning_rate": 4.991100864466233e-05, + "loss": 1.2039, + "step": 26963 + }, + { + "epoch": 0.03, + "learning_rate": 4.991100202290208e-05, + "loss": 1.1384, + "step": 26964 + }, + { + "epoch": 0.03, + "learning_rate": 4.991099540089592e-05, + "loss": 0.9328, + "step": 26965 + }, + { + "epoch": 0.03, + "learning_rate": 4.991098877864385e-05, + "loss": 0.7647, + "step": 26966 + }, + { + "epoch": 0.03, + "learning_rate": 4.991098215614587e-05, + "loss": 0.8638, + "step": 26967 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910975533401985e-05, + "loss": 0.5231, + "step": 26968 + }, + { + "epoch": 0.03, + "learning_rate": 4.991096891041218e-05, + "loss": 0.4703, + "step": 26969 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910962287176465e-05, + "loss": 0.4046, + "step": 26970 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910955663694835e-05, + "loss": 0.482, + "step": 26971 + }, + { + "epoch": 0.03, + "learning_rate": 4.99109490399673e-05, + "loss": 0.6608, + "step": 26972 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910942415993864e-05, + "loss": 0.9169, + "step": 26973 + }, + { + "epoch": 0.03, + "learning_rate": 4.99109357917745e-05, + "loss": 1.1318, + "step": 26974 + }, + { + "epoch": 0.03, + "learning_rate": 4.991092916730924e-05, + "loss": 0.9549, + "step": 26975 + }, + { + "epoch": 0.03, + "learning_rate": 4.991092254259807e-05, + "loss": 0.9387, + "step": 26976 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910915917640975e-05, + "loss": 0.7964, + "step": 26977 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910909292437984e-05, + "loss": 0.8403, + "step": 26978 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910902666989076e-05, + "loss": 1.0187, + "step": 26979 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910896041294265e-05, + "loss": 0.9564, + "step": 26980 + }, + { + "epoch": 0.03, + "learning_rate": 4.991088941535354e-05, + "loss": 1.2211, + "step": 26981 + }, + { + "epoch": 0.03, + "learning_rate": 4.99108827891669e-05, + "loss": 1.2985, + "step": 26982 + }, + { + "epoch": 0.03, + "learning_rate": 4.991087616273436e-05, + "loss": 1.3234, + "step": 26983 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910869536055906e-05, + "loss": 1.011, + "step": 26984 + }, + { + "epoch": 0.03, + "learning_rate": 4.991086290913154e-05, + "loss": 0.9546, + "step": 26985 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910856281961266e-05, + "loss": 1.0895, + "step": 26986 + }, + { + "epoch": 0.03, + "learning_rate": 4.991084965454508e-05, + "loss": 1.0329, + "step": 26987 + }, + { + "epoch": 0.03, + "learning_rate": 4.991084302688299e-05, + "loss": 1.0303, + "step": 26988 + }, + { + "epoch": 0.03, + "learning_rate": 4.991083639897499e-05, + "loss": 1.1332, + "step": 26989 + }, + { + "epoch": 0.03, + "learning_rate": 4.991082977082108e-05, + "loss": 0.9216, + "step": 26990 + }, + { + "epoch": 0.03, + "learning_rate": 4.991082314242126e-05, + "loss": 0.9576, + "step": 26991 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910816513775526e-05, + "loss": 1.1569, + "step": 26992 + }, + { + "epoch": 0.03, + "learning_rate": 4.991080988488389e-05, + "loss": 0.9073, + "step": 26993 + }, + { + "epoch": 0.03, + "learning_rate": 4.991080325574634e-05, + "loss": 1.0934, + "step": 26994 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910796626362886e-05, + "loss": 0.7477, + "step": 26995 + }, + { + "epoch": 0.03, + "learning_rate": 4.991078999673352e-05, + "loss": 0.4916, + "step": 26996 + }, + { + "epoch": 0.03, + "learning_rate": 4.991078336685825e-05, + "loss": 0.504, + "step": 26997 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910776736737065e-05, + "loss": 0.4001, + "step": 26998 + }, + { + "epoch": 0.03, + "learning_rate": 4.991077010636997e-05, + "loss": 0.8864, + "step": 26999 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910763475756975e-05, + "loss": 0.5926, + "step": 27000 + }, + { + "epoch": 0.03, + "eval_loss": 1.06881582736969, + "eval_runtime": 126.3852, + "eval_samples_per_second": 10.959, + "eval_steps_per_second": 5.483, + "step": 27000 + }, + { + "epoch": 0.03, + "learning_rate": 4.991075684489806e-05, + "loss": 0.6985, + "step": 27001 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910750213793243e-05, + "loss": 0.5751, + "step": 27002 + }, + { + "epoch": 0.03, + "learning_rate": 4.991074358244252e-05, + "loss": 0.652, + "step": 27003 + }, + { + "epoch": 0.03, + "learning_rate": 4.991073695084588e-05, + "loss": 0.6291, + "step": 27004 + }, + { + "epoch": 0.03, + "learning_rate": 4.991073031900334e-05, + "loss": 0.4679, + "step": 27005 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910723686914887e-05, + "loss": 0.6497, + "step": 27006 + }, + { + "epoch": 0.03, + "learning_rate": 4.991071705458053e-05, + "loss": 0.4919, + "step": 27007 + }, + { + "epoch": 0.03, + "learning_rate": 4.991071042200026e-05, + "loss": 0.466, + "step": 27008 + }, + { + "epoch": 0.03, + "learning_rate": 4.991070378917409e-05, + "loss": 0.4591, + "step": 27009 + }, + { + "epoch": 0.03, + "learning_rate": 4.991069715610201e-05, + "loss": 0.4455, + "step": 27010 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910690522784014e-05, + "loss": 0.4484, + "step": 27011 + }, + { + "epoch": 0.03, + "learning_rate": 4.991068388922012e-05, + "loss": 0.4287, + "step": 27012 + }, + { + "epoch": 0.03, + "learning_rate": 4.991067725541031e-05, + "loss": 0.4423, + "step": 27013 + }, + { + "epoch": 0.03, + "learning_rate": 4.991067062135459e-05, + "loss": 0.4063, + "step": 27014 + }, + { + "epoch": 0.03, + "learning_rate": 4.991066398705298e-05, + "loss": 0.4004, + "step": 27015 + }, + { + "epoch": 0.03, + "learning_rate": 4.991065735250544e-05, + "loss": 0.4604, + "step": 27016 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910650717712e-05, + "loss": 0.4911, + "step": 27017 + }, + { + "epoch": 0.03, + "learning_rate": 4.991064408267266e-05, + "loss": 0.5221, + "step": 27018 + }, + { + "epoch": 0.03, + "learning_rate": 4.991063744738741e-05, + "loss": 0.4918, + "step": 27019 + }, + { + "epoch": 0.03, + "learning_rate": 4.991063081185625e-05, + "loss": 0.4898, + "step": 27020 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910624176079185e-05, + "loss": 0.4693, + "step": 27021 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910617540056216e-05, + "loss": 0.48, + "step": 27022 + }, + { + "epoch": 0.03, + "learning_rate": 4.991061090378733e-05, + "loss": 0.5877, + "step": 27023 + }, + { + "epoch": 0.03, + "learning_rate": 4.991060426727254e-05, + "loss": 0.4786, + "step": 27024 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910597630511845e-05, + "loss": 0.4195, + "step": 27025 + }, + { + "epoch": 0.03, + "learning_rate": 4.991059099350525e-05, + "loss": 0.3996, + "step": 27026 + }, + { + "epoch": 0.03, + "learning_rate": 4.991058435625274e-05, + "loss": 0.3994, + "step": 27027 + }, + { + "epoch": 0.03, + "learning_rate": 4.991057771875433e-05, + "loss": 0.4062, + "step": 27028 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910571081010004e-05, + "loss": 0.4113, + "step": 27029 + }, + { + "epoch": 0.03, + "learning_rate": 4.991056444301977e-05, + "loss": 0.4673, + "step": 27030 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910557804783634e-05, + "loss": 0.6835, + "step": 27031 + }, + { + "epoch": 0.03, + "learning_rate": 4.991055116630159e-05, + "loss": 0.5696, + "step": 27032 + }, + { + "epoch": 0.03, + "learning_rate": 4.991054452757364e-05, + "loss": 0.4912, + "step": 27033 + }, + { + "epoch": 0.03, + "learning_rate": 4.99105378885998e-05, + "loss": 0.5117, + "step": 27034 + }, + { + "epoch": 0.03, + "learning_rate": 4.991053124938003e-05, + "loss": 0.4814, + "step": 27035 + }, + { + "epoch": 0.03, + "learning_rate": 4.991052460991437e-05, + "loss": 0.5295, + "step": 27036 + }, + { + "epoch": 0.03, + "learning_rate": 4.991051797020279e-05, + "loss": 0.5073, + "step": 27037 + }, + { + "epoch": 0.03, + "learning_rate": 4.991051133024531e-05, + "loss": 0.5185, + "step": 27038 + }, + { + "epoch": 0.03, + "learning_rate": 4.991050469004192e-05, + "loss": 0.4338, + "step": 27039 + }, + { + "epoch": 0.03, + "learning_rate": 4.991049804959264e-05, + "loss": 0.4382, + "step": 27040 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910491408897435e-05, + "loss": 0.452, + "step": 27041 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910484767956335e-05, + "loss": 0.4877, + "step": 27042 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910478126769325e-05, + "loss": 0.4815, + "step": 27043 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910471485336404e-05, + "loss": 0.4188, + "step": 27044 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910464843657586e-05, + "loss": 0.4334, + "step": 27045 + }, + { + "epoch": 0.03, + "learning_rate": 4.991045820173286e-05, + "loss": 0.4734, + "step": 27046 + }, + { + "epoch": 0.03, + "learning_rate": 4.991045155956223e-05, + "loss": 0.473, + "step": 27047 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910444917145696e-05, + "loss": 0.445, + "step": 27048 + }, + { + "epoch": 0.03, + "learning_rate": 4.991043827448325e-05, + "loss": 0.4857, + "step": 27049 + }, + { + "epoch": 0.03, + "learning_rate": 4.99104316315749e-05, + "loss": 0.4434, + "step": 27050 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910424988420644e-05, + "loss": 0.4397, + "step": 27051 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910418345020485e-05, + "loss": 0.422, + "step": 27052 + }, + { + "epoch": 0.03, + "learning_rate": 4.991041170137443e-05, + "loss": 0.4954, + "step": 27053 + }, + { + "epoch": 0.03, + "learning_rate": 4.991040505748246e-05, + "loss": 0.5059, + "step": 27054 + }, + { + "epoch": 0.03, + "learning_rate": 4.991039841334458e-05, + "loss": 0.5205, + "step": 27055 + }, + { + "epoch": 0.03, + "learning_rate": 4.991039176896081e-05, + "loss": 0.4028, + "step": 27056 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910385124331116e-05, + "loss": 0.4021, + "step": 27057 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910378479455535e-05, + "loss": 0.4271, + "step": 27058 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910371834334036e-05, + "loss": 0.3997, + "step": 27059 + }, + { + "epoch": 0.03, + "learning_rate": 4.991036518896664e-05, + "loss": 0.4413, + "step": 27060 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910358543353334e-05, + "loss": 0.4646, + "step": 27061 + }, + { + "epoch": 0.03, + "learning_rate": 4.991035189749413e-05, + "loss": 0.4452, + "step": 27062 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910345251389016e-05, + "loss": 0.4356, + "step": 27063 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910338605038e-05, + "loss": 0.441, + "step": 27064 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910331958441084e-05, + "loss": 0.4046, + "step": 27065 + }, + { + "epoch": 0.03, + "learning_rate": 4.991032531159826e-05, + "loss": 0.4549, + "step": 27066 + }, + { + "epoch": 0.03, + "learning_rate": 4.991031866450953e-05, + "loss": 0.4641, + "step": 27067 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910312017174896e-05, + "loss": 0.5026, + "step": 27068 + }, + { + "epoch": 0.03, + "learning_rate": 4.991030536959436e-05, + "loss": 0.4674, + "step": 27069 + }, + { + "epoch": 0.03, + "learning_rate": 4.991029872176791e-05, + "loss": 0.5061, + "step": 27070 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910292073695567e-05, + "loss": 0.474, + "step": 27071 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910285425377325e-05, + "loss": 0.4524, + "step": 27072 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910278776813166e-05, + "loss": 0.4787, + "step": 27073 + }, + { + "epoch": 0.03, + "learning_rate": 4.991027212800311e-05, + "loss": 0.4907, + "step": 27074 + }, + { + "epoch": 0.03, + "learning_rate": 4.991026547894715e-05, + "loss": 0.4861, + "step": 27075 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910258829645286e-05, + "loss": 0.5151, + "step": 27076 + }, + { + "epoch": 0.03, + "learning_rate": 4.991025218009752e-05, + "loss": 0.4591, + "step": 27077 + }, + { + "epoch": 0.03, + "learning_rate": 4.991024553030385e-05, + "loss": 0.5556, + "step": 27078 + }, + { + "epoch": 0.03, + "learning_rate": 4.991023888026427e-05, + "loss": 0.5146, + "step": 27079 + }, + { + "epoch": 0.03, + "learning_rate": 4.991023222997879e-05, + "loss": 0.4401, + "step": 27080 + }, + { + "epoch": 0.03, + "learning_rate": 4.991022557944741e-05, + "loss": 0.4655, + "step": 27081 + }, + { + "epoch": 0.03, + "learning_rate": 4.991021892867013e-05, + "loss": 0.4523, + "step": 27082 + }, + { + "epoch": 0.03, + "learning_rate": 4.991021227764694e-05, + "loss": 0.4146, + "step": 27083 + }, + { + "epoch": 0.03, + "learning_rate": 4.991020562637785e-05, + "loss": 0.3843, + "step": 27084 + }, + { + "epoch": 0.03, + "learning_rate": 4.991019897486285e-05, + "loss": 0.4036, + "step": 27085 + }, + { + "epoch": 0.03, + "learning_rate": 4.991019232310196e-05, + "loss": 0.427, + "step": 27086 + }, + { + "epoch": 0.03, + "learning_rate": 4.991018567109516e-05, + "loss": 0.4155, + "step": 27087 + }, + { + "epoch": 0.03, + "learning_rate": 4.991017901884245e-05, + "loss": 0.4289, + "step": 27088 + }, + { + "epoch": 0.03, + "learning_rate": 4.991017236634385e-05, + "loss": 0.4104, + "step": 27089 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910165713599345e-05, + "loss": 0.46, + "step": 27090 + }, + { + "epoch": 0.03, + "learning_rate": 4.991015906060893e-05, + "loss": 0.4813, + "step": 27091 + }, + { + "epoch": 0.03, + "learning_rate": 4.991015240737261e-05, + "loss": 0.4047, + "step": 27092 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910145753890404e-05, + "loss": 0.4293, + "step": 27093 + }, + { + "epoch": 0.03, + "learning_rate": 4.991013910016228e-05, + "loss": 0.4493, + "step": 27094 + }, + { + "epoch": 0.03, + "learning_rate": 4.991013244618826e-05, + "loss": 0.4408, + "step": 27095 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910125791968335e-05, + "loss": 0.4357, + "step": 27096 + }, + { + "epoch": 0.03, + "learning_rate": 4.991011913750251e-05, + "loss": 0.464, + "step": 27097 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910112482790783e-05, + "loss": 0.3781, + "step": 27098 + }, + { + "epoch": 0.03, + "learning_rate": 4.991010582783315e-05, + "loss": 0.414, + "step": 27099 + }, + { + "epoch": 0.03, + "learning_rate": 4.991009917262962e-05, + "loss": 0.3884, + "step": 27100 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910092517180184e-05, + "loss": 0.3994, + "step": 27101 + }, + { + "epoch": 0.03, + "learning_rate": 4.991008586148485e-05, + "loss": 0.4007, + "step": 27102 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910079205543615e-05, + "loss": 0.4584, + "step": 27103 + }, + { + "epoch": 0.03, + "learning_rate": 4.991007254935647e-05, + "loss": 0.5652, + "step": 27104 + }, + { + "epoch": 0.03, + "learning_rate": 4.991006589292343e-05, + "loss": 0.7679, + "step": 27105 + }, + { + "epoch": 0.03, + "learning_rate": 4.991005923624449e-05, + "loss": 0.7162, + "step": 27106 + }, + { + "epoch": 0.03, + "learning_rate": 4.991005257931964e-05, + "loss": 0.5744, + "step": 27107 + }, + { + "epoch": 0.03, + "learning_rate": 4.99100459221489e-05, + "loss": 0.4677, + "step": 27108 + }, + { + "epoch": 0.03, + "learning_rate": 4.991003926473225e-05, + "loss": 0.4657, + "step": 27109 + }, + { + "epoch": 0.03, + "learning_rate": 4.99100326070697e-05, + "loss": 0.47, + "step": 27110 + }, + { + "epoch": 0.03, + "learning_rate": 4.991002594916125e-05, + "loss": 0.5322, + "step": 27111 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910019291006904e-05, + "loss": 0.8757, + "step": 27112 + }, + { + "epoch": 0.03, + "learning_rate": 4.991001263260665e-05, + "loss": 0.8789, + "step": 27113 + }, + { + "epoch": 0.03, + "learning_rate": 4.99100059739605e-05, + "loss": 1.5043, + "step": 27114 + }, + { + "epoch": 0.03, + "learning_rate": 4.990999931506843e-05, + "loss": 0.7531, + "step": 27115 + }, + { + "epoch": 0.03, + "learning_rate": 4.990999265593048e-05, + "loss": 0.2777, + "step": 27116 + }, + { + "epoch": 0.03, + "learning_rate": 4.990998599654663e-05, + "loss": 0.2668, + "step": 27117 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909979336916866e-05, + "loss": 0.1835, + "step": 27118 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909972677041205e-05, + "loss": 0.2602, + "step": 27119 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909966016919654e-05, + "loss": 1.0096, + "step": 27120 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909959356552186e-05, + "loss": 1.2605, + "step": 27121 + }, + { + "epoch": 0.03, + "learning_rate": 4.990995269593883e-05, + "loss": 1.3213, + "step": 27122 + }, + { + "epoch": 0.03, + "learning_rate": 4.990994603507957e-05, + "loss": 1.0937, + "step": 27123 + }, + { + "epoch": 0.03, + "learning_rate": 4.99099393739744e-05, + "loss": 1.0715, + "step": 27124 + }, + { + "epoch": 0.03, + "learning_rate": 4.990993271262334e-05, + "loss": 1.3422, + "step": 27125 + }, + { + "epoch": 0.03, + "learning_rate": 4.990992605102638e-05, + "loss": 1.1623, + "step": 27126 + }, + { + "epoch": 0.03, + "learning_rate": 4.990991938918351e-05, + "loss": 1.7131, + "step": 27127 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909912727094756e-05, + "loss": 1.3739, + "step": 27128 + }, + { + "epoch": 0.03, + "learning_rate": 4.990990606476009e-05, + "loss": 1.4769, + "step": 27129 + }, + { + "epoch": 0.03, + "learning_rate": 4.990989940217953e-05, + "loss": 1.6199, + "step": 27130 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909892739353057e-05, + "loss": 1.4038, + "step": 27131 + }, + { + "epoch": 0.03, + "learning_rate": 4.99098860762807e-05, + "loss": 0.9505, + "step": 27132 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909879412962433e-05, + "loss": 1.0532, + "step": 27133 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909872749398276e-05, + "loss": 1.1387, + "step": 27134 + }, + { + "epoch": 0.03, + "learning_rate": 4.990986608558821e-05, + "loss": 0.9297, + "step": 27135 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909859421532244e-05, + "loss": 0.7847, + "step": 27136 + }, + { + "epoch": 0.03, + "learning_rate": 4.990985275723038e-05, + "loss": 0.8098, + "step": 27137 + }, + { + "epoch": 0.03, + "learning_rate": 4.990984609268262e-05, + "loss": 1.1527, + "step": 27138 + }, + { + "epoch": 0.03, + "learning_rate": 4.990983942788896e-05, + "loss": 1.0261, + "step": 27139 + }, + { + "epoch": 0.03, + "learning_rate": 4.99098327628494e-05, + "loss": 0.9621, + "step": 27140 + }, + { + "epoch": 0.03, + "learning_rate": 4.990982609756394e-05, + "loss": 0.8721, + "step": 27141 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909819432032577e-05, + "loss": 0.994, + "step": 27142 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909812766255326e-05, + "loss": 0.9971, + "step": 27143 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909806100232164e-05, + "loss": 1.0924, + "step": 27144 + }, + { + "epoch": 0.03, + "learning_rate": 4.990979943396311e-05, + "loss": 1.0448, + "step": 27145 + }, + { + "epoch": 0.03, + "learning_rate": 4.990979276744815e-05, + "loss": 1.0751, + "step": 27146 + }, + { + "epoch": 0.03, + "learning_rate": 4.99097861006873e-05, + "loss": 1.08, + "step": 27147 + }, + { + "epoch": 0.03, + "learning_rate": 4.990977943368054e-05, + "loss": 1.0588, + "step": 27148 + }, + { + "epoch": 0.03, + "learning_rate": 4.990977276642789e-05, + "loss": 0.6965, + "step": 27149 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909766098929337e-05, + "loss": 1.1504, + "step": 27150 + }, + { + "epoch": 0.03, + "learning_rate": 4.990975943118489e-05, + "loss": 1.1032, + "step": 27151 + }, + { + "epoch": 0.03, + "learning_rate": 4.990975276319454e-05, + "loss": 1.0929, + "step": 27152 + }, + { + "epoch": 0.03, + "learning_rate": 4.990974609495829e-05, + "loss": 0.518, + "step": 27153 + }, + { + "epoch": 0.03, + "learning_rate": 4.990973942647614e-05, + "loss": 0.7915, + "step": 27154 + }, + { + "epoch": 0.03, + "learning_rate": 4.99097327577481e-05, + "loss": 0.7656, + "step": 27155 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909726088774154e-05, + "loss": 1.322, + "step": 27156 + }, + { + "epoch": 0.03, + "learning_rate": 4.990971941955431e-05, + "loss": 0.9368, + "step": 27157 + }, + { + "epoch": 0.03, + "learning_rate": 4.990971275008858e-05, + "loss": 0.8154, + "step": 27158 + }, + { + "epoch": 0.03, + "learning_rate": 4.990970608037694e-05, + "loss": 1.1022, + "step": 27159 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909699410419404e-05, + "loss": 0.8749, + "step": 27160 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909692740215966e-05, + "loss": 0.7688, + "step": 27161 + }, + { + "epoch": 0.03, + "learning_rate": 4.990968606976664e-05, + "loss": 1.1618, + "step": 27162 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909679399071404e-05, + "loss": 0.9059, + "step": 27163 + }, + { + "epoch": 0.03, + "learning_rate": 4.990967272813028e-05, + "loss": 0.9161, + "step": 27164 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909666056943255e-05, + "loss": 1.1195, + "step": 27165 + }, + { + "epoch": 0.03, + "learning_rate": 4.990965938551033e-05, + "loss": 0.8816, + "step": 27166 + }, + { + "epoch": 0.03, + "learning_rate": 4.990965271383151e-05, + "loss": 0.7799, + "step": 27167 + }, + { + "epoch": 0.03, + "learning_rate": 4.990964604190679e-05, + "loss": 0.9019, + "step": 27168 + }, + { + "epoch": 0.03, + "learning_rate": 4.990963936973618e-05, + "loss": 1.2261, + "step": 27169 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909632697319665e-05, + "loss": 1.3599, + "step": 27170 + }, + { + "epoch": 0.03, + "learning_rate": 4.990962602465725e-05, + "loss": 0.9132, + "step": 27171 + }, + { + "epoch": 0.03, + "learning_rate": 4.990961935174895e-05, + "loss": 0.9332, + "step": 27172 + }, + { + "epoch": 0.03, + "learning_rate": 4.990961267859474e-05, + "loss": 1.069, + "step": 27173 + }, + { + "epoch": 0.03, + "learning_rate": 4.990960600519464e-05, + "loss": 0.4079, + "step": 27174 + }, + { + "epoch": 0.03, + "learning_rate": 4.990959933154864e-05, + "loss": 0.2566, + "step": 27175 + }, + { + "epoch": 0.03, + "learning_rate": 4.990959265765674e-05, + "loss": 0.3453, + "step": 27176 + }, + { + "epoch": 0.03, + "learning_rate": 4.990958598351895e-05, + "loss": 0.1878, + "step": 27177 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909579309135255e-05, + "loss": 0.2198, + "step": 27178 + }, + { + "epoch": 0.03, + "learning_rate": 4.990957263450567e-05, + "loss": 0.5531, + "step": 27179 + }, + { + "epoch": 0.03, + "learning_rate": 4.990956595963018e-05, + "loss": 0.9182, + "step": 27180 + }, + { + "epoch": 0.03, + "learning_rate": 4.99095592845088e-05, + "loss": 0.7606, + "step": 27181 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909552609141524e-05, + "loss": 0.9266, + "step": 27182 + }, + { + "epoch": 0.03, + "learning_rate": 4.990954593352835e-05, + "loss": 0.9673, + "step": 27183 + }, + { + "epoch": 0.03, + "learning_rate": 4.990953925766928e-05, + "loss": 1.0877, + "step": 27184 + }, + { + "epoch": 0.03, + "learning_rate": 4.990953258156431e-05, + "loss": 1.4811, + "step": 27185 + }, + { + "epoch": 0.03, + "learning_rate": 4.990952590521345e-05, + "loss": 0.919, + "step": 27186 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909519228616685e-05, + "loss": 0.9347, + "step": 27187 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909512551774026e-05, + "loss": 0.6831, + "step": 27188 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909505874685484e-05, + "loss": 0.609, + "step": 27189 + }, + { + "epoch": 0.03, + "learning_rate": 4.990949919735103e-05, + "loss": 0.7479, + "step": 27190 + }, + { + "epoch": 0.03, + "learning_rate": 4.990949251977069e-05, + "loss": 1.0012, + "step": 27191 + }, + { + "epoch": 0.03, + "learning_rate": 4.990948584194445e-05, + "loss": 0.8955, + "step": 27192 + }, + { + "epoch": 0.03, + "learning_rate": 4.990947916387231e-05, + "loss": 0.9165, + "step": 27193 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909472485554275e-05, + "loss": 0.8733, + "step": 27194 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909465806990344e-05, + "loss": 1.227, + "step": 27195 + }, + { + "epoch": 0.03, + "learning_rate": 4.990945912818052e-05, + "loss": 1.3044, + "step": 27196 + }, + { + "epoch": 0.03, + "learning_rate": 4.99094524491248e-05, + "loss": 1.2288, + "step": 27197 + }, + { + "epoch": 0.03, + "learning_rate": 4.990944576982319e-05, + "loss": 0.9239, + "step": 27198 + }, + { + "epoch": 0.03, + "learning_rate": 4.990943909027568e-05, + "loss": 1.2269, + "step": 27199 + }, + { + "epoch": 0.03, + "learning_rate": 4.990943241048227e-05, + "loss": 0.8428, + "step": 27200 + }, + { + "epoch": 0.03, + "learning_rate": 4.990942573044297e-05, + "loss": 1.1116, + "step": 27201 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909419050157766e-05, + "loss": 0.7963, + "step": 27202 + }, + { + "epoch": 0.03, + "learning_rate": 4.990941236962667e-05, + "loss": 1.1804, + "step": 27203 + }, + { + "epoch": 0.03, + "learning_rate": 4.990940568884969e-05, + "loss": 0.9363, + "step": 27204 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909399007826806e-05, + "loss": 1.0536, + "step": 27205 + }, + { + "epoch": 0.03, + "learning_rate": 4.990939232655803e-05, + "loss": 0.8306, + "step": 27206 + }, + { + "epoch": 0.03, + "learning_rate": 4.990938564504336e-05, + "loss": 1.0922, + "step": 27207 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909378963282785e-05, + "loss": 1.0285, + "step": 27208 + }, + { + "epoch": 0.03, + "learning_rate": 4.990937228127632e-05, + "loss": 1.0117, + "step": 27209 + }, + { + "epoch": 0.03, + "learning_rate": 4.990936559902397e-05, + "loss": 0.9404, + "step": 27210 + }, + { + "epoch": 0.03, + "learning_rate": 4.990935891652572e-05, + "loss": 0.9749, + "step": 27211 + }, + { + "epoch": 0.03, + "learning_rate": 4.990935223378157e-05, + "loss": 0.7205, + "step": 27212 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909345550791525e-05, + "loss": 1.0468, + "step": 27213 + }, + { + "epoch": 0.03, + "learning_rate": 4.990933886755559e-05, + "loss": 1.0374, + "step": 27214 + }, + { + "epoch": 0.03, + "learning_rate": 4.990933218407375e-05, + "loss": 1.1134, + "step": 27215 + }, + { + "epoch": 0.03, + "learning_rate": 4.990932550034603e-05, + "loss": 1.225, + "step": 27216 + }, + { + "epoch": 0.03, + "learning_rate": 4.990931881637241e-05, + "loss": 1.0937, + "step": 27217 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909312132152894e-05, + "loss": 1.0351, + "step": 27218 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909305447687494e-05, + "loss": 0.9798, + "step": 27219 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909298762976184e-05, + "loss": 1.1342, + "step": 27220 + }, + { + "epoch": 0.03, + "learning_rate": 4.990929207801899e-05, + "loss": 1.1391, + "step": 27221 + }, + { + "epoch": 0.03, + "learning_rate": 4.99092853928159e-05, + "loss": 1.2792, + "step": 27222 + }, + { + "epoch": 0.03, + "learning_rate": 4.990927870736691e-05, + "loss": 0.8999, + "step": 27223 + }, + { + "epoch": 0.03, + "learning_rate": 4.990927202167204e-05, + "loss": 0.7395, + "step": 27224 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909265335731264e-05, + "loss": 0.9834, + "step": 27225 + }, + { + "epoch": 0.03, + "learning_rate": 4.99092586495446e-05, + "loss": 0.7025, + "step": 27226 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909251963112036e-05, + "loss": 1.3661, + "step": 27227 + }, + { + "epoch": 0.03, + "learning_rate": 4.990924527643358e-05, + "loss": 1.1217, + "step": 27228 + }, + { + "epoch": 0.03, + "learning_rate": 4.990923858950924e-05, + "loss": 0.9009, + "step": 27229 + }, + { + "epoch": 0.03, + "learning_rate": 4.990923190233899e-05, + "loss": 0.8427, + "step": 27230 + }, + { + "epoch": 0.03, + "learning_rate": 4.990922521492286e-05, + "loss": 0.9321, + "step": 27231 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909218527260834e-05, + "loss": 1.1869, + "step": 27232 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909211839352916e-05, + "loss": 1.1952, + "step": 27233 + }, + { + "epoch": 0.03, + "learning_rate": 4.99092051511991e-05, + "loss": 0.8818, + "step": 27234 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909198462799396e-05, + "loss": 0.9442, + "step": 27235 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909191774153794e-05, + "loss": 0.6818, + "step": 27236 + }, + { + "epoch": 0.03, + "learning_rate": 4.99091850852623e-05, + "loss": 1.3287, + "step": 27237 + }, + { + "epoch": 0.03, + "learning_rate": 4.990917839612491e-05, + "loss": 0.8901, + "step": 27238 + }, + { + "epoch": 0.03, + "learning_rate": 4.990917170674164e-05, + "loss": 1.1095, + "step": 27239 + }, + { + "epoch": 0.03, + "learning_rate": 4.990916501711246e-05, + "loss": 1.2713, + "step": 27240 + }, + { + "epoch": 0.03, + "learning_rate": 4.99091583272374e-05, + "loss": 1.3348, + "step": 27241 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909151637116444e-05, + "loss": 1.0375, + "step": 27242 + }, + { + "epoch": 0.03, + "learning_rate": 4.990914494674959e-05, + "loss": 0.9162, + "step": 27243 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909138256136844e-05, + "loss": 1.0462, + "step": 27244 + }, + { + "epoch": 0.03, + "learning_rate": 4.990913156527821e-05, + "loss": 0.859, + "step": 27245 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909124874173684e-05, + "loss": 0.9271, + "step": 27246 + }, + { + "epoch": 0.03, + "learning_rate": 4.990911818282327e-05, + "loss": 1.2253, + "step": 27247 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909111491226956e-05, + "loss": 1.1501, + "step": 27248 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909104799384746e-05, + "loss": 0.989, + "step": 27249 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909098107296646e-05, + "loss": 1.0255, + "step": 27250 + }, + { + "epoch": 0.03, + "learning_rate": 4.990909141496266e-05, + "loss": 1.0777, + "step": 27251 + }, + { + "epoch": 0.03, + "learning_rate": 4.990908472238278e-05, + "loss": 0.9465, + "step": 27252 + }, + { + "epoch": 0.03, + "learning_rate": 4.990907802955701e-05, + "loss": 1.8296, + "step": 27253 + }, + { + "epoch": 0.03, + "learning_rate": 4.990907133648535e-05, + "loss": 0.7213, + "step": 27254 + }, + { + "epoch": 0.03, + "learning_rate": 4.990906464316779e-05, + "loss": 0.6841, + "step": 27255 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909057949604336e-05, + "loss": 0.6811, + "step": 27256 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909051255795e-05, + "loss": 0.638, + "step": 27257 + }, + { + "epoch": 0.03, + "learning_rate": 4.990904456173976e-05, + "loss": 0.6217, + "step": 27258 + }, + { + "epoch": 0.03, + "learning_rate": 4.990903786743865e-05, + "loss": 0.5802, + "step": 27259 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909031172891626e-05, + "loss": 0.5987, + "step": 27260 + }, + { + "epoch": 0.03, + "learning_rate": 4.990902447809872e-05, + "loss": 0.6173, + "step": 27261 + }, + { + "epoch": 0.03, + "learning_rate": 4.990901778305992e-05, + "loss": 0.6069, + "step": 27262 + }, + { + "epoch": 0.03, + "learning_rate": 4.9909011087775235e-05, + "loss": 0.606, + "step": 27263 + }, + { + "epoch": 0.03, + "learning_rate": 4.990900439224465e-05, + "loss": 0.6202, + "step": 27264 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908997696468183e-05, + "loss": 0.5989, + "step": 27265 + }, + { + "epoch": 0.03, + "learning_rate": 4.990899100044582e-05, + "loss": 0.6124, + "step": 27266 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908984304177564e-05, + "loss": 0.5295, + "step": 27267 + }, + { + "epoch": 0.03, + "learning_rate": 4.990897760766342e-05, + "loss": 0.6406, + "step": 27268 + }, + { + "epoch": 0.03, + "learning_rate": 4.990897091090338e-05, + "loss": 0.6193, + "step": 27269 + }, + { + "epoch": 0.03, + "learning_rate": 4.990896421389746e-05, + "loss": 0.6189, + "step": 27270 + }, + { + "epoch": 0.03, + "learning_rate": 4.990895751664564e-05, + "loss": 0.5185, + "step": 27271 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908950819147934e-05, + "loss": 0.5183, + "step": 27272 + }, + { + "epoch": 0.03, + "learning_rate": 4.990894412140433e-05, + "loss": 0.5313, + "step": 27273 + }, + { + "epoch": 0.03, + "learning_rate": 4.990893742341485e-05, + "loss": 0.701, + "step": 27274 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908930725179466e-05, + "loss": 0.9766, + "step": 27275 + }, + { + "epoch": 0.03, + "learning_rate": 4.990892402669819e-05, + "loss": 0.7653, + "step": 27276 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908917327971034e-05, + "loss": 1.2227, + "step": 27277 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908910628997986e-05, + "loss": 1.0102, + "step": 27278 + }, + { + "epoch": 0.03, + "learning_rate": 4.990890392977904e-05, + "loss": 1.1506, + "step": 27279 + }, + { + "epoch": 0.03, + "learning_rate": 4.990889723031421e-05, + "loss": 0.936, + "step": 27280 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908890530603487e-05, + "loss": 1.2616, + "step": 27281 + }, + { + "epoch": 0.03, + "learning_rate": 4.990888383064688e-05, + "loss": 0.9683, + "step": 27282 + }, + { + "epoch": 0.03, + "learning_rate": 4.990887713044438e-05, + "loss": 0.7111, + "step": 27283 + }, + { + "epoch": 0.03, + "learning_rate": 4.990887042999598e-05, + "loss": 0.7616, + "step": 27284 + }, + { + "epoch": 0.03, + "learning_rate": 4.99088637293017e-05, + "loss": 1.1338, + "step": 27285 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908857028361524e-05, + "loss": 0.9861, + "step": 27286 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908850327175464e-05, + "loss": 1.1483, + "step": 27287 + }, + { + "epoch": 0.03, + "learning_rate": 4.990884362574352e-05, + "loss": 0.8348, + "step": 27288 + }, + { + "epoch": 0.03, + "learning_rate": 4.990883692406567e-05, + "loss": 1.123, + "step": 27289 + }, + { + "epoch": 0.03, + "learning_rate": 4.990883022214195e-05, + "loss": 0.9673, + "step": 27290 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908823519972325e-05, + "loss": 1.0802, + "step": 27291 + }, + { + "epoch": 0.03, + "learning_rate": 4.990881681755681e-05, + "loss": 0.6471, + "step": 27292 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908810114895425e-05, + "loss": 0.4516, + "step": 27293 + }, + { + "epoch": 0.03, + "learning_rate": 4.990880341198813e-05, + "loss": 1.072, + "step": 27294 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908796708834956e-05, + "loss": 0.8767, + "step": 27295 + }, + { + "epoch": 0.03, + "learning_rate": 4.990879000543589e-05, + "loss": 0.9782, + "step": 27296 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908783301790926e-05, + "loss": 1.2875, + "step": 27297 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908776597900086e-05, + "loss": 1.2192, + "step": 27298 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908769893763356e-05, + "loss": 0.7961, + "step": 27299 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908763189380735e-05, + "loss": 1.0628, + "step": 27300 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908756484752224e-05, + "loss": 1.1809, + "step": 27301 + }, + { + "epoch": 0.03, + "learning_rate": 4.990874977987783e-05, + "loss": 1.0669, + "step": 27302 + }, + { + "epoch": 0.03, + "learning_rate": 4.990874307475754e-05, + "loss": 1.0891, + "step": 27303 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908736369391364e-05, + "loss": 0.9147, + "step": 27304 + }, + { + "epoch": 0.03, + "learning_rate": 4.99087296637793e-05, + "loss": 0.8192, + "step": 27305 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908722957921336e-05, + "loss": 0.2421, + "step": 27306 + }, + { + "epoch": 0.03, + "learning_rate": 4.99087162518175e-05, + "loss": 1.3424, + "step": 27307 + }, + { + "epoch": 0.03, + "learning_rate": 4.990870954546777e-05, + "loss": 1.4184, + "step": 27308 + }, + { + "epoch": 0.03, + "learning_rate": 4.990870283887215e-05, + "loss": 0.6105, + "step": 27309 + }, + { + "epoch": 0.03, + "learning_rate": 4.990869613203064e-05, + "loss": 0.1075, + "step": 27310 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908689424943245e-05, + "loss": 0.173, + "step": 27311 + }, + { + "epoch": 0.03, + "learning_rate": 4.990868271760997e-05, + "loss": 0.2078, + "step": 27312 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908676010030794e-05, + "loss": 0.0899, + "step": 27313 + }, + { + "epoch": 0.03, + "learning_rate": 4.990866930220574e-05, + "loss": 0.169, + "step": 27314 + }, + { + "epoch": 0.03, + "learning_rate": 4.990866259413479e-05, + "loss": 0.4511, + "step": 27315 + }, + { + "epoch": 0.03, + "learning_rate": 4.990865588581795e-05, + "loss": 0.9065, + "step": 27316 + }, + { + "epoch": 0.03, + "learning_rate": 4.990864917725523e-05, + "loss": 1.0429, + "step": 27317 + }, + { + "epoch": 0.03, + "learning_rate": 4.990864246844662e-05, + "loss": 1.1346, + "step": 27318 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908635759392116e-05, + "loss": 1.0786, + "step": 27319 + }, + { + "epoch": 0.03, + "learning_rate": 4.990862905009174e-05, + "loss": 1.1172, + "step": 27320 + }, + { + "epoch": 0.03, + "learning_rate": 4.990862234054546e-05, + "loss": 1.0875, + "step": 27321 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908615630753296e-05, + "loss": 1.1038, + "step": 27322 + }, + { + "epoch": 0.03, + "learning_rate": 4.990860892071525e-05, + "loss": 0.9505, + "step": 27323 + }, + { + "epoch": 0.03, + "learning_rate": 4.990860221043132e-05, + "loss": 1.1706, + "step": 27324 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908595499901496e-05, + "loss": 0.8188, + "step": 27325 + }, + { + "epoch": 0.03, + "learning_rate": 4.990858878912579e-05, + "loss": 1.129, + "step": 27326 + }, + { + "epoch": 0.03, + "learning_rate": 4.990858207810419e-05, + "loss": 0.7701, + "step": 27327 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908575366836705e-05, + "loss": 0.8286, + "step": 27328 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908568655323334e-05, + "loss": 0.8355, + "step": 27329 + }, + { + "epoch": 0.03, + "learning_rate": 4.990856194356408e-05, + "loss": 0.8331, + "step": 27330 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908555231558935e-05, + "loss": 0.8105, + "step": 27331 + }, + { + "epoch": 0.03, + "learning_rate": 4.99085485193079e-05, + "loss": 0.9011, + "step": 27332 + }, + { + "epoch": 0.03, + "learning_rate": 4.990854180681098e-05, + "loss": 0.8298, + "step": 27333 + }, + { + "epoch": 0.03, + "learning_rate": 4.990853509406818e-05, + "loss": 1.1051, + "step": 27334 + }, + { + "epoch": 0.03, + "learning_rate": 4.990852838107949e-05, + "loss": 1.0444, + "step": 27335 + }, + { + "epoch": 0.03, + "learning_rate": 4.990852166784491e-05, + "loss": 1.1214, + "step": 27336 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908514954364446e-05, + "loss": 0.7787, + "step": 27337 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908508240638096e-05, + "loss": 0.9952, + "step": 27338 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908501526665863e-05, + "loss": 0.6619, + "step": 27339 + }, + { + "epoch": 0.03, + "learning_rate": 4.990849481244774e-05, + "loss": 0.8137, + "step": 27340 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908488097983727e-05, + "loss": 1.0868, + "step": 27341 + }, + { + "epoch": 0.03, + "learning_rate": 4.990848138327383e-05, + "loss": 0.8065, + "step": 27342 + }, + { + "epoch": 0.03, + "learning_rate": 4.990847466831805e-05, + "loss": 0.6516, + "step": 27343 + }, + { + "epoch": 0.03, + "learning_rate": 4.990846795311639e-05, + "loss": 1.1638, + "step": 27344 + }, + { + "epoch": 0.03, + "learning_rate": 4.990846123766884e-05, + "loss": 1.1088, + "step": 27345 + }, + { + "epoch": 0.03, + "learning_rate": 4.990845452197539e-05, + "loss": 0.8209, + "step": 27346 + }, + { + "epoch": 0.03, + "learning_rate": 4.990844780603607e-05, + "loss": 1.3272, + "step": 27347 + }, + { + "epoch": 0.03, + "learning_rate": 4.990844108985086e-05, + "loss": 1.08, + "step": 27348 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908434373419765e-05, + "loss": 1.075, + "step": 27349 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908427656742786e-05, + "loss": 1.1984, + "step": 27350 + }, + { + "epoch": 0.03, + "learning_rate": 4.990842093981992e-05, + "loss": 0.7415, + "step": 27351 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908414222651165e-05, + "loss": 0.6888, + "step": 27352 + }, + { + "epoch": 0.03, + "learning_rate": 4.990840750523653e-05, + "loss": 0.9654, + "step": 27353 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908400787576e-05, + "loss": 0.9118, + "step": 27354 + }, + { + "epoch": 0.03, + "learning_rate": 4.990839406966959e-05, + "loss": 0.8828, + "step": 27355 + }, + { + "epoch": 0.03, + "learning_rate": 4.990838735151731e-05, + "loss": 0.9993, + "step": 27356 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908380633119123e-05, + "loss": 1.2514, + "step": 27357 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908373914475057e-05, + "loss": 1.1863, + "step": 27358 + }, + { + "epoch": 0.03, + "learning_rate": 4.990836719558511e-05, + "loss": 1.7146, + "step": 27359 + }, + { + "epoch": 0.03, + "learning_rate": 4.990836047644928e-05, + "loss": 0.8938, + "step": 27360 + }, + { + "epoch": 0.03, + "learning_rate": 4.990835375706756e-05, + "loss": 0.834, + "step": 27361 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908347037439954e-05, + "loss": 0.7464, + "step": 27362 + }, + { + "epoch": 0.03, + "learning_rate": 4.990834031756647e-05, + "loss": 0.9525, + "step": 27363 + }, + { + "epoch": 0.03, + "learning_rate": 4.99083335974471e-05, + "loss": 1.116, + "step": 27364 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908326877081836e-05, + "loss": 0.9974, + "step": 27365 + }, + { + "epoch": 0.03, + "learning_rate": 4.99083201564707e-05, + "loss": 1.2265, + "step": 27366 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908313435613676e-05, + "loss": 1.2869, + "step": 27367 + }, + { + "epoch": 0.03, + "learning_rate": 4.990830671451076e-05, + "loss": 1.0796, + "step": 27368 + }, + { + "epoch": 0.03, + "learning_rate": 4.990829999316197e-05, + "loss": 1.0934, + "step": 27369 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908293271567286e-05, + "loss": 1.026, + "step": 27370 + }, + { + "epoch": 0.03, + "learning_rate": 4.990828654972673e-05, + "loss": 1.1199, + "step": 27371 + }, + { + "epoch": 0.03, + "learning_rate": 4.990827982764028e-05, + "loss": 1.1187, + "step": 27372 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908273105307944e-05, + "loss": 1.5239, + "step": 27373 + }, + { + "epoch": 0.03, + "learning_rate": 4.990826638272973e-05, + "loss": 1.2044, + "step": 27374 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908259659905635e-05, + "loss": 1.5089, + "step": 27375 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908252936835644e-05, + "loss": 1.2654, + "step": 27376 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908246213519784e-05, + "loss": 1.3399, + "step": 27377 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908239489958034e-05, + "loss": 1.3356, + "step": 27378 + }, + { + "epoch": 0.03, + "learning_rate": 4.99082327661504e-05, + "loss": 1.3519, + "step": 27379 + }, + { + "epoch": 0.03, + "learning_rate": 4.990822604209688e-05, + "loss": 1.3109, + "step": 27380 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908219317797475e-05, + "loss": 1.2395, + "step": 27381 + }, + { + "epoch": 0.03, + "learning_rate": 4.990821259325219e-05, + "loss": 1.0454, + "step": 27382 + }, + { + "epoch": 0.03, + "learning_rate": 4.990820586846102e-05, + "loss": 1.045, + "step": 27383 + }, + { + "epoch": 0.03, + "learning_rate": 4.990819914342397e-05, + "loss": 0.8534, + "step": 27384 + }, + { + "epoch": 0.03, + "learning_rate": 4.990819241814104e-05, + "loss": 0.9316, + "step": 27385 + }, + { + "epoch": 0.03, + "learning_rate": 4.990818569261222e-05, + "loss": 0.8912, + "step": 27386 + }, + { + "epoch": 0.03, + "learning_rate": 4.990817896683752e-05, + "loss": 1.1141, + "step": 27387 + }, + { + "epoch": 0.03, + "learning_rate": 4.990817224081694e-05, + "loss": 0.9962, + "step": 27388 + }, + { + "epoch": 0.03, + "learning_rate": 4.990816551455047e-05, + "loss": 0.9092, + "step": 27389 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908158788038115e-05, + "loss": 1.4051, + "step": 27390 + }, + { + "epoch": 0.03, + "learning_rate": 4.990815206127989e-05, + "loss": 1.0029, + "step": 27391 + }, + { + "epoch": 0.03, + "learning_rate": 4.990814533427577e-05, + "loss": 1.1518, + "step": 27392 + }, + { + "epoch": 0.03, + "learning_rate": 4.990813860702578e-05, + "loss": 1.9848, + "step": 27393 + }, + { + "epoch": 0.03, + "learning_rate": 4.99081318795299e-05, + "loss": 1.7895, + "step": 27394 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908125151788134e-05, + "loss": 0.9852, + "step": 27395 + }, + { + "epoch": 0.03, + "learning_rate": 4.990811842380049e-05, + "loss": 1.0671, + "step": 27396 + }, + { + "epoch": 0.03, + "learning_rate": 4.990811169556696e-05, + "loss": 1.1041, + "step": 27397 + }, + { + "epoch": 0.03, + "learning_rate": 4.990810496708754e-05, + "loss": 1.1199, + "step": 27398 + }, + { + "epoch": 0.03, + "learning_rate": 4.990809823836225e-05, + "loss": 1.0155, + "step": 27399 + }, + { + "epoch": 0.03, + "learning_rate": 4.990809150939107e-05, + "loss": 0.938, + "step": 27400 + }, + { + "epoch": 0.03, + "learning_rate": 4.990808478017402e-05, + "loss": 0.6957, + "step": 27401 + }, + { + "epoch": 0.03, + "learning_rate": 4.990807805071108e-05, + "loss": 0.8675, + "step": 27402 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908071321002256e-05, + "loss": 0.9742, + "step": 27403 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908064591047555e-05, + "loss": 0.8063, + "step": 27404 + }, + { + "epoch": 0.03, + "learning_rate": 4.990805786084697e-05, + "loss": 0.9868, + "step": 27405 + }, + { + "epoch": 0.03, + "learning_rate": 4.99080511304005e-05, + "loss": 0.9545, + "step": 27406 + }, + { + "epoch": 0.03, + "learning_rate": 4.990804439970815e-05, + "loss": 1.0049, + "step": 27407 + }, + { + "epoch": 0.03, + "learning_rate": 4.990803766876992e-05, + "loss": 0.6532, + "step": 27408 + }, + { + "epoch": 0.03, + "learning_rate": 4.990803093758582e-05, + "loss": 0.9871, + "step": 27409 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908024206155815e-05, + "loss": 0.837, + "step": 27410 + }, + { + "epoch": 0.03, + "learning_rate": 4.990801747447994e-05, + "loss": 1.3217, + "step": 27411 + }, + { + "epoch": 0.03, + "learning_rate": 4.990801074255819e-05, + "loss": 0.8155, + "step": 27412 + }, + { + "epoch": 0.03, + "learning_rate": 4.990800401039055e-05, + "loss": 0.4876, + "step": 27413 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907997277977025e-05, + "loss": 1.2493, + "step": 27414 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907990545317626e-05, + "loss": 0.9459, + "step": 27415 + }, + { + "epoch": 0.03, + "learning_rate": 4.990798381241234e-05, + "loss": 1.1274, + "step": 27416 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907977079261184e-05, + "loss": 1.0627, + "step": 27417 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907970345864134e-05, + "loss": 0.8461, + "step": 27418 + }, + { + "epoch": 0.03, + "learning_rate": 4.990796361222121e-05, + "loss": 1.0114, + "step": 27419 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907956878332404e-05, + "loss": 1.125, + "step": 27420 + }, + { + "epoch": 0.03, + "learning_rate": 4.990795014419771e-05, + "loss": 0.7903, + "step": 27421 + }, + { + "epoch": 0.03, + "learning_rate": 4.990794340981715e-05, + "loss": 0.5785, + "step": 27422 + }, + { + "epoch": 0.03, + "learning_rate": 4.99079366751907e-05, + "loss": 0.5387, + "step": 27423 + }, + { + "epoch": 0.03, + "learning_rate": 4.990792994031837e-05, + "loss": 0.3755, + "step": 27424 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907923205200155e-05, + "loss": 0.797, + "step": 27425 + }, + { + "epoch": 0.03, + "learning_rate": 4.990791646983607e-05, + "loss": 0.567, + "step": 27426 + }, + { + "epoch": 0.03, + "learning_rate": 4.99079097342261e-05, + "loss": 0.4354, + "step": 27427 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907902998370246e-05, + "loss": 0.5497, + "step": 27428 + }, + { + "epoch": 0.03, + "learning_rate": 4.990789626226852e-05, + "loss": 0.8607, + "step": 27429 + }, + { + "epoch": 0.03, + "learning_rate": 4.99078895259209e-05, + "loss": 0.9411, + "step": 27430 + }, + { + "epoch": 0.03, + "learning_rate": 4.990788278932741e-05, + "loss": 1.0779, + "step": 27431 + }, + { + "epoch": 0.03, + "learning_rate": 4.990787605248804e-05, + "loss": 1.0584, + "step": 27432 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907869315402786e-05, + "loss": 0.886, + "step": 27433 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907862578071655e-05, + "loss": 0.8585, + "step": 27434 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907855840494646e-05, + "loss": 0.9671, + "step": 27435 + }, + { + "epoch": 0.03, + "learning_rate": 4.990784910267175e-05, + "loss": 1.1184, + "step": 27436 + }, + { + "epoch": 0.03, + "learning_rate": 4.990784236460298e-05, + "loss": 0.803, + "step": 27437 + }, + { + "epoch": 0.03, + "learning_rate": 4.990783562628833e-05, + "loss": 1.1492, + "step": 27438 + }, + { + "epoch": 0.03, + "learning_rate": 4.99078288877278e-05, + "loss": 0.9381, + "step": 27439 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907822148921386e-05, + "loss": 1.2156, + "step": 27440 + }, + { + "epoch": 0.03, + "learning_rate": 4.99078154098691e-05, + "loss": 0.892, + "step": 27441 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907808670570924e-05, + "loss": 0.7297, + "step": 27442 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907801931026875e-05, + "loss": 1.1257, + "step": 27443 + }, + { + "epoch": 0.03, + "learning_rate": 4.990779519123695e-05, + "loss": 1.1545, + "step": 27444 + }, + { + "epoch": 0.03, + "learning_rate": 4.990778845120114e-05, + "loss": 0.9053, + "step": 27445 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907781710919453e-05, + "loss": 0.7571, + "step": 27446 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907774970391884e-05, + "loss": 0.789, + "step": 27447 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907768229618444e-05, + "loss": 0.8883, + "step": 27448 + }, + { + "epoch": 0.03, + "learning_rate": 4.990776148859912e-05, + "loss": 1.0241, + "step": 27449 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907754747333914e-05, + "loss": 1.134, + "step": 27450 + }, + { + "epoch": 0.03, + "learning_rate": 4.990774800582283e-05, + "loss": 1.1467, + "step": 27451 + }, + { + "epoch": 0.03, + "learning_rate": 4.990774126406588e-05, + "loss": 0.7086, + "step": 27452 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907734522063034e-05, + "loss": 0.6248, + "step": 27453 + }, + { + "epoch": 0.03, + "learning_rate": 4.990772777981432e-05, + "loss": 1.2557, + "step": 27454 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907721037319723e-05, + "loss": 1.7849, + "step": 27455 + }, + { + "epoch": 0.03, + "learning_rate": 4.990771429457924e-05, + "loss": 1.0018, + "step": 27456 + }, + { + "epoch": 0.03, + "learning_rate": 4.990770755159289e-05, + "loss": 0.7148, + "step": 27457 + }, + { + "epoch": 0.03, + "learning_rate": 4.990770080836066e-05, + "loss": 0.6617, + "step": 27458 + }, + { + "epoch": 0.03, + "learning_rate": 4.990769406488255e-05, + "loss": 0.89, + "step": 27459 + }, + { + "epoch": 0.03, + "learning_rate": 4.990768732115856e-05, + "loss": 0.8707, + "step": 27460 + }, + { + "epoch": 0.03, + "learning_rate": 4.990768057718869e-05, + "loss": 0.8775, + "step": 27461 + }, + { + "epoch": 0.03, + "learning_rate": 4.990767383297295e-05, + "loss": 0.584, + "step": 27462 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907667088511324e-05, + "loss": 0.9088, + "step": 27463 + }, + { + "epoch": 0.03, + "learning_rate": 4.990766034380382e-05, + "loss": 0.9741, + "step": 27464 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907653598850445e-05, + "loss": 1.0751, + "step": 27465 + }, + { + "epoch": 0.03, + "learning_rate": 4.990764685365119e-05, + "loss": 1.1836, + "step": 27466 + }, + { + "epoch": 0.03, + "learning_rate": 4.990764010820605e-05, + "loss": 0.8842, + "step": 27467 + }, + { + "epoch": 0.03, + "learning_rate": 4.990763336251504e-05, + "loss": 1.3296, + "step": 27468 + }, + { + "epoch": 0.03, + "learning_rate": 4.990762661657815e-05, + "loss": 1.2634, + "step": 27469 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907619870395385e-05, + "loss": 0.9266, + "step": 27470 + }, + { + "epoch": 0.03, + "learning_rate": 4.990761312396673e-05, + "loss": 0.8711, + "step": 27471 + }, + { + "epoch": 0.03, + "learning_rate": 4.990760637729222e-05, + "loss": 1.0534, + "step": 27472 + }, + { + "epoch": 0.03, + "learning_rate": 4.990759963037181e-05, + "loss": 0.9023, + "step": 27473 + }, + { + "epoch": 0.03, + "learning_rate": 4.990759288320554e-05, + "loss": 0.7195, + "step": 27474 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907586135793386e-05, + "loss": 0.9525, + "step": 27475 + }, + { + "epoch": 0.03, + "learning_rate": 4.990757938813535e-05, + "loss": 1.1232, + "step": 27476 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907572640231445e-05, + "loss": 0.9577, + "step": 27477 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907565892081657e-05, + "loss": 1.0836, + "step": 27478 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907559143686e-05, + "loss": 1.0288, + "step": 27479 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907552395044456e-05, + "loss": 0.9623, + "step": 27480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907545646157037e-05, + "loss": 0.9658, + "step": 27481 + }, + { + "epoch": 0.03, + "learning_rate": 4.990753889702374e-05, + "loss": 1.0748, + "step": 27482 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907532147644575e-05, + "loss": 0.9827, + "step": 27483 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907525398019526e-05, + "loss": 1.1319, + "step": 27484 + }, + { + "epoch": 0.03, + "learning_rate": 4.990751864814861e-05, + "loss": 1.0493, + "step": 27485 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907511898031804e-05, + "loss": 0.6807, + "step": 27486 + }, + { + "epoch": 0.03, + "learning_rate": 4.990750514766913e-05, + "loss": 0.6604, + "step": 27487 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907498397060576e-05, + "loss": 0.9694, + "step": 27488 + }, + { + "epoch": 0.03, + "learning_rate": 4.990749164620614e-05, + "loss": 1.0879, + "step": 27489 + }, + { + "epoch": 0.03, + "learning_rate": 4.990748489510584e-05, + "loss": 1.0444, + "step": 27490 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907478143759654e-05, + "loss": 1.0801, + "step": 27491 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907471392167605e-05, + "loss": 0.9199, + "step": 27492 + }, + { + "epoch": 0.03, + "learning_rate": 4.990746464032967e-05, + "loss": 0.982, + "step": 27493 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907457888245856e-05, + "loss": 1.2102, + "step": 27494 + }, + { + "epoch": 0.03, + "learning_rate": 4.990745113591617e-05, + "loss": 0.7535, + "step": 27495 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907444383340606e-05, + "loss": 0.8128, + "step": 27496 + }, + { + "epoch": 0.03, + "learning_rate": 4.990743763051917e-05, + "loss": 1.0657, + "step": 27497 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907430877451863e-05, + "loss": 0.7447, + "step": 27498 + }, + { + "epoch": 0.03, + "learning_rate": 4.990742412413867e-05, + "loss": 0.979, + "step": 27499 + }, + { + "epoch": 0.03, + "learning_rate": 4.990741737057961e-05, + "loss": 0.972, + "step": 27500 + }, + { + "epoch": 0.03, + "eval_loss": 1.0637550354003906, + "eval_runtime": 124.9122, + "eval_samples_per_second": 11.088, + "eval_steps_per_second": 5.548, + "step": 27500 + }, + { + "epoch": 0.03, + "learning_rate": 4.990741061677466e-05, + "loss": 1.2999, + "step": 27501 + }, + { + "epoch": 0.03, + "learning_rate": 4.990740386272385e-05, + "loss": 1.151, + "step": 27502 + }, + { + "epoch": 0.03, + "learning_rate": 4.990739710842716e-05, + "loss": 1.0148, + "step": 27503 + }, + { + "epoch": 0.03, + "learning_rate": 4.99073903538846e-05, + "loss": 1.0193, + "step": 27504 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907383599096154e-05, + "loss": 1.006, + "step": 27505 + }, + { + "epoch": 0.03, + "learning_rate": 4.990737684406184e-05, + "loss": 1.2982, + "step": 27506 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907370088781644e-05, + "loss": 1.0302, + "step": 27507 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907363333255574e-05, + "loss": 1.0527, + "step": 27508 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907356577483633e-05, + "loss": 1.0209, + "step": 27509 + }, + { + "epoch": 0.03, + "learning_rate": 4.990734982146582e-05, + "loss": 1.0983, + "step": 27510 + }, + { + "epoch": 0.03, + "learning_rate": 4.990734306520213e-05, + "loss": 0.9588, + "step": 27511 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907336308692566e-05, + "loss": 1.1109, + "step": 27512 + }, + { + "epoch": 0.03, + "learning_rate": 4.990732955193712e-05, + "loss": 0.9269, + "step": 27513 + }, + { + "epoch": 0.03, + "learning_rate": 4.990732279493581e-05, + "loss": 1.1877, + "step": 27514 + }, + { + "epoch": 0.03, + "learning_rate": 4.990731603768861e-05, + "loss": 0.8668, + "step": 27515 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907309280195545e-05, + "loss": 0.911, + "step": 27516 + }, + { + "epoch": 0.03, + "learning_rate": 4.990730252245661e-05, + "loss": 0.9705, + "step": 27517 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907295764471794e-05, + "loss": 0.9137, + "step": 27518 + }, + { + "epoch": 0.03, + "learning_rate": 4.990728900624111e-05, + "loss": 0.8024, + "step": 27519 + }, + { + "epoch": 0.03, + "learning_rate": 4.990728224776455e-05, + "loss": 1.2102, + "step": 27520 + }, + { + "epoch": 0.03, + "learning_rate": 4.990727548904211e-05, + "loss": 1.0246, + "step": 27521 + }, + { + "epoch": 0.03, + "learning_rate": 4.99072687300738e-05, + "loss": 1.0258, + "step": 27522 + }, + { + "epoch": 0.03, + "learning_rate": 4.990726197085961e-05, + "loss": 0.916, + "step": 27523 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907255211399555e-05, + "loss": 0.861, + "step": 27524 + }, + { + "epoch": 0.03, + "learning_rate": 4.990724845169362e-05, + "loss": 0.895, + "step": 27525 + }, + { + "epoch": 0.03, + "learning_rate": 4.990724169174181e-05, + "loss": 1.0249, + "step": 27526 + }, + { + "epoch": 0.03, + "learning_rate": 4.990723493154414e-05, + "loss": 1.1805, + "step": 27527 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907228171100586e-05, + "loss": 1.1585, + "step": 27528 + }, + { + "epoch": 0.03, + "learning_rate": 4.990722141041115e-05, + "loss": 0.7603, + "step": 27529 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907214649475855e-05, + "loss": 1.0841, + "step": 27530 + }, + { + "epoch": 0.03, + "learning_rate": 4.990720788829468e-05, + "loss": 1.0684, + "step": 27531 + }, + { + "epoch": 0.03, + "learning_rate": 4.990720112686763e-05, + "loss": 0.978, + "step": 27532 + }, + { + "epoch": 0.03, + "learning_rate": 4.990719436519471e-05, + "loss": 1.0668, + "step": 27533 + }, + { + "epoch": 0.03, + "learning_rate": 4.990718760327592e-05, + "loss": 1.1842, + "step": 27534 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907180841111246e-05, + "loss": 0.8993, + "step": 27535 + }, + { + "epoch": 0.03, + "learning_rate": 4.99071740787007e-05, + "loss": 1.0363, + "step": 27536 + }, + { + "epoch": 0.03, + "learning_rate": 4.990716731604429e-05, + "loss": 0.8786, + "step": 27537 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907160553142004e-05, + "loss": 0.7507, + "step": 27538 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907153789993844e-05, + "loss": 1.3899, + "step": 27539 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907147026599806e-05, + "loss": 1.2364, + "step": 27540 + }, + { + "epoch": 0.03, + "learning_rate": 4.99071402629599e-05, + "loss": 1.3381, + "step": 27541 + }, + { + "epoch": 0.03, + "learning_rate": 4.990713349907412e-05, + "loss": 0.9828, + "step": 27542 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907126734942475e-05, + "loss": 1.161, + "step": 27543 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907119970564944e-05, + "loss": 1.0636, + "step": 27544 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907113205941544e-05, + "loss": 1.0134, + "step": 27545 + }, + { + "epoch": 0.03, + "learning_rate": 4.990710644107228e-05, + "loss": 1.0167, + "step": 27546 + }, + { + "epoch": 0.03, + "learning_rate": 4.990709967595714e-05, + "loss": 1.0319, + "step": 27547 + }, + { + "epoch": 0.03, + "learning_rate": 4.990709291059612e-05, + "loss": 0.6867, + "step": 27548 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907086144989235e-05, + "loss": 0.934, + "step": 27549 + }, + { + "epoch": 0.03, + "learning_rate": 4.990707937913648e-05, + "loss": 1.0217, + "step": 27550 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907072613037845e-05, + "loss": 0.9361, + "step": 27551 + }, + { + "epoch": 0.03, + "learning_rate": 4.990706584669335e-05, + "loss": 1.1049, + "step": 27552 + }, + { + "epoch": 0.03, + "learning_rate": 4.990705908010297e-05, + "loss": 0.8648, + "step": 27553 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907052313266723e-05, + "loss": 0.9345, + "step": 27554 + }, + { + "epoch": 0.03, + "learning_rate": 4.99070455461846e-05, + "loss": 1.0283, + "step": 27555 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907038778856606e-05, + "loss": 1.1007, + "step": 27556 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907032011282746e-05, + "loss": 0.9469, + "step": 27557 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907025243463016e-05, + "loss": 0.9699, + "step": 27558 + }, + { + "epoch": 0.03, + "learning_rate": 4.99070184753974e-05, + "loss": 1.0613, + "step": 27559 + }, + { + "epoch": 0.03, + "learning_rate": 4.9907011707085925e-05, + "loss": 1.0942, + "step": 27560 + }, + { + "epoch": 0.03, + "learning_rate": 4.990700493852858e-05, + "loss": 1.0141, + "step": 27561 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906998169725355e-05, + "loss": 1.2444, + "step": 27562 + }, + { + "epoch": 0.03, + "learning_rate": 4.990699140067626e-05, + "loss": 1.0579, + "step": 27563 + }, + { + "epoch": 0.03, + "learning_rate": 4.99069846313813e-05, + "loss": 1.2241, + "step": 27564 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906977861840464e-05, + "loss": 0.9581, + "step": 27565 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906971092053754e-05, + "loss": 0.987, + "step": 27566 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906964322021174e-05, + "loss": 0.3809, + "step": 27567 + }, + { + "epoch": 0.03, + "learning_rate": 4.990695755174273e-05, + "loss": 0.7408, + "step": 27568 + }, + { + "epoch": 0.03, + "learning_rate": 4.990695078121842e-05, + "loss": 0.8478, + "step": 27569 + }, + { + "epoch": 0.03, + "learning_rate": 4.990694401044822e-05, + "loss": 1.1592, + "step": 27570 + }, + { + "epoch": 0.03, + "learning_rate": 4.990693723943216e-05, + "loss": 1.0349, + "step": 27571 + }, + { + "epoch": 0.03, + "learning_rate": 4.990693046817023e-05, + "loss": 0.994, + "step": 27572 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906923696662425e-05, + "loss": 1.1201, + "step": 27573 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906916924908755e-05, + "loss": 0.9709, + "step": 27574 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906910152909216e-05, + "loss": 1.0456, + "step": 27575 + }, + { + "epoch": 0.03, + "learning_rate": 4.990690338066379e-05, + "loss": 0.9221, + "step": 27576 + }, + { + "epoch": 0.03, + "learning_rate": 4.990689660817251e-05, + "loss": 0.7766, + "step": 27577 + }, + { + "epoch": 0.03, + "learning_rate": 4.990688983543536e-05, + "loss": 0.9734, + "step": 27578 + }, + { + "epoch": 0.03, + "learning_rate": 4.990688306245233e-05, + "loss": 1.0414, + "step": 27579 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906876289223434e-05, + "loss": 0.9512, + "step": 27580 + }, + { + "epoch": 0.03, + "learning_rate": 4.990686951574867e-05, + "loss": 1.0802, + "step": 27581 + }, + { + "epoch": 0.03, + "learning_rate": 4.990686274202803e-05, + "loss": 0.8409, + "step": 27582 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906855968061526e-05, + "loss": 0.745, + "step": 27583 + }, + { + "epoch": 0.03, + "learning_rate": 4.990684919384915e-05, + "loss": 0.91, + "step": 27584 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906842419390904e-05, + "loss": 1.1638, + "step": 27585 + }, + { + "epoch": 0.03, + "learning_rate": 4.990683564468679e-05, + "loss": 1.0809, + "step": 27586 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906828869736796e-05, + "loss": 0.8113, + "step": 27587 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906822094540954e-05, + "loss": 0.9303, + "step": 27588 + }, + { + "epoch": 0.03, + "learning_rate": 4.990681531909922e-05, + "loss": 0.8237, + "step": 27589 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906808543411626e-05, + "loss": 1.0734, + "step": 27590 + }, + { + "epoch": 0.03, + "learning_rate": 4.990680176747816e-05, + "loss": 1.2248, + "step": 27591 + }, + { + "epoch": 0.03, + "learning_rate": 4.990679499129883e-05, + "loss": 0.8428, + "step": 27592 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906788214873626e-05, + "loss": 0.8084, + "step": 27593 + }, + { + "epoch": 0.03, + "learning_rate": 4.990678143820256e-05, + "loss": 1.0489, + "step": 27594 + }, + { + "epoch": 0.03, + "learning_rate": 4.990677466128562e-05, + "loss": 0.8364, + "step": 27595 + }, + { + "epoch": 0.03, + "learning_rate": 4.99067678841228e-05, + "loss": 0.9677, + "step": 27596 + }, + { + "epoch": 0.03, + "learning_rate": 4.990676110671412e-05, + "loss": 1.3534, + "step": 27597 + }, + { + "epoch": 0.03, + "learning_rate": 4.990675432905957e-05, + "loss": 1.0235, + "step": 27598 + }, + { + "epoch": 0.03, + "learning_rate": 4.990674755115916e-05, + "loss": 0.866, + "step": 27599 + }, + { + "epoch": 0.03, + "learning_rate": 4.990674077301287e-05, + "loss": 0.7143, + "step": 27600 + }, + { + "epoch": 0.03, + "learning_rate": 4.990673399462071e-05, + "loss": 1.0331, + "step": 27601 + }, + { + "epoch": 0.03, + "learning_rate": 4.990672721598268e-05, + "loss": 0.9158, + "step": 27602 + }, + { + "epoch": 0.03, + "learning_rate": 4.99067204370988e-05, + "loss": 0.9673, + "step": 27603 + }, + { + "epoch": 0.03, + "learning_rate": 4.990671365796903e-05, + "loss": 0.624, + "step": 27604 + }, + { + "epoch": 0.03, + "learning_rate": 4.99067068785934e-05, + "loss": 0.7752, + "step": 27605 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906700098971905e-05, + "loss": 0.9244, + "step": 27606 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906693319104533e-05, + "loss": 0.9467, + "step": 27607 + }, + { + "epoch": 0.03, + "learning_rate": 4.99066865389913e-05, + "loss": 1.0462, + "step": 27608 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906679758632195e-05, + "loss": 0.8384, + "step": 27609 + }, + { + "epoch": 0.03, + "learning_rate": 4.990667297802723e-05, + "loss": 0.9308, + "step": 27610 + }, + { + "epoch": 0.03, + "learning_rate": 4.990666619717638e-05, + "loss": 1.343, + "step": 27611 + }, + { + "epoch": 0.03, + "learning_rate": 4.990665941607967e-05, + "loss": 1.0103, + "step": 27612 + }, + { + "epoch": 0.03, + "learning_rate": 4.990665263473709e-05, + "loss": 1.0382, + "step": 27613 + }, + { + "epoch": 0.03, + "learning_rate": 4.990664585314865e-05, + "loss": 0.7143, + "step": 27614 + }, + { + "epoch": 0.03, + "learning_rate": 4.990663907131434e-05, + "loss": 0.678, + "step": 27615 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906632289234154e-05, + "loss": 0.8631, + "step": 27616 + }, + { + "epoch": 0.03, + "learning_rate": 4.990662550690811e-05, + "loss": 0.8231, + "step": 27617 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906618724336196e-05, + "loss": 0.7382, + "step": 27618 + }, + { + "epoch": 0.03, + "learning_rate": 4.990661194151841e-05, + "loss": 1.2207, + "step": 27619 + }, + { + "epoch": 0.03, + "learning_rate": 4.990660515845476e-05, + "loss": 1.0914, + "step": 27620 + }, + { + "epoch": 0.03, + "learning_rate": 4.990659837514524e-05, + "loss": 0.7356, + "step": 27621 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906591591589857e-05, + "loss": 1.1634, + "step": 27622 + }, + { + "epoch": 0.03, + "learning_rate": 4.99065848077886e-05, + "loss": 0.9894, + "step": 27623 + }, + { + "epoch": 0.03, + "learning_rate": 4.990657802374148e-05, + "loss": 1.0268, + "step": 27624 + }, + { + "epoch": 0.03, + "learning_rate": 4.99065712394485e-05, + "loss": 0.6853, + "step": 27625 + }, + { + "epoch": 0.03, + "learning_rate": 4.990656445490964e-05, + "loss": 1.3608, + "step": 27626 + }, + { + "epoch": 0.03, + "learning_rate": 4.990655767012492e-05, + "loss": 0.9854, + "step": 27627 + }, + { + "epoch": 0.03, + "learning_rate": 4.990655088509433e-05, + "loss": 1.0742, + "step": 27628 + }, + { + "epoch": 0.03, + "learning_rate": 4.990654409981787e-05, + "loss": 0.9559, + "step": 27629 + }, + { + "epoch": 0.03, + "learning_rate": 4.990653731429555e-05, + "loss": 0.7125, + "step": 27630 + }, + { + "epoch": 0.03, + "learning_rate": 4.990653052852736e-05, + "loss": 0.6802, + "step": 27631 + }, + { + "epoch": 0.03, + "learning_rate": 4.99065237425133e-05, + "loss": 0.9675, + "step": 27632 + }, + { + "epoch": 0.03, + "learning_rate": 4.990651695625338e-05, + "loss": 1.1337, + "step": 27633 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906510169747586e-05, + "loss": 1.0941, + "step": 27634 + }, + { + "epoch": 0.03, + "learning_rate": 4.990650338299593e-05, + "loss": 1.2268, + "step": 27635 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906496595998406e-05, + "loss": 1.1015, + "step": 27636 + }, + { + "epoch": 0.03, + "learning_rate": 4.990648980875502e-05, + "loss": 0.9686, + "step": 27637 + }, + { + "epoch": 0.03, + "learning_rate": 4.990648302126576e-05, + "loss": 0.8732, + "step": 27638 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906476233530644e-05, + "loss": 1.0634, + "step": 27639 + }, + { + "epoch": 0.03, + "learning_rate": 4.990646944554965e-05, + "loss": 1.1217, + "step": 27640 + }, + { + "epoch": 0.03, + "learning_rate": 4.990646265732279e-05, + "loss": 1.4184, + "step": 27641 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906455868850074e-05, + "loss": 1.1893, + "step": 27642 + }, + { + "epoch": 0.03, + "learning_rate": 4.990644908013149e-05, + "loss": 0.8549, + "step": 27643 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906442291167036e-05, + "loss": 0.8826, + "step": 27644 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906435501956715e-05, + "loss": 1.3192, + "step": 27645 + }, + { + "epoch": 0.03, + "learning_rate": 4.990642871250053e-05, + "loss": 0.953, + "step": 27646 + }, + { + "epoch": 0.03, + "learning_rate": 4.990642192279848e-05, + "loss": 1.0286, + "step": 27647 + }, + { + "epoch": 0.03, + "learning_rate": 4.990641513285057e-05, + "loss": 1.2223, + "step": 27648 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906408342656786e-05, + "loss": 1.1291, + "step": 27649 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906401552217136e-05, + "loss": 0.645, + "step": 27650 + }, + { + "epoch": 0.03, + "learning_rate": 4.990639476153162e-05, + "loss": 0.2411, + "step": 27651 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906387970600246e-05, + "loss": 0.2393, + "step": 27652 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906381179423e-05, + "loss": 0.7807, + "step": 27653 + }, + { + "epoch": 0.03, + "learning_rate": 4.990637438799989e-05, + "loss": 0.9829, + "step": 27654 + }, + { + "epoch": 0.03, + "learning_rate": 4.990636759633092e-05, + "loss": 0.916, + "step": 27655 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906360804416075e-05, + "loss": 1.0634, + "step": 27656 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906354012255375e-05, + "loss": 1.2363, + "step": 27657 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906347219848806e-05, + "loss": 1.0851, + "step": 27658 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906340427196374e-05, + "loss": 0.4341, + "step": 27659 + }, + { + "epoch": 0.03, + "learning_rate": 4.990633363429807e-05, + "loss": 0.506, + "step": 27660 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906326841153906e-05, + "loss": 0.8091, + "step": 27661 + }, + { + "epoch": 0.03, + "learning_rate": 4.990632004776388e-05, + "loss": 0.9059, + "step": 27662 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906313254127986e-05, + "loss": 0.987, + "step": 27663 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906306460246224e-05, + "loss": 1.2362, + "step": 27664 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906299666118606e-05, + "loss": 1.0144, + "step": 27665 + }, + { + "epoch": 0.03, + "learning_rate": 4.990629287174512e-05, + "loss": 1.1037, + "step": 27666 + }, + { + "epoch": 0.03, + "learning_rate": 4.990628607712576e-05, + "loss": 1.3354, + "step": 27667 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906279282260546e-05, + "loss": 1.3847, + "step": 27668 + }, + { + "epoch": 0.03, + "learning_rate": 4.990627248714947e-05, + "loss": 1.0799, + "step": 27669 + }, + { + "epoch": 0.03, + "learning_rate": 4.990626569179252e-05, + "loss": 1.0909, + "step": 27670 + }, + { + "epoch": 0.03, + "learning_rate": 4.990625889618972e-05, + "loss": 0.8836, + "step": 27671 + }, + { + "epoch": 0.03, + "learning_rate": 4.990625210034104e-05, + "loss": 1.0071, + "step": 27672 + }, + { + "epoch": 0.03, + "learning_rate": 4.99062453042465e-05, + "loss": 0.6878, + "step": 27673 + }, + { + "epoch": 0.03, + "learning_rate": 4.99062385079061e-05, + "loss": 0.7104, + "step": 27674 + }, + { + "epoch": 0.03, + "learning_rate": 4.990623171131984e-05, + "loss": 0.7282, + "step": 27675 + }, + { + "epoch": 0.03, + "learning_rate": 4.990622491448771e-05, + "loss": 0.9639, + "step": 27676 + }, + { + "epoch": 0.03, + "learning_rate": 4.990621811740972e-05, + "loss": 0.9342, + "step": 27677 + }, + { + "epoch": 0.03, + "learning_rate": 4.990621132008586e-05, + "loss": 0.894, + "step": 27678 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906204522516144e-05, + "loss": 1.1951, + "step": 27679 + }, + { + "epoch": 0.03, + "learning_rate": 4.990619772470056e-05, + "loss": 0.8055, + "step": 27680 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906190926639115e-05, + "loss": 0.8545, + "step": 27681 + }, + { + "epoch": 0.03, + "learning_rate": 4.99061841283318e-05, + "loss": 0.8146, + "step": 27682 + }, + { + "epoch": 0.03, + "learning_rate": 4.990617732977863e-05, + "loss": 1.1256, + "step": 27683 + }, + { + "epoch": 0.03, + "learning_rate": 4.990617053097959e-05, + "loss": 0.9315, + "step": 27684 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906163731934694e-05, + "loss": 0.9664, + "step": 27685 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906156932643936e-05, + "loss": 1.2161, + "step": 27686 + }, + { + "epoch": 0.03, + "learning_rate": 4.990615013310731e-05, + "loss": 0.9261, + "step": 27687 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906143333324816e-05, + "loss": 1.0038, + "step": 27688 + }, + { + "epoch": 0.03, + "learning_rate": 4.990613653329647e-05, + "loss": 1.0052, + "step": 27689 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906129733022244e-05, + "loss": 0.9924, + "step": 27690 + }, + { + "epoch": 0.03, + "learning_rate": 4.990612293250217e-05, + "loss": 0.7458, + "step": 27691 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906116131736226e-05, + "loss": 0.8152, + "step": 27692 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906109330724425e-05, + "loss": 0.7456, + "step": 27693 + }, + { + "epoch": 0.03, + "learning_rate": 4.990610252946676e-05, + "loss": 1.0483, + "step": 27694 + }, + { + "epoch": 0.03, + "learning_rate": 4.990609572796323e-05, + "loss": 0.9304, + "step": 27695 + }, + { + "epoch": 0.03, + "learning_rate": 4.990608892621384e-05, + "loss": 1.0806, + "step": 27696 + }, + { + "epoch": 0.03, + "learning_rate": 4.990608212421859e-05, + "loss": 0.7899, + "step": 27697 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906075321977476e-05, + "loss": 1.106, + "step": 27698 + }, + { + "epoch": 0.03, + "learning_rate": 4.990606851949049e-05, + "loss": 1.2892, + "step": 27699 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906061716757653e-05, + "loss": 1.0521, + "step": 27700 + }, + { + "epoch": 0.03, + "learning_rate": 4.990605491377895e-05, + "loss": 1.2741, + "step": 27701 + }, + { + "epoch": 0.03, + "learning_rate": 4.990604811055438e-05, + "loss": 1.3494, + "step": 27702 + }, + { + "epoch": 0.03, + "learning_rate": 4.990604130708396e-05, + "loss": 0.778, + "step": 27703 + }, + { + "epoch": 0.03, + "learning_rate": 4.990603450336767e-05, + "loss": 0.8735, + "step": 27704 + }, + { + "epoch": 0.03, + "learning_rate": 4.990602769940552e-05, + "loss": 1.032, + "step": 27705 + }, + { + "epoch": 0.03, + "learning_rate": 4.990602089519751e-05, + "loss": 0.958, + "step": 27706 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906014090743634e-05, + "loss": 0.6649, + "step": 27707 + }, + { + "epoch": 0.03, + "learning_rate": 4.99060072860439e-05, + "loss": 0.8987, + "step": 27708 + }, + { + "epoch": 0.03, + "learning_rate": 4.99060004810983e-05, + "loss": 1.0549, + "step": 27709 + }, + { + "epoch": 0.03, + "learning_rate": 4.990599367590684e-05, + "loss": 1.139, + "step": 27710 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905986870469524e-05, + "loss": 0.7844, + "step": 27711 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905980064786336e-05, + "loss": 1.0607, + "step": 27712 + }, + { + "epoch": 0.03, + "learning_rate": 4.99059732588573e-05, + "loss": 1.0729, + "step": 27713 + }, + { + "epoch": 0.03, + "learning_rate": 4.990596645268239e-05, + "loss": 0.9109, + "step": 27714 + }, + { + "epoch": 0.03, + "learning_rate": 4.990595964626163e-05, + "loss": 0.978, + "step": 27715 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905952839595003e-05, + "loss": 0.958, + "step": 27716 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905946032682514e-05, + "loss": 1.0667, + "step": 27717 + }, + { + "epoch": 0.03, + "learning_rate": 4.990593922552417e-05, + "loss": 1.0727, + "step": 27718 + }, + { + "epoch": 0.03, + "learning_rate": 4.990593241811996e-05, + "loss": 0.8503, + "step": 27719 + }, + { + "epoch": 0.03, + "learning_rate": 4.990592561046988e-05, + "loss": 0.4109, + "step": 27720 + }, + { + "epoch": 0.03, + "learning_rate": 4.990591880257396e-05, + "loss": 0.6537, + "step": 27721 + }, + { + "epoch": 0.03, + "learning_rate": 4.990591199443217e-05, + "loss": 0.8554, + "step": 27722 + }, + { + "epoch": 0.03, + "learning_rate": 4.990590518604451e-05, + "loss": 0.8576, + "step": 27723 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905898377411e-05, + "loss": 1.0251, + "step": 27724 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905891568531626e-05, + "loss": 1.0885, + "step": 27725 + }, + { + "epoch": 0.03, + "learning_rate": 4.990588475940639e-05, + "loss": 0.7234, + "step": 27726 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905877950035294e-05, + "loss": 0.6694, + "step": 27727 + }, + { + "epoch": 0.03, + "learning_rate": 4.990587114041834e-05, + "loss": 0.5955, + "step": 27728 + }, + { + "epoch": 0.03, + "learning_rate": 4.990586433055553e-05, + "loss": 0.852, + "step": 27729 + }, + { + "epoch": 0.03, + "learning_rate": 4.990585752044685e-05, + "loss": 0.9524, + "step": 27730 + }, + { + "epoch": 0.03, + "learning_rate": 4.990585071009231e-05, + "loss": 1.0313, + "step": 27731 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905843899491924e-05, + "loss": 0.9356, + "step": 27732 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905837088645664e-05, + "loss": 1.206, + "step": 27733 + }, + { + "epoch": 0.03, + "learning_rate": 4.990583027755354e-05, + "loss": 0.9267, + "step": 27734 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905823466215576e-05, + "loss": 0.6736, + "step": 27735 + }, + { + "epoch": 0.03, + "learning_rate": 4.990581665463174e-05, + "loss": 0.8159, + "step": 27736 + }, + { + "epoch": 0.03, + "learning_rate": 4.990580984280204e-05, + "loss": 0.8483, + "step": 27737 + }, + { + "epoch": 0.03, + "learning_rate": 4.990580303072649e-05, + "loss": 1.1653, + "step": 27738 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905796218405075e-05, + "loss": 1.0153, + "step": 27739 + }, + { + "epoch": 0.03, + "learning_rate": 4.99057894058378e-05, + "loss": 0.9638, + "step": 27740 + }, + { + "epoch": 0.03, + "learning_rate": 4.990578259302466e-05, + "loss": 1.0033, + "step": 27741 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905775779965675e-05, + "loss": 1.0464, + "step": 27742 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905768966660825e-05, + "loss": 0.8493, + "step": 27743 + }, + { + "epoch": 0.03, + "learning_rate": 4.990576215311011e-05, + "loss": 0.8214, + "step": 27744 + }, + { + "epoch": 0.03, + "learning_rate": 4.990575533931354e-05, + "loss": 1.1777, + "step": 27745 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905748525271114e-05, + "loss": 0.6194, + "step": 27746 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905741710982825e-05, + "loss": 0.9569, + "step": 27747 + }, + { + "epoch": 0.03, + "learning_rate": 4.990573489644867e-05, + "loss": 0.9122, + "step": 27748 + }, + { + "epoch": 0.03, + "learning_rate": 4.990572808166867e-05, + "loss": 1.0316, + "step": 27749 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905721266642804e-05, + "loss": 1.0631, + "step": 27750 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905714451371075e-05, + "loss": 0.8778, + "step": 27751 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905707635853496e-05, + "loss": 0.9357, + "step": 27752 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905700820090055e-05, + "loss": 0.816, + "step": 27753 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905694004080757e-05, + "loss": 0.967, + "step": 27754 + }, + { + "epoch": 0.03, + "learning_rate": 4.99056871878256e-05, + "loss": 0.7972, + "step": 27755 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905680371324585e-05, + "loss": 0.8806, + "step": 27756 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905673554577704e-05, + "loss": 0.5902, + "step": 27757 + }, + { + "epoch": 0.03, + "learning_rate": 4.990566673758497e-05, + "loss": 1.0673, + "step": 27758 + }, + { + "epoch": 0.03, + "learning_rate": 4.990565992034638e-05, + "loss": 1.0391, + "step": 27759 + }, + { + "epoch": 0.03, + "learning_rate": 4.990565310286193e-05, + "loss": 1.0098, + "step": 27760 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905646285131623e-05, + "loss": 0.9799, + "step": 27761 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905639467155454e-05, + "loss": 0.6331, + "step": 27762 + }, + { + "epoch": 0.03, + "learning_rate": 4.990563264893343e-05, + "loss": 1.0631, + "step": 27763 + }, + { + "epoch": 0.03, + "learning_rate": 4.990562583046555e-05, + "loss": 1.0533, + "step": 27764 + }, + { + "epoch": 0.03, + "learning_rate": 4.990561901175181e-05, + "loss": 1.0353, + "step": 27765 + }, + { + "epoch": 0.03, + "learning_rate": 4.990561219279221e-05, + "loss": 0.9607, + "step": 27766 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905605373586754e-05, + "loss": 0.9504, + "step": 27767 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905598554135446e-05, + "loss": 0.7971, + "step": 27768 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905591734438275e-05, + "loss": 1.0303, + "step": 27769 + }, + { + "epoch": 0.03, + "learning_rate": 4.990558491449525e-05, + "loss": 0.9365, + "step": 27770 + }, + { + "epoch": 0.03, + "learning_rate": 4.990557809430636e-05, + "loss": 0.9375, + "step": 27771 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905571273871616e-05, + "loss": 0.5611, + "step": 27772 + }, + { + "epoch": 0.03, + "learning_rate": 4.990556445319102e-05, + "loss": 0.9402, + "step": 27773 + }, + { + "epoch": 0.03, + "learning_rate": 4.990555763226455e-05, + "loss": 1.1674, + "step": 27774 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905550811092244e-05, + "loss": 1.1372, + "step": 27775 + }, + { + "epoch": 0.03, + "learning_rate": 4.990554398967408e-05, + "loss": 0.9021, + "step": 27776 + }, + { + "epoch": 0.03, + "learning_rate": 4.990553716801004e-05, + "loss": 0.9845, + "step": 27777 + }, + { + "epoch": 0.03, + "learning_rate": 4.990553034610016e-05, + "loss": 0.9703, + "step": 27778 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905523523944415e-05, + "loss": 0.9554, + "step": 27779 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905516701542816e-05, + "loss": 1.0286, + "step": 27780 + }, + { + "epoch": 0.03, + "learning_rate": 4.990550987889536e-05, + "loss": 0.9224, + "step": 27781 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905503056002044e-05, + "loss": 0.8858, + "step": 27782 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905496232862884e-05, + "loss": 0.7031, + "step": 27783 + }, + { + "epoch": 0.03, + "learning_rate": 4.990548940947785e-05, + "loss": 0.5892, + "step": 27784 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905482585846966e-05, + "loss": 0.5881, + "step": 27785 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905475761970236e-05, + "loss": 0.9092, + "step": 27786 + }, + { + "epoch": 0.03, + "learning_rate": 4.990546893784763e-05, + "loss": 0.8128, + "step": 27787 + }, + { + "epoch": 0.03, + "learning_rate": 4.990546211347919e-05, + "loss": 0.9252, + "step": 27788 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905455288864875e-05, + "loss": 0.5737, + "step": 27789 + }, + { + "epoch": 0.03, + "learning_rate": 4.990544846400471e-05, + "loss": 0.6435, + "step": 27790 + }, + { + "epoch": 0.03, + "learning_rate": 4.990544163889869e-05, + "loss": 0.8783, + "step": 27791 + }, + { + "epoch": 0.03, + "learning_rate": 4.990543481354681e-05, + "loss": 0.8659, + "step": 27792 + }, + { + "epoch": 0.03, + "learning_rate": 4.990542798794908e-05, + "loss": 0.7595, + "step": 27793 + }, + { + "epoch": 0.03, + "learning_rate": 4.990542116210549e-05, + "loss": 0.4801, + "step": 27794 + }, + { + "epoch": 0.03, + "learning_rate": 4.990541433601605e-05, + "loss": 1.1763, + "step": 27795 + }, + { + "epoch": 0.03, + "learning_rate": 4.990540750968075e-05, + "loss": 0.7236, + "step": 27796 + }, + { + "epoch": 0.03, + "learning_rate": 4.990540068309959e-05, + "loss": 1.0081, + "step": 27797 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905393856272585e-05, + "loss": 0.8558, + "step": 27798 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905387029199715e-05, + "loss": 0.9825, + "step": 27799 + }, + { + "epoch": 0.03, + "learning_rate": 4.990538020188099e-05, + "loss": 1.1079, + "step": 27800 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905373374316414e-05, + "loss": 0.8487, + "step": 27801 + }, + { + "epoch": 0.03, + "learning_rate": 4.990536654650598e-05, + "loss": 1.0253, + "step": 27802 + }, + { + "epoch": 0.03, + "learning_rate": 4.990535971844969e-05, + "loss": 1.0818, + "step": 27803 + }, + { + "epoch": 0.03, + "learning_rate": 4.990535289014755e-05, + "loss": 0.8961, + "step": 27804 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905346061599553e-05, + "loss": 1.0148, + "step": 27805 + }, + { + "epoch": 0.03, + "learning_rate": 4.99053392328057e-05, + "loss": 1.0145, + "step": 27806 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905332403765995e-05, + "loss": 1.0668, + "step": 27807 + }, + { + "epoch": 0.03, + "learning_rate": 4.990532557448043e-05, + "loss": 1.0569, + "step": 27808 + }, + { + "epoch": 0.03, + "learning_rate": 4.990531874494901e-05, + "loss": 0.9331, + "step": 27809 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905311915171735e-05, + "loss": 0.7204, + "step": 27810 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905305085148616e-05, + "loss": 0.8553, + "step": 27811 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905298254879626e-05, + "loss": 1.2666, + "step": 27812 + }, + { + "epoch": 0.03, + "learning_rate": 4.990529142436479e-05, + "loss": 1.0179, + "step": 27813 + }, + { + "epoch": 0.03, + "learning_rate": 4.99052845936041e-05, + "loss": 1.0008, + "step": 27814 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905277762597554e-05, + "loss": 1.0917, + "step": 27815 + }, + { + "epoch": 0.03, + "learning_rate": 4.990527093134515e-05, + "loss": 1.0309, + "step": 27816 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905264099846895e-05, + "loss": 1.1319, + "step": 27817 + }, + { + "epoch": 0.03, + "learning_rate": 4.990525726810279e-05, + "loss": 0.9352, + "step": 27818 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905250436112824e-05, + "loss": 0.8021, + "step": 27819 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905243603877004e-05, + "loss": 1.0507, + "step": 27820 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905236771395334e-05, + "loss": 0.9361, + "step": 27821 + }, + { + "epoch": 0.03, + "learning_rate": 4.990522993866781e-05, + "loss": 0.9819, + "step": 27822 + }, + { + "epoch": 0.03, + "learning_rate": 4.990522310569443e-05, + "loss": 1.1695, + "step": 27823 + }, + { + "epoch": 0.03, + "learning_rate": 4.99052162724752e-05, + "loss": 1.2755, + "step": 27824 + }, + { + "epoch": 0.03, + "learning_rate": 4.990520943901011e-05, + "loss": 1.1384, + "step": 27825 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905202605299166e-05, + "loss": 1.0256, + "step": 27826 + }, + { + "epoch": 0.03, + "learning_rate": 4.990519577134237e-05, + "loss": 0.8643, + "step": 27827 + }, + { + "epoch": 0.03, + "learning_rate": 4.990518893713972e-05, + "loss": 0.5411, + "step": 27828 + }, + { + "epoch": 0.03, + "learning_rate": 4.990518210269122e-05, + "loss": 0.8263, + "step": 27829 + }, + { + "epoch": 0.03, + "learning_rate": 4.990517526799687e-05, + "loss": 0.758, + "step": 27830 + }, + { + "epoch": 0.03, + "learning_rate": 4.990516843305666e-05, + "loss": 1.0492, + "step": 27831 + }, + { + "epoch": 0.03, + "learning_rate": 4.99051615978706e-05, + "loss": 1.7941, + "step": 27832 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905154762438674e-05, + "loss": 1.3182, + "step": 27833 + }, + { + "epoch": 0.03, + "learning_rate": 4.990514792676091e-05, + "loss": 1.244, + "step": 27834 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905141090837294e-05, + "loss": 1.1169, + "step": 27835 + }, + { + "epoch": 0.03, + "learning_rate": 4.990513425466782e-05, + "loss": 1.0359, + "step": 27836 + }, + { + "epoch": 0.03, + "learning_rate": 4.990512741825249e-05, + "loss": 1.0787, + "step": 27837 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905120581591306e-05, + "loss": 1.07, + "step": 27838 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905113744684276e-05, + "loss": 0.9152, + "step": 27839 + }, + { + "epoch": 0.03, + "learning_rate": 4.990510690753138e-05, + "loss": 0.6232, + "step": 27840 + }, + { + "epoch": 0.03, + "learning_rate": 4.990510007013265e-05, + "loss": 0.8866, + "step": 27841 + }, + { + "epoch": 0.03, + "learning_rate": 4.990509323248806e-05, + "loss": 1.1758, + "step": 27842 + }, + { + "epoch": 0.03, + "learning_rate": 4.990508639459761e-05, + "loss": 1.0268, + "step": 27843 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905079556461324e-05, + "loss": 1.01, + "step": 27844 + }, + { + "epoch": 0.03, + "learning_rate": 4.990507271807917e-05, + "loss": 0.8055, + "step": 27845 + }, + { + "epoch": 0.03, + "learning_rate": 4.990506587945117e-05, + "loss": 0.9076, + "step": 27846 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905059040577315e-05, + "loss": 1.0533, + "step": 27847 + }, + { + "epoch": 0.03, + "learning_rate": 4.990505220145761e-05, + "loss": 0.8751, + "step": 27848 + }, + { + "epoch": 0.03, + "learning_rate": 4.990504536209205e-05, + "loss": 0.7527, + "step": 27849 + }, + { + "epoch": 0.03, + "learning_rate": 4.990503852248064e-05, + "loss": 0.9183, + "step": 27850 + }, + { + "epoch": 0.03, + "learning_rate": 4.990503168262337e-05, + "loss": 0.9372, + "step": 27851 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905024842520264e-05, + "loss": 1.072, + "step": 27852 + }, + { + "epoch": 0.03, + "learning_rate": 4.990501800217129e-05, + "loss": 0.9203, + "step": 27853 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905011161576475e-05, + "loss": 0.8439, + "step": 27854 + }, + { + "epoch": 0.03, + "learning_rate": 4.9905004320735803e-05, + "loss": 1.0147, + "step": 27855 + }, + { + "epoch": 0.03, + "learning_rate": 4.990499747964928e-05, + "loss": 0.9895, + "step": 27856 + }, + { + "epoch": 0.03, + "learning_rate": 4.990499063831691e-05, + "loss": 1.1987, + "step": 27857 + }, + { + "epoch": 0.03, + "learning_rate": 4.990498379673869e-05, + "loss": 0.9581, + "step": 27858 + }, + { + "epoch": 0.03, + "learning_rate": 4.990497695491461e-05, + "loss": 1.145, + "step": 27859 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904970112844686e-05, + "loss": 1.3166, + "step": 27860 + }, + { + "epoch": 0.03, + "learning_rate": 4.99049632705289e-05, + "loss": 1.2875, + "step": 27861 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904956427967276e-05, + "loss": 1.0196, + "step": 27862 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904949585159794e-05, + "loss": 1.1823, + "step": 27863 + }, + { + "epoch": 0.03, + "learning_rate": 4.990494274210646e-05, + "loss": 0.9371, + "step": 27864 + }, + { + "epoch": 0.03, + "learning_rate": 4.990493589880727e-05, + "loss": 0.9609, + "step": 27865 + }, + { + "epoch": 0.03, + "learning_rate": 4.990492905526224e-05, + "loss": 1.189, + "step": 27866 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904922211471354e-05, + "loss": 0.9733, + "step": 27867 + }, + { + "epoch": 0.03, + "learning_rate": 4.990491536743462e-05, + "loss": 0.9429, + "step": 27868 + }, + { + "epoch": 0.03, + "learning_rate": 4.990490852315203e-05, + "loss": 1.1624, + "step": 27869 + }, + { + "epoch": 0.03, + "learning_rate": 4.990490167862359e-05, + "loss": 1.4252, + "step": 27870 + }, + { + "epoch": 0.03, + "learning_rate": 4.99048948338493e-05, + "loss": 0.9018, + "step": 27871 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904887988829165e-05, + "loss": 0.8046, + "step": 27872 + }, + { + "epoch": 0.03, + "learning_rate": 4.990488114356318e-05, + "loss": 1.3373, + "step": 27873 + }, + { + "epoch": 0.03, + "learning_rate": 4.990487429805133e-05, + "loss": 0.9253, + "step": 27874 + }, + { + "epoch": 0.03, + "learning_rate": 4.990486745229364e-05, + "loss": 0.5874, + "step": 27875 + }, + { + "epoch": 0.03, + "learning_rate": 4.99048606062901e-05, + "loss": 1.0182, + "step": 27876 + }, + { + "epoch": 0.03, + "learning_rate": 4.990485376004071e-05, + "loss": 0.9424, + "step": 27877 + }, + { + "epoch": 0.03, + "learning_rate": 4.990484691354547e-05, + "loss": 1.139, + "step": 27878 + }, + { + "epoch": 0.03, + "learning_rate": 4.990484006680437e-05, + "loss": 0.9923, + "step": 27879 + }, + { + "epoch": 0.03, + "learning_rate": 4.990483321981743e-05, + "loss": 1.2163, + "step": 27880 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904826372584635e-05, + "loss": 1.0284, + "step": 27881 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904819525105996e-05, + "loss": 1.0258, + "step": 27882 + }, + { + "epoch": 0.03, + "learning_rate": 4.990481267738151e-05, + "loss": 0.7791, + "step": 27883 + }, + { + "epoch": 0.03, + "learning_rate": 4.990480582941116e-05, + "loss": 0.9971, + "step": 27884 + }, + { + "epoch": 0.03, + "learning_rate": 4.990479898119497e-05, + "loss": 1.0735, + "step": 27885 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904792132732924e-05, + "loss": 0.9736, + "step": 27886 + }, + { + "epoch": 0.03, + "learning_rate": 4.990478528402504e-05, + "loss": 1.0789, + "step": 27887 + }, + { + "epoch": 0.03, + "learning_rate": 4.99047784350713e-05, + "loss": 0.9628, + "step": 27888 + }, + { + "epoch": 0.03, + "learning_rate": 4.990477158587171e-05, + "loss": 0.6533, + "step": 27889 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904764736426265e-05, + "loss": 0.7606, + "step": 27890 + }, + { + "epoch": 0.03, + "learning_rate": 4.990475788673497e-05, + "loss": 0.5276, + "step": 27891 + }, + { + "epoch": 0.03, + "learning_rate": 4.990475103679784e-05, + "loss": 0.8716, + "step": 27892 + }, + { + "epoch": 0.03, + "learning_rate": 4.990474418661485e-05, + "loss": 0.9351, + "step": 27893 + }, + { + "epoch": 0.03, + "learning_rate": 4.990473733618601e-05, + "loss": 0.8573, + "step": 27894 + }, + { + "epoch": 0.03, + "learning_rate": 4.990473048551133e-05, + "loss": 0.8419, + "step": 27895 + }, + { + "epoch": 0.03, + "learning_rate": 4.99047236345908e-05, + "loss": 0.4738, + "step": 27896 + }, + { + "epoch": 0.03, + "learning_rate": 4.990471678342441e-05, + "loss": 1.0962, + "step": 27897 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904709932012186e-05, + "loss": 1.1059, + "step": 27898 + }, + { + "epoch": 0.03, + "learning_rate": 4.99047030803541e-05, + "loss": 1.1053, + "step": 27899 + }, + { + "epoch": 0.03, + "learning_rate": 4.990469622845017e-05, + "loss": 1.2107, + "step": 27900 + }, + { + "epoch": 0.03, + "learning_rate": 4.99046893763004e-05, + "loss": 0.9637, + "step": 27901 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904682523904766e-05, + "loss": 0.5717, + "step": 27902 + }, + { + "epoch": 0.03, + "learning_rate": 4.990467567126329e-05, + "loss": 0.6492, + "step": 27903 + }, + { + "epoch": 0.03, + "learning_rate": 4.990466881837597e-05, + "loss": 1.0332, + "step": 27904 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904661965242795e-05, + "loss": 1.0358, + "step": 27905 + }, + { + "epoch": 0.03, + "learning_rate": 4.990465511186377e-05, + "loss": 1.1549, + "step": 27906 + }, + { + "epoch": 0.03, + "learning_rate": 4.99046482582389e-05, + "loss": 1.0167, + "step": 27907 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904641404368185e-05, + "loss": 1.3105, + "step": 27908 + }, + { + "epoch": 0.03, + "learning_rate": 4.990463455025162e-05, + "loss": 1.0382, + "step": 27909 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904627695889205e-05, + "loss": 1.1233, + "step": 27910 + }, + { + "epoch": 0.03, + "learning_rate": 4.990462084128095e-05, + "loss": 0.7033, + "step": 27911 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904613986426834e-05, + "loss": 0.9632, + "step": 27912 + }, + { + "epoch": 0.03, + "learning_rate": 4.990460713132688e-05, + "loss": 0.8804, + "step": 27913 + }, + { + "epoch": 0.03, + "learning_rate": 4.990460027598107e-05, + "loss": 0.9075, + "step": 27914 + }, + { + "epoch": 0.03, + "learning_rate": 4.990459342038942e-05, + "loss": 0.6978, + "step": 27915 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904586564551916e-05, + "loss": 0.5195, + "step": 27916 + }, + { + "epoch": 0.03, + "learning_rate": 4.990457970846857e-05, + "loss": 0.9922, + "step": 27917 + }, + { + "epoch": 0.03, + "learning_rate": 4.990457285213938e-05, + "loss": 0.8882, + "step": 27918 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904565995564336e-05, + "loss": 0.9277, + "step": 27919 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904559138743446e-05, + "loss": 1.0428, + "step": 27920 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904552281676706e-05, + "loss": 1.0743, + "step": 27921 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904545424364116e-05, + "loss": 1.06, + "step": 27922 + }, + { + "epoch": 0.03, + "learning_rate": 4.990453856680568e-05, + "loss": 1.2203, + "step": 27923 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904531709001415e-05, + "loss": 1.0635, + "step": 27924 + }, + { + "epoch": 0.03, + "learning_rate": 4.990452485095128e-05, + "loss": 0.8842, + "step": 27925 + }, + { + "epoch": 0.03, + "learning_rate": 4.990451799265531e-05, + "loss": 1.009, + "step": 27926 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904511134113484e-05, + "loss": 1.065, + "step": 27927 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904504275325824e-05, + "loss": 0.8137, + "step": 27928 + }, + { + "epoch": 0.03, + "learning_rate": 4.990449741629231e-05, + "loss": 0.9956, + "step": 27929 + }, + { + "epoch": 0.03, + "learning_rate": 4.990449055701295e-05, + "loss": 1.0317, + "step": 27930 + }, + { + "epoch": 0.03, + "learning_rate": 4.990448369748774e-05, + "loss": 0.9656, + "step": 27931 + }, + { + "epoch": 0.03, + "learning_rate": 4.990447683771669e-05, + "loss": 0.9636, + "step": 27932 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904469977699785e-05, + "loss": 1.0347, + "step": 27933 + }, + { + "epoch": 0.03, + "learning_rate": 4.990446311743704e-05, + "loss": 0.9764, + "step": 27934 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904456256928447e-05, + "loss": 1.0576, + "step": 27935 + }, + { + "epoch": 0.03, + "learning_rate": 4.990444939617401e-05, + "loss": 0.9052, + "step": 27936 + }, + { + "epoch": 0.03, + "learning_rate": 4.990444253517372e-05, + "loss": 1.1236, + "step": 27937 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904435673927594e-05, + "loss": 0.8083, + "step": 27938 + }, + { + "epoch": 0.03, + "learning_rate": 4.990442881243561e-05, + "loss": 1.1179, + "step": 27939 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904421950697786e-05, + "loss": 1.2141, + "step": 27940 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904415088714115e-05, + "loss": 1.0086, + "step": 27941 + }, + { + "epoch": 0.03, + "learning_rate": 4.99044082264846e-05, + "loss": 1.0237, + "step": 27942 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904401364009243e-05, + "loss": 1.1294, + "step": 27943 + }, + { + "epoch": 0.03, + "learning_rate": 4.990439450128803e-05, + "loss": 0.9394, + "step": 27944 + }, + { + "epoch": 0.03, + "learning_rate": 4.990438763832098e-05, + "loss": 0.8005, + "step": 27945 + }, + { + "epoch": 0.03, + "learning_rate": 4.990438077510808e-05, + "loss": 1.0421, + "step": 27946 + }, + { + "epoch": 0.03, + "learning_rate": 4.990437391164934e-05, + "loss": 1.0653, + "step": 27947 + }, + { + "epoch": 0.03, + "learning_rate": 4.990436704794475e-05, + "loss": 0.9845, + "step": 27948 + }, + { + "epoch": 0.03, + "learning_rate": 4.990436018399431e-05, + "loss": 0.8786, + "step": 27949 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904353319798036e-05, + "loss": 0.9444, + "step": 27950 + }, + { + "epoch": 0.03, + "learning_rate": 4.990434645535591e-05, + "loss": 0.8474, + "step": 27951 + }, + { + "epoch": 0.03, + "learning_rate": 4.990433959066794e-05, + "loss": 0.8079, + "step": 27952 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904332725734126e-05, + "loss": 0.8527, + "step": 27953 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904325860554464e-05, + "loss": 1.0714, + "step": 27954 + }, + { + "epoch": 0.03, + "learning_rate": 4.990431899512896e-05, + "loss": 0.9427, + "step": 27955 + }, + { + "epoch": 0.03, + "learning_rate": 4.990431212945761e-05, + "loss": 1.0613, + "step": 27956 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904305263540406e-05, + "loss": 1.2071, + "step": 27957 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904298397377366e-05, + "loss": 0.8898, + "step": 27958 + }, + { + "epoch": 0.03, + "learning_rate": 4.990429153096848e-05, + "loss": 1.0221, + "step": 27959 + }, + { + "epoch": 0.03, + "learning_rate": 4.990428466431375e-05, + "loss": 0.9921, + "step": 27960 + }, + { + "epoch": 0.03, + "learning_rate": 4.990427779741318e-05, + "loss": 0.9794, + "step": 27961 + }, + { + "epoch": 0.03, + "learning_rate": 4.990427093026676e-05, + "loss": 0.8498, + "step": 27962 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904264062874494e-05, + "loss": 0.2687, + "step": 27963 + }, + { + "epoch": 0.03, + "learning_rate": 4.990425719523639e-05, + "loss": 0.6226, + "step": 27964 + }, + { + "epoch": 0.03, + "learning_rate": 4.990425032735243e-05, + "loss": 0.7986, + "step": 27965 + }, + { + "epoch": 0.03, + "learning_rate": 4.990424345922264e-05, + "loss": 0.806, + "step": 27966 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904236590847e-05, + "loss": 1.2528, + "step": 27967 + }, + { + "epoch": 0.03, + "learning_rate": 4.990422972222552e-05, + "loss": 1.0032, + "step": 27968 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904222853358185e-05, + "loss": 0.9903, + "step": 27969 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904215984245004e-05, + "loss": 1.0997, + "step": 27970 + }, + { + "epoch": 0.03, + "learning_rate": 4.990420911488599e-05, + "loss": 0.7579, + "step": 27971 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904202245281125e-05, + "loss": 0.9817, + "step": 27972 + }, + { + "epoch": 0.03, + "learning_rate": 4.990419537543043e-05, + "loss": 1.1755, + "step": 27973 + }, + { + "epoch": 0.03, + "learning_rate": 4.990418850533388e-05, + "loss": 0.7276, + "step": 27974 + }, + { + "epoch": 0.03, + "learning_rate": 4.990418163499149e-05, + "loss": 0.9457, + "step": 27975 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904174764403255e-05, + "loss": 0.6992, + "step": 27976 + }, + { + "epoch": 0.03, + "learning_rate": 4.990416789356917e-05, + "loss": 0.7609, + "step": 27977 + }, + { + "epoch": 0.03, + "learning_rate": 4.990416102248925e-05, + "loss": 1.111, + "step": 27978 + }, + { + "epoch": 0.03, + "learning_rate": 4.990415415116348e-05, + "loss": 1.0256, + "step": 27979 + }, + { + "epoch": 0.03, + "learning_rate": 4.990414727959187e-05, + "loss": 0.8097, + "step": 27980 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904140407774425e-05, + "loss": 1.0942, + "step": 27981 + }, + { + "epoch": 0.03, + "learning_rate": 4.990413353571113e-05, + "loss": 0.7206, + "step": 27982 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904126663401984e-05, + "loss": 0.8163, + "step": 27983 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904119790847e-05, + "loss": 1.203, + "step": 27984 + }, + { + "epoch": 0.03, + "learning_rate": 4.990411291804618e-05, + "loss": 0.7933, + "step": 27985 + }, + { + "epoch": 0.03, + "learning_rate": 4.990410604499951e-05, + "loss": 1.026, + "step": 27986 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904099171707e-05, + "loss": 1.2583, + "step": 27987 + }, + { + "epoch": 0.03, + "learning_rate": 4.990409229816865e-05, + "loss": 1.0807, + "step": 27988 + }, + { + "epoch": 0.03, + "learning_rate": 4.990408542438445e-05, + "loss": 0.9935, + "step": 27989 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904078550354415e-05, + "loss": 0.8608, + "step": 27990 + }, + { + "epoch": 0.03, + "learning_rate": 4.990407167607853e-05, + "loss": 1.0443, + "step": 27991 + }, + { + "epoch": 0.03, + "learning_rate": 4.990406480155681e-05, + "loss": 1.0556, + "step": 27992 + }, + { + "epoch": 0.03, + "learning_rate": 4.990405792678924e-05, + "loss": 0.7044, + "step": 27993 + }, + { + "epoch": 0.03, + "learning_rate": 4.990405105177583e-05, + "loss": 0.8708, + "step": 27994 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904044176516586e-05, + "loss": 1.0584, + "step": 27995 + }, + { + "epoch": 0.03, + "learning_rate": 4.990403730101149e-05, + "loss": 0.8515, + "step": 27996 + }, + { + "epoch": 0.03, + "learning_rate": 4.990403042526056e-05, + "loss": 1.2923, + "step": 27997 + }, + { + "epoch": 0.03, + "learning_rate": 4.990402354926378e-05, + "loss": 1.0697, + "step": 27998 + }, + { + "epoch": 0.03, + "learning_rate": 4.990401667302116e-05, + "loss": 1.2155, + "step": 27999 + }, + { + "epoch": 0.03, + "learning_rate": 4.9904009796532694e-05, + "loss": 1.2817, + "step": 28000 + }, + { + "epoch": 0.03, + "eval_loss": 1.061301350593567, + "eval_runtime": 129.3835, + "eval_samples_per_second": 10.705, + "eval_steps_per_second": 5.356, + "step": 28000 + }, + { + "epoch": 0.03, + "learning_rate": 4.99040029197984e-05, + "loss": 0.9983, + "step": 28001 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903996042818245e-05, + "loss": 1.0991, + "step": 28002 + }, + { + "epoch": 0.03, + "learning_rate": 4.990398916559226e-05, + "loss": 1.1118, + "step": 28003 + }, + { + "epoch": 0.03, + "learning_rate": 4.990398228812043e-05, + "loss": 1.0434, + "step": 28004 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903975410402764e-05, + "loss": 1.0248, + "step": 28005 + }, + { + "epoch": 0.03, + "learning_rate": 4.990396853243925e-05, + "loss": 1.0889, + "step": 28006 + }, + { + "epoch": 0.03, + "learning_rate": 4.99039616542299e-05, + "loss": 1.048, + "step": 28007 + }, + { + "epoch": 0.03, + "learning_rate": 4.990395477577471e-05, + "loss": 0.8462, + "step": 28008 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903947897073665e-05, + "loss": 1.0136, + "step": 28009 + }, + { + "epoch": 0.03, + "learning_rate": 4.990394101812679e-05, + "loss": 0.7795, + "step": 28010 + }, + { + "epoch": 0.03, + "learning_rate": 4.990393413893407e-05, + "loss": 0.6053, + "step": 28011 + }, + { + "epoch": 0.03, + "learning_rate": 4.990392725949551e-05, + "loss": 1.0684, + "step": 28012 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903920379811115e-05, + "loss": 0.8894, + "step": 28013 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903913499880875e-05, + "loss": 1.1518, + "step": 28014 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903906619704786e-05, + "loss": 0.9953, + "step": 28015 + }, + { + "epoch": 0.03, + "learning_rate": 4.990389973928287e-05, + "loss": 0.831, + "step": 28016 + }, + { + "epoch": 0.03, + "learning_rate": 4.99038928586151e-05, + "loss": 0.8108, + "step": 28017 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903885977701495e-05, + "loss": 0.9599, + "step": 28018 + }, + { + "epoch": 0.03, + "learning_rate": 4.990387909654205e-05, + "loss": 0.8391, + "step": 28019 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903872215136765e-05, + "loss": 0.8052, + "step": 28020 + }, + { + "epoch": 0.03, + "learning_rate": 4.990386533348564e-05, + "loss": 0.716, + "step": 28021 + }, + { + "epoch": 0.03, + "learning_rate": 4.990385845158867e-05, + "loss": 1.1299, + "step": 28022 + }, + { + "epoch": 0.03, + "learning_rate": 4.990385156944586e-05, + "loss": 1.4516, + "step": 28023 + }, + { + "epoch": 0.03, + "learning_rate": 4.990384468705721e-05, + "loss": 1.4178, + "step": 28024 + }, + { + "epoch": 0.03, + "learning_rate": 4.990383780442273e-05, + "loss": 1.4028, + "step": 28025 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903830921542394e-05, + "loss": 1.387, + "step": 28026 + }, + { + "epoch": 0.03, + "learning_rate": 4.990382403841623e-05, + "loss": 1.3419, + "step": 28027 + }, + { + "epoch": 0.03, + "learning_rate": 4.990381715504422e-05, + "loss": 1.3017, + "step": 28028 + }, + { + "epoch": 0.03, + "learning_rate": 4.990381027142637e-05, + "loss": 1.3854, + "step": 28029 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903803387562686e-05, + "loss": 1.3358, + "step": 28030 + }, + { + "epoch": 0.03, + "learning_rate": 4.990379650345315e-05, + "loss": 1.398, + "step": 28031 + }, + { + "epoch": 0.03, + "learning_rate": 4.990378961909778e-05, + "loss": 1.3703, + "step": 28032 + }, + { + "epoch": 0.03, + "learning_rate": 4.990378273449658e-05, + "loss": 1.4383, + "step": 28033 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903775849649533e-05, + "loss": 1.4452, + "step": 28034 + }, + { + "epoch": 0.03, + "learning_rate": 4.990376896455664e-05, + "loss": 1.5407, + "step": 28035 + }, + { + "epoch": 0.03, + "learning_rate": 4.990376207921792e-05, + "loss": 1.4526, + "step": 28036 + }, + { + "epoch": 0.03, + "learning_rate": 4.990375519363335e-05, + "loss": 1.3148, + "step": 28037 + }, + { + "epoch": 0.03, + "learning_rate": 4.990374830780294e-05, + "loss": 1.3843, + "step": 28038 + }, + { + "epoch": 0.03, + "learning_rate": 4.99037414217267e-05, + "loss": 1.3411, + "step": 28039 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903734535404614e-05, + "loss": 1.3977, + "step": 28040 + }, + { + "epoch": 0.03, + "learning_rate": 4.990372764883669e-05, + "loss": 1.4128, + "step": 28041 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903720762022934e-05, + "loss": 1.395, + "step": 28042 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903713874963334e-05, + "loss": 1.3484, + "step": 28043 + }, + { + "epoch": 0.03, + "learning_rate": 4.99037069876579e-05, + "loss": 1.3158, + "step": 28044 + }, + { + "epoch": 0.03, + "learning_rate": 4.990370010010661e-05, + "loss": 1.1074, + "step": 28045 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903693212309495e-05, + "loss": 1.2379, + "step": 28046 + }, + { + "epoch": 0.03, + "learning_rate": 4.990368632426654e-05, + "loss": 0.7073, + "step": 28047 + }, + { + "epoch": 0.03, + "learning_rate": 4.990367943597774e-05, + "loss": 1.0106, + "step": 28048 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903672547443104e-05, + "loss": 1.0081, + "step": 28049 + }, + { + "epoch": 0.03, + "learning_rate": 4.990366565866264e-05, + "loss": 1.245, + "step": 28050 + }, + { + "epoch": 0.03, + "learning_rate": 4.990365876963633e-05, + "loss": 1.0308, + "step": 28051 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903651880364175e-05, + "loss": 1.2028, + "step": 28052 + }, + { + "epoch": 0.03, + "learning_rate": 4.990364499084619e-05, + "loss": 0.8116, + "step": 28053 + }, + { + "epoch": 0.03, + "learning_rate": 4.990363810108236e-05, + "loss": 1.0331, + "step": 28054 + }, + { + "epoch": 0.03, + "learning_rate": 4.99036312110727e-05, + "loss": 0.9464, + "step": 28055 + }, + { + "epoch": 0.03, + "learning_rate": 4.990362432081719e-05, + "loss": 1.2963, + "step": 28056 + }, + { + "epoch": 0.03, + "learning_rate": 4.990361743031585e-05, + "loss": 0.7556, + "step": 28057 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903610539568676e-05, + "loss": 1.1131, + "step": 28058 + }, + { + "epoch": 0.03, + "learning_rate": 4.990360364857566e-05, + "loss": 0.8096, + "step": 28059 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903596757336803e-05, + "loss": 1.0185, + "step": 28060 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903589865852106e-05, + "loss": 0.8822, + "step": 28061 + }, + { + "epoch": 0.03, + "learning_rate": 4.990358297412158e-05, + "loss": 0.8732, + "step": 28062 + }, + { + "epoch": 0.03, + "learning_rate": 4.990357608214521e-05, + "loss": 1.0675, + "step": 28063 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903569189923004e-05, + "loss": 1.0767, + "step": 28064 + }, + { + "epoch": 0.03, + "learning_rate": 4.990356229745496e-05, + "loss": 0.9652, + "step": 28065 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903555404741085e-05, + "loss": 0.8328, + "step": 28066 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903548511781365e-05, + "loss": 1.3185, + "step": 28067 + }, + { + "epoch": 0.03, + "learning_rate": 4.990354161857581e-05, + "loss": 1.6722, + "step": 28068 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903534725124415e-05, + "loss": 1.5298, + "step": 28069 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903527831427187e-05, + "loss": 1.1478, + "step": 28070 + }, + { + "epoch": 0.03, + "learning_rate": 4.990352093748412e-05, + "loss": 1.0807, + "step": 28071 + }, + { + "epoch": 0.03, + "learning_rate": 4.990351404329522e-05, + "loss": 1.1685, + "step": 28072 + }, + { + "epoch": 0.03, + "learning_rate": 4.990350714886047e-05, + "loss": 0.2383, + "step": 28073 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903500254179904e-05, + "loss": 0.1214, + "step": 28074 + }, + { + "epoch": 0.03, + "learning_rate": 4.990349335925348e-05, + "loss": 0.0569, + "step": 28075 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903486464081236e-05, + "loss": 0.1609, + "step": 28076 + }, + { + "epoch": 0.03, + "learning_rate": 4.990347956866315e-05, + "loss": 0.0924, + "step": 28077 + }, + { + "epoch": 0.03, + "learning_rate": 4.990347267299922e-05, + "loss": 0.2206, + "step": 28078 + }, + { + "epoch": 0.03, + "learning_rate": 4.990346577708946e-05, + "loss": 0.0928, + "step": 28079 + }, + { + "epoch": 0.03, + "learning_rate": 4.990345888093386e-05, + "loss": 0.0923, + "step": 28080 + }, + { + "epoch": 0.03, + "learning_rate": 4.990345198453242e-05, + "loss": 0.128, + "step": 28081 + }, + { + "epoch": 0.03, + "learning_rate": 4.990344508788516e-05, + "loss": 0.0848, + "step": 28082 + }, + { + "epoch": 0.03, + "learning_rate": 4.990343819099205e-05, + "loss": 0.1401, + "step": 28083 + }, + { + "epoch": 0.03, + "learning_rate": 4.990343129385311e-05, + "loss": 0.0911, + "step": 28084 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903424396468326e-05, + "loss": 0.0883, + "step": 28085 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903417498837713e-05, + "loss": 0.0866, + "step": 28086 + }, + { + "epoch": 0.03, + "learning_rate": 4.990341060096126e-05, + "loss": 0.0672, + "step": 28087 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903403702838967e-05, + "loss": 0.0651, + "step": 28088 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903396804470846e-05, + "loss": 0.1197, + "step": 28089 + }, + { + "epoch": 0.03, + "learning_rate": 4.990338990585689e-05, + "loss": 0.0862, + "step": 28090 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903383006997096e-05, + "loss": 0.0857, + "step": 28091 + }, + { + "epoch": 0.03, + "learning_rate": 4.990337610789147e-05, + "loss": 0.1085, + "step": 28092 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903369208539995e-05, + "loss": 0.0891, + "step": 28093 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903362308942694e-05, + "loss": 0.1708, + "step": 28094 + }, + { + "epoch": 0.03, + "learning_rate": 4.990335540909956e-05, + "loss": 0.0792, + "step": 28095 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903348509010584e-05, + "loss": 0.1123, + "step": 28096 + }, + { + "epoch": 0.03, + "learning_rate": 4.990334160867578e-05, + "loss": 0.0985, + "step": 28097 + }, + { + "epoch": 0.03, + "learning_rate": 4.990333470809513e-05, + "loss": 0.0868, + "step": 28098 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903327807268654e-05, + "loss": 0.1019, + "step": 28099 + }, + { + "epoch": 0.03, + "learning_rate": 4.990332090619634e-05, + "loss": 0.1015, + "step": 28100 + }, + { + "epoch": 0.03, + "learning_rate": 4.990331400487819e-05, + "loss": 0.4918, + "step": 28101 + }, + { + "epoch": 0.03, + "learning_rate": 4.990330710331421e-05, + "loss": 0.1164, + "step": 28102 + }, + { + "epoch": 0.03, + "learning_rate": 4.990330020150439e-05, + "loss": 0.3043, + "step": 28103 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903293299448733e-05, + "loss": 0.5307, + "step": 28104 + }, + { + "epoch": 0.03, + "learning_rate": 4.990328639714724e-05, + "loss": 0.2775, + "step": 28105 + }, + { + "epoch": 0.03, + "learning_rate": 4.990327949459992e-05, + "loss": 0.5126, + "step": 28106 + }, + { + "epoch": 0.03, + "learning_rate": 4.990327259180676e-05, + "loss": 0.2787, + "step": 28107 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903265688767766e-05, + "loss": 0.0764, + "step": 28108 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903258785482945e-05, + "loss": 0.1381, + "step": 28109 + }, + { + "epoch": 0.03, + "learning_rate": 4.990325188195229e-05, + "loss": 0.1171, + "step": 28110 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903244978175786e-05, + "loss": 0.243, + "step": 28111 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903238074153456e-05, + "loss": 0.2703, + "step": 28112 + }, + { + "epoch": 0.03, + "learning_rate": 4.990323116988529e-05, + "loss": 0.3748, + "step": 28113 + }, + { + "epoch": 0.03, + "learning_rate": 4.990322426537129e-05, + "loss": 0.8999, + "step": 28114 + }, + { + "epoch": 0.03, + "learning_rate": 4.990321736061146e-05, + "loss": 0.7932, + "step": 28115 + }, + { + "epoch": 0.03, + "learning_rate": 4.990321045560579e-05, + "loss": 1.0352, + "step": 28116 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903203550354286e-05, + "loss": 1.2696, + "step": 28117 + }, + { + "epoch": 0.03, + "learning_rate": 4.990319664485695e-05, + "loss": 1.2741, + "step": 28118 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903189739113784e-05, + "loss": 2.2469, + "step": 28119 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903182833124786e-05, + "loss": 1.7212, + "step": 28120 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903175926889945e-05, + "loss": 1.2671, + "step": 28121 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903169020409275e-05, + "loss": 0.8349, + "step": 28122 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903162113682775e-05, + "loss": 0.9352, + "step": 28123 + }, + { + "epoch": 0.03, + "learning_rate": 4.990315520671043e-05, + "loss": 0.2389, + "step": 28124 + }, + { + "epoch": 0.03, + "learning_rate": 4.990314829949226e-05, + "loss": 0.1606, + "step": 28125 + }, + { + "epoch": 0.03, + "learning_rate": 4.990314139202826e-05, + "loss": 0.2806, + "step": 28126 + }, + { + "epoch": 0.03, + "learning_rate": 4.990313448431842e-05, + "loss": 0.9006, + "step": 28127 + }, + { + "epoch": 0.03, + "learning_rate": 4.990312757636275e-05, + "loss": 0.1338, + "step": 28128 + }, + { + "epoch": 0.03, + "learning_rate": 4.990312066816125e-05, + "loss": 0.0675, + "step": 28129 + }, + { + "epoch": 0.03, + "learning_rate": 4.990311375971391e-05, + "loss": 0.0577, + "step": 28130 + }, + { + "epoch": 0.03, + "learning_rate": 4.990310685102074e-05, + "loss": 0.0589, + "step": 28131 + }, + { + "epoch": 0.03, + "learning_rate": 4.990309994208173e-05, + "loss": 0.0446, + "step": 28132 + }, + { + "epoch": 0.03, + "learning_rate": 4.99030930328969e-05, + "loss": 0.0436, + "step": 28133 + }, + { + "epoch": 0.03, + "learning_rate": 4.990308612346623e-05, + "loss": 0.0522, + "step": 28134 + }, + { + "epoch": 0.03, + "learning_rate": 4.990307921378973e-05, + "loss": 0.8274, + "step": 28135 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903072303867395e-05, + "loss": 0.9632, + "step": 28136 + }, + { + "epoch": 0.03, + "learning_rate": 4.990306539369923e-05, + "loss": 0.9154, + "step": 28137 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903058483285224e-05, + "loss": 0.966, + "step": 28138 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903051572625395e-05, + "loss": 0.7533, + "step": 28139 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903044661719736e-05, + "loss": 0.9982, + "step": 28140 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903037750568235e-05, + "loss": 0.9029, + "step": 28141 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903030839170905e-05, + "loss": 0.9644, + "step": 28142 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903023927527745e-05, + "loss": 1.1409, + "step": 28143 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903017015638755e-05, + "loss": 1.1985, + "step": 28144 + }, + { + "epoch": 0.03, + "learning_rate": 4.990301010350393e-05, + "loss": 1.0451, + "step": 28145 + }, + { + "epoch": 0.03, + "learning_rate": 4.990300319112327e-05, + "loss": 1.6279, + "step": 28146 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902996278496785e-05, + "loss": 1.2381, + "step": 28147 + }, + { + "epoch": 0.03, + "learning_rate": 4.990298936562446e-05, + "loss": 1.0972, + "step": 28148 + }, + { + "epoch": 0.03, + "learning_rate": 4.990298245250631e-05, + "loss": 1.0395, + "step": 28149 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902975539142324e-05, + "loss": 0.7105, + "step": 28150 + }, + { + "epoch": 0.03, + "learning_rate": 4.99029686255325e-05, + "loss": 1.5591, + "step": 28151 + }, + { + "epoch": 0.03, + "learning_rate": 4.990296171167685e-05, + "loss": 1.1703, + "step": 28152 + }, + { + "epoch": 0.03, + "learning_rate": 4.990295479757537e-05, + "loss": 1.2056, + "step": 28153 + }, + { + "epoch": 0.03, + "learning_rate": 4.990294788322806e-05, + "loss": 0.8667, + "step": 28154 + }, + { + "epoch": 0.03, + "learning_rate": 4.990294096863492e-05, + "loss": 0.8883, + "step": 28155 + }, + { + "epoch": 0.03, + "learning_rate": 4.990293405379595e-05, + "loss": 0.9588, + "step": 28156 + }, + { + "epoch": 0.03, + "learning_rate": 4.990292713871114e-05, + "loss": 1.1358, + "step": 28157 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902920223380506e-05, + "loss": 1.3921, + "step": 28158 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902913307804036e-05, + "loss": 1.0609, + "step": 28159 + }, + { + "epoch": 0.03, + "learning_rate": 4.990290639198174e-05, + "loss": 1.0441, + "step": 28160 + }, + { + "epoch": 0.03, + "learning_rate": 4.990289947591361e-05, + "loss": 1.1335, + "step": 28161 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902892559599646e-05, + "loss": 1.1218, + "step": 28162 + }, + { + "epoch": 0.03, + "learning_rate": 4.990288564303985e-05, + "loss": 0.7981, + "step": 28163 + }, + { + "epoch": 0.03, + "learning_rate": 4.990287872623424e-05, + "loss": 1.0358, + "step": 28164 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902871809182786e-05, + "loss": 1.1926, + "step": 28165 + }, + { + "epoch": 0.03, + "learning_rate": 4.99028648918855e-05, + "loss": 1.3177, + "step": 28166 + }, + { + "epoch": 0.03, + "learning_rate": 4.990285797434239e-05, + "loss": 0.8957, + "step": 28167 + }, + { + "epoch": 0.03, + "learning_rate": 4.990285105655344e-05, + "loss": 1.0052, + "step": 28168 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902844138518665e-05, + "loss": 1.0644, + "step": 28169 + }, + { + "epoch": 0.03, + "learning_rate": 4.990283722023806e-05, + "loss": 0.6659, + "step": 28170 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902830301711626e-05, + "loss": 0.9906, + "step": 28171 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902823382939356e-05, + "loss": 1.0605, + "step": 28172 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902816463921256e-05, + "loss": 0.9513, + "step": 28173 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902809544657334e-05, + "loss": 0.7361, + "step": 28174 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902802625147576e-05, + "loss": 0.9466, + "step": 28175 + }, + { + "epoch": 0.03, + "learning_rate": 4.990279570539199e-05, + "loss": 0.8939, + "step": 28176 + }, + { + "epoch": 0.03, + "learning_rate": 4.990278878539058e-05, + "loss": 1.091, + "step": 28177 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902781865143326e-05, + "loss": 1.2918, + "step": 28178 + }, + { + "epoch": 0.03, + "learning_rate": 4.990277494465026e-05, + "loss": 0.8869, + "step": 28179 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902768023911346e-05, + "loss": 0.4539, + "step": 28180 + }, + { + "epoch": 0.03, + "learning_rate": 4.990276110292661e-05, + "loss": 0.4692, + "step": 28181 + }, + { + "epoch": 0.03, + "learning_rate": 4.990275418169604e-05, + "loss": 0.4196, + "step": 28182 + }, + { + "epoch": 0.03, + "learning_rate": 4.990274726021965e-05, + "loss": 0.4764, + "step": 28183 + }, + { + "epoch": 0.03, + "learning_rate": 4.990274033849743e-05, + "loss": 0.3788, + "step": 28184 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902733416529376e-05, + "loss": 0.3883, + "step": 28185 + }, + { + "epoch": 0.03, + "learning_rate": 4.990272649431549e-05, + "loss": 0.4327, + "step": 28186 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902719571855786e-05, + "loss": 0.3982, + "step": 28187 + }, + { + "epoch": 0.03, + "learning_rate": 4.990271264915024e-05, + "loss": 0.4688, + "step": 28188 + }, + { + "epoch": 0.03, + "learning_rate": 4.990270572619887e-05, + "loss": 0.5443, + "step": 28189 + }, + { + "epoch": 0.03, + "learning_rate": 4.990269880300167e-05, + "loss": 0.3551, + "step": 28190 + }, + { + "epoch": 0.03, + "learning_rate": 4.990269187955864e-05, + "loss": 0.4007, + "step": 28191 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902684955869785e-05, + "loss": 0.4227, + "step": 28192 + }, + { + "epoch": 0.03, + "learning_rate": 4.99026780319351e-05, + "loss": 0.3538, + "step": 28193 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902671107754585e-05, + "loss": 0.3862, + "step": 28194 + }, + { + "epoch": 0.03, + "learning_rate": 4.990266418332824e-05, + "loss": 0.3946, + "step": 28195 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902657258656074e-05, + "loss": 0.3744, + "step": 28196 + }, + { + "epoch": 0.03, + "learning_rate": 4.990265033373807e-05, + "loss": 0.4054, + "step": 28197 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902643408574246e-05, + "loss": 0.3262, + "step": 28198 + }, + { + "epoch": 0.03, + "learning_rate": 4.990263648316459e-05, + "loss": 0.3724, + "step": 28199 + }, + { + "epoch": 0.03, + "learning_rate": 4.99026295575091e-05, + "loss": 0.8889, + "step": 28200 + }, + { + "epoch": 0.03, + "learning_rate": 4.990262263160779e-05, + "loss": 1.1527, + "step": 28201 + }, + { + "epoch": 0.03, + "learning_rate": 4.990261570546064e-05, + "loss": 1.1593, + "step": 28202 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902608779067675e-05, + "loss": 0.6983, + "step": 28203 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902601852428874e-05, + "loss": 1.044, + "step": 28204 + }, + { + "epoch": 0.03, + "learning_rate": 4.990259492554425e-05, + "loss": 1.0238, + "step": 28205 + }, + { + "epoch": 0.03, + "learning_rate": 4.99025879984138e-05, + "loss": 0.8343, + "step": 28206 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902581071037516e-05, + "loss": 0.8743, + "step": 28207 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902574143415405e-05, + "loss": 0.7489, + "step": 28208 + }, + { + "epoch": 0.03, + "learning_rate": 4.990256721554747e-05, + "loss": 0.9361, + "step": 28209 + }, + { + "epoch": 0.03, + "learning_rate": 4.99025602874337e-05, + "loss": 0.8569, + "step": 28210 + }, + { + "epoch": 0.03, + "learning_rate": 4.990255335907411e-05, + "loss": 0.7816, + "step": 28211 + }, + { + "epoch": 0.03, + "learning_rate": 4.990254643046869e-05, + "loss": 1.4015, + "step": 28212 + }, + { + "epoch": 0.03, + "learning_rate": 4.990253950161744e-05, + "loss": 0.7967, + "step": 28213 + }, + { + "epoch": 0.03, + "learning_rate": 4.990253257252036e-05, + "loss": 0.9797, + "step": 28214 + }, + { + "epoch": 0.03, + "learning_rate": 4.990252564317747e-05, + "loss": 1.334, + "step": 28215 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902518713588735e-05, + "loss": 0.9531, + "step": 28216 + }, + { + "epoch": 0.03, + "learning_rate": 4.990251178375418e-05, + "loss": 0.9718, + "step": 28217 + }, + { + "epoch": 0.03, + "learning_rate": 4.990250485367379e-05, + "loss": 0.8477, + "step": 28218 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902497923347584e-05, + "loss": 0.9986, + "step": 28219 + }, + { + "epoch": 0.03, + "learning_rate": 4.990249099277555e-05, + "loss": 1.2111, + "step": 28220 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902484061957683e-05, + "loss": 1.1321, + "step": 28221 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902477130893996e-05, + "loss": 1.0763, + "step": 28222 + }, + { + "epoch": 0.03, + "learning_rate": 4.990247019958447e-05, + "loss": 1.3244, + "step": 28223 + }, + { + "epoch": 0.03, + "learning_rate": 4.990246326802913e-05, + "loss": 1.1802, + "step": 28224 + }, + { + "epoch": 0.03, + "learning_rate": 4.990245633622795e-05, + "loss": 0.9435, + "step": 28225 + }, + { + "epoch": 0.03, + "learning_rate": 4.990244940418096e-05, + "loss": 0.8416, + "step": 28226 + }, + { + "epoch": 0.03, + "learning_rate": 4.990244247188813e-05, + "loss": 0.9066, + "step": 28227 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902435539349484e-05, + "loss": 0.9976, + "step": 28228 + }, + { + "epoch": 0.03, + "learning_rate": 4.990242860656501e-05, + "loss": 0.9405, + "step": 28229 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902421673534703e-05, + "loss": 1.0618, + "step": 28230 + }, + { + "epoch": 0.03, + "learning_rate": 4.990241474025857e-05, + "loss": 1.2564, + "step": 28231 + }, + { + "epoch": 0.03, + "learning_rate": 4.990240780673662e-05, + "loss": 0.4284, + "step": 28232 + }, + { + "epoch": 0.03, + "learning_rate": 4.990240087296884e-05, + "loss": 0.3401, + "step": 28233 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902393938955225e-05, + "loss": 0.1685, + "step": 28234 + }, + { + "epoch": 0.03, + "learning_rate": 4.990238700469579e-05, + "loss": 0.1363, + "step": 28235 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902380070190534e-05, + "loss": 0.1509, + "step": 28236 + }, + { + "epoch": 0.03, + "learning_rate": 4.990237313543945e-05, + "loss": 0.0958, + "step": 28237 + }, + { + "epoch": 0.03, + "learning_rate": 4.990236620044253e-05, + "loss": 0.2739, + "step": 28238 + }, + { + "epoch": 0.03, + "learning_rate": 4.99023592651998e-05, + "loss": 0.2117, + "step": 28239 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902352329711234e-05, + "loss": 0.1476, + "step": 28240 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902345393976845e-05, + "loss": 0.0874, + "step": 28241 + }, + { + "epoch": 0.03, + "learning_rate": 4.990233845799663e-05, + "loss": 0.0692, + "step": 28242 + }, + { + "epoch": 0.03, + "learning_rate": 4.99023315217706e-05, + "loss": 0.0705, + "step": 28243 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902324585298734e-05, + "loss": 0.0752, + "step": 28244 + }, + { + "epoch": 0.03, + "learning_rate": 4.990231764858104e-05, + "loss": 0.101, + "step": 28245 + }, + { + "epoch": 0.03, + "learning_rate": 4.990231071161753e-05, + "loss": 0.2341, + "step": 28246 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902303774408186e-05, + "loss": 0.8638, + "step": 28247 + }, + { + "epoch": 0.03, + "learning_rate": 4.990229683695302e-05, + "loss": 0.9512, + "step": 28248 + }, + { + "epoch": 0.03, + "learning_rate": 4.990228989925203e-05, + "loss": 1.1633, + "step": 28249 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902282961305216e-05, + "loss": 1.0772, + "step": 28250 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902276023112574e-05, + "loss": 1.0707, + "step": 28251 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902269084674116e-05, + "loss": 1.0195, + "step": 28252 + }, + { + "epoch": 0.03, + "learning_rate": 4.990226214598982e-05, + "loss": 0.8853, + "step": 28253 + }, + { + "epoch": 0.03, + "learning_rate": 4.990225520705971e-05, + "loss": 1.1606, + "step": 28254 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902248267883774e-05, + "loss": 0.9902, + "step": 28255 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902241328462006e-05, + "loss": 0.8227, + "step": 28256 + }, + { + "epoch": 0.03, + "learning_rate": 4.990223438879442e-05, + "loss": 1.228, + "step": 28257 + }, + { + "epoch": 0.03, + "learning_rate": 4.990222744888101e-05, + "loss": 1.0418, + "step": 28258 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902220508721774e-05, + "loss": 1.0187, + "step": 28259 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902213568316716e-05, + "loss": 1.033, + "step": 28260 + }, + { + "epoch": 0.03, + "learning_rate": 4.990220662766583e-05, + "loss": 1.202, + "step": 28261 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902199686769125e-05, + "loss": 1.0442, + "step": 28262 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902192745626586e-05, + "loss": 1.0568, + "step": 28263 + }, + { + "epoch": 0.03, + "learning_rate": 4.990218580423823e-05, + "loss": 0.8532, + "step": 28264 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902178862604053e-05, + "loss": 1.0368, + "step": 28265 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902171920724053e-05, + "loss": 1.0165, + "step": 28266 + }, + { + "epoch": 0.03, + "learning_rate": 4.990216497859822e-05, + "loss": 0.973, + "step": 28267 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902158036226566e-05, + "loss": 0.8017, + "step": 28268 + }, + { + "epoch": 0.03, + "learning_rate": 4.99021510936091e-05, + "loss": 1.253, + "step": 28269 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902144150745794e-05, + "loss": 1.5565, + "step": 28270 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902137207636675e-05, + "loss": 1.0083, + "step": 28271 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902130264281727e-05, + "loss": 1.1158, + "step": 28272 + }, + { + "epoch": 0.03, + "learning_rate": 4.990212332068096e-05, + "loss": 0.8845, + "step": 28273 + }, + { + "epoch": 0.03, + "learning_rate": 4.990211637683436e-05, + "loss": 0.8166, + "step": 28274 + }, + { + "epoch": 0.03, + "learning_rate": 4.990210943274195e-05, + "loss": 0.8357, + "step": 28275 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902102488403714e-05, + "loss": 0.9141, + "step": 28276 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902095543819646e-05, + "loss": 0.9203, + "step": 28277 + }, + { + "epoch": 0.03, + "learning_rate": 4.990208859898977e-05, + "loss": 0.802, + "step": 28278 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902081653914056e-05, + "loss": 1.1592, + "step": 28279 + }, + { + "epoch": 0.03, + "learning_rate": 4.990207470859253e-05, + "loss": 1.0773, + "step": 28280 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902067763025176e-05, + "loss": 1.2081, + "step": 28281 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902060817211995e-05, + "loss": 1.504, + "step": 28282 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902053871153e-05, + "loss": 0.9278, + "step": 28283 + }, + { + "epoch": 0.03, + "learning_rate": 4.990204692484818e-05, + "loss": 1.0537, + "step": 28284 + }, + { + "epoch": 0.03, + "learning_rate": 4.990203997829753e-05, + "loss": 0.8877, + "step": 28285 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902033031501075e-05, + "loss": 1.2112, + "step": 28286 + }, + { + "epoch": 0.03, + "learning_rate": 4.990202608445878e-05, + "loss": 1.1075, + "step": 28287 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902019137170667e-05, + "loss": 1.0489, + "step": 28288 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902012189636735e-05, + "loss": 0.9458, + "step": 28289 + }, + { + "epoch": 0.03, + "learning_rate": 4.990200524185698e-05, + "loss": 0.8962, + "step": 28290 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901998293831405e-05, + "loss": 0.9982, + "step": 28291 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901991345560006e-05, + "loss": 0.8113, + "step": 28292 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901984397042785e-05, + "loss": 0.8342, + "step": 28293 + }, + { + "epoch": 0.03, + "learning_rate": 4.990197744827974e-05, + "loss": 0.6742, + "step": 28294 + }, + { + "epoch": 0.03, + "learning_rate": 4.990197049927088e-05, + "loss": 0.883, + "step": 28295 + }, + { + "epoch": 0.03, + "learning_rate": 4.990196355001619e-05, + "loss": 0.9611, + "step": 28296 + }, + { + "epoch": 0.03, + "learning_rate": 4.990195660051568e-05, + "loss": 1.0629, + "step": 28297 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901949650769353e-05, + "loss": 0.8641, + "step": 28298 + }, + { + "epoch": 0.03, + "learning_rate": 4.99019427007772e-05, + "loss": 0.8821, + "step": 28299 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901935750539225e-05, + "loss": 1.0841, + "step": 28300 + }, + { + "epoch": 0.03, + "learning_rate": 4.990192880005543e-05, + "loss": 0.6032, + "step": 28301 + }, + { + "epoch": 0.03, + "learning_rate": 4.990192184932582e-05, + "loss": 0.9826, + "step": 28302 + }, + { + "epoch": 0.03, + "learning_rate": 4.990191489835038e-05, + "loss": 0.7673, + "step": 28303 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901907947129124e-05, + "loss": 0.8564, + "step": 28304 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901900995662046e-05, + "loss": 1.152, + "step": 28305 + }, + { + "epoch": 0.03, + "learning_rate": 4.990189404394914e-05, + "loss": 0.9959, + "step": 28306 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901887091990416e-05, + "loss": 0.5881, + "step": 28307 + }, + { + "epoch": 0.03, + "learning_rate": 4.990188013978587e-05, + "loss": 0.8403, + "step": 28308 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901873187335516e-05, + "loss": 0.3552, + "step": 28309 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901866234639325e-05, + "loss": 0.6944, + "step": 28310 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901859281697326e-05, + "loss": 1.0386, + "step": 28311 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901852328509504e-05, + "loss": 0.6666, + "step": 28312 + }, + { + "epoch": 0.03, + "learning_rate": 4.990184537507585e-05, + "loss": 0.2924, + "step": 28313 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901838421396385e-05, + "loss": 0.7351, + "step": 28314 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901831467471096e-05, + "loss": 0.6323, + "step": 28315 + }, + { + "epoch": 0.03, + "learning_rate": 4.990182451329999e-05, + "loss": 0.7889, + "step": 28316 + }, + { + "epoch": 0.03, + "learning_rate": 4.990181755888306e-05, + "loss": 1.1487, + "step": 28317 + }, + { + "epoch": 0.03, + "learning_rate": 4.990181060422031e-05, + "loss": 0.9646, + "step": 28318 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901803649311746e-05, + "loss": 0.687, + "step": 28319 + }, + { + "epoch": 0.03, + "learning_rate": 4.990179669415736e-05, + "loss": 1.1109, + "step": 28320 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901789738757146e-05, + "loss": 1.1146, + "step": 28321 + }, + { + "epoch": 0.03, + "learning_rate": 4.990178278311112e-05, + "loss": 1.0354, + "step": 28322 + }, + { + "epoch": 0.03, + "learning_rate": 4.990177582721927e-05, + "loss": 0.9813, + "step": 28323 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901768871081605e-05, + "loss": 1.0834, + "step": 28324 + }, + { + "epoch": 0.03, + "learning_rate": 4.990176191469812e-05, + "loss": 0.9412, + "step": 28325 + }, + { + "epoch": 0.03, + "learning_rate": 4.990175495806881e-05, + "loss": 0.9777, + "step": 28326 + }, + { + "epoch": 0.03, + "learning_rate": 4.990174800119368e-05, + "loss": 0.7876, + "step": 28327 + }, + { + "epoch": 0.03, + "learning_rate": 4.990174104407274e-05, + "loss": 0.813, + "step": 28328 + }, + { + "epoch": 0.03, + "learning_rate": 4.990173408670596e-05, + "loss": 1.0251, + "step": 28329 + }, + { + "epoch": 0.03, + "learning_rate": 4.990172712909338e-05, + "loss": 1.1366, + "step": 28330 + }, + { + "epoch": 0.03, + "learning_rate": 4.990172017123498e-05, + "loss": 0.963, + "step": 28331 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901713213130755e-05, + "loss": 0.8269, + "step": 28332 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901706254780714e-05, + "loss": 1.1094, + "step": 28333 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901699296184844e-05, + "loss": 0.8338, + "step": 28334 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901692337343165e-05, + "loss": 1.0127, + "step": 28335 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901685378255664e-05, + "loss": 1.3828, + "step": 28336 + }, + { + "epoch": 0.03, + "learning_rate": 4.990167841892235e-05, + "loss": 1.3905, + "step": 28337 + }, + { + "epoch": 0.03, + "learning_rate": 4.99016714593432e-05, + "loss": 1.0525, + "step": 28338 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901664499518245e-05, + "loss": 1.2133, + "step": 28339 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901657539447474e-05, + "loss": 0.7084, + "step": 28340 + }, + { + "epoch": 0.03, + "learning_rate": 4.990165057913087e-05, + "loss": 1.1137, + "step": 28341 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901643618568464e-05, + "loss": 1.0293, + "step": 28342 + }, + { + "epoch": 0.03, + "learning_rate": 4.990163665776023e-05, + "loss": 1.179, + "step": 28343 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901629696706184e-05, + "loss": 0.8268, + "step": 28344 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901622735406314e-05, + "loss": 0.7043, + "step": 28345 + }, + { + "epoch": 0.03, + "learning_rate": 4.990161577386062e-05, + "loss": 0.381, + "step": 28346 + }, + { + "epoch": 0.03, + "learning_rate": 4.990160881206912e-05, + "loss": 0.967, + "step": 28347 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901601850031796e-05, + "loss": 1.2824, + "step": 28348 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901594887748656e-05, + "loss": 0.7894, + "step": 28349 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901587925219693e-05, + "loss": 0.7901, + "step": 28350 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901580962444915e-05, + "loss": 1.2485, + "step": 28351 + }, + { + "epoch": 0.03, + "learning_rate": 4.990157399942432e-05, + "loss": 1.0291, + "step": 28352 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901567036157905e-05, + "loss": 0.8999, + "step": 28353 + }, + { + "epoch": 0.03, + "learning_rate": 4.990156007264567e-05, + "loss": 0.7966, + "step": 28354 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901553108887625e-05, + "loss": 0.8517, + "step": 28355 + }, + { + "epoch": 0.03, + "learning_rate": 4.990154614488376e-05, + "loss": 1.205, + "step": 28356 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901539180634075e-05, + "loss": 0.9917, + "step": 28357 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901532216138574e-05, + "loss": 1.1575, + "step": 28358 + }, + { + "epoch": 0.03, + "learning_rate": 4.990152525139725e-05, + "loss": 0.9967, + "step": 28359 + }, + { + "epoch": 0.03, + "learning_rate": 4.990151828641012e-05, + "loss": 1.0684, + "step": 28360 + }, + { + "epoch": 0.03, + "learning_rate": 4.990151132117716e-05, + "loss": 1.2854, + "step": 28361 + }, + { + "epoch": 0.03, + "learning_rate": 4.990150435569839e-05, + "loss": 0.8797, + "step": 28362 + }, + { + "epoch": 0.03, + "learning_rate": 4.990149738997381e-05, + "loss": 0.9127, + "step": 28363 + }, + { + "epoch": 0.03, + "learning_rate": 4.99014904240034e-05, + "loss": 0.9272, + "step": 28364 + }, + { + "epoch": 0.03, + "learning_rate": 4.990148345778718e-05, + "loss": 1.0818, + "step": 28365 + }, + { + "epoch": 0.03, + "learning_rate": 4.990147649132514e-05, + "loss": 1.237, + "step": 28366 + }, + { + "epoch": 0.03, + "learning_rate": 4.990146952461728e-05, + "loss": 0.8212, + "step": 28367 + }, + { + "epoch": 0.03, + "learning_rate": 4.990146255766361e-05, + "loss": 0.8524, + "step": 28368 + }, + { + "epoch": 0.03, + "learning_rate": 4.990145559046412e-05, + "loss": 1.082, + "step": 28369 + }, + { + "epoch": 0.03, + "learning_rate": 4.990144862301881e-05, + "loss": 1.1686, + "step": 28370 + }, + { + "epoch": 0.03, + "learning_rate": 4.990144165532769e-05, + "loss": 0.9144, + "step": 28371 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901434687390746e-05, + "loss": 0.884, + "step": 28372 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901427719207995e-05, + "loss": 0.9381, + "step": 28373 + }, + { + "epoch": 0.03, + "learning_rate": 4.990142075077942e-05, + "loss": 0.9497, + "step": 28374 + }, + { + "epoch": 0.03, + "learning_rate": 4.990141378210503e-05, + "loss": 1.1184, + "step": 28375 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901406813184826e-05, + "loss": 1.0486, + "step": 28376 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901399844018804e-05, + "loss": 0.9403, + "step": 28377 + }, + { + "epoch": 0.03, + "learning_rate": 4.990139287460697e-05, + "loss": 1.0929, + "step": 28378 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901385904949315e-05, + "loss": 0.9653, + "step": 28379 + }, + { + "epoch": 0.03, + "learning_rate": 4.990137893504585e-05, + "loss": 1.2322, + "step": 28380 + }, + { + "epoch": 0.03, + "learning_rate": 4.990137196489656e-05, + "loss": 1.1323, + "step": 28381 + }, + { + "epoch": 0.03, + "learning_rate": 4.990136499450146e-05, + "loss": 0.849, + "step": 28382 + }, + { + "epoch": 0.03, + "learning_rate": 4.990135802386055e-05, + "loss": 0.9672, + "step": 28383 + }, + { + "epoch": 0.03, + "learning_rate": 4.990135105297381e-05, + "loss": 0.8402, + "step": 28384 + }, + { + "epoch": 0.03, + "learning_rate": 4.990134408184126e-05, + "loss": 0.9123, + "step": 28385 + }, + { + "epoch": 0.03, + "learning_rate": 4.99013371104629e-05, + "loss": 1.0671, + "step": 28386 + }, + { + "epoch": 0.03, + "learning_rate": 4.990133013883872e-05, + "loss": 0.6706, + "step": 28387 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901323166968726e-05, + "loss": 1.1225, + "step": 28388 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901316194852916e-05, + "loss": 1.2613, + "step": 28389 + }, + { + "epoch": 0.03, + "learning_rate": 4.990130922249129e-05, + "loss": 1.1993, + "step": 28390 + }, + { + "epoch": 0.03, + "learning_rate": 4.990130224988385e-05, + "loss": 1.0665, + "step": 28391 + }, + { + "epoch": 0.03, + "learning_rate": 4.990129527703059e-05, + "loss": 0.9423, + "step": 28392 + }, + { + "epoch": 0.03, + "learning_rate": 4.990128830393152e-05, + "loss": 1.1464, + "step": 28393 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901281330586636e-05, + "loss": 0.6218, + "step": 28394 + }, + { + "epoch": 0.03, + "learning_rate": 4.990127435699593e-05, + "loss": 1.0324, + "step": 28395 + }, + { + "epoch": 0.03, + "learning_rate": 4.990126738315942e-05, + "loss": 1.0962, + "step": 28396 + }, + { + "epoch": 0.03, + "learning_rate": 4.990126040907709e-05, + "loss": 0.9578, + "step": 28397 + }, + { + "epoch": 0.03, + "learning_rate": 4.990125343474894e-05, + "loss": 0.9774, + "step": 28398 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901246460174984e-05, + "loss": 1.1583, + "step": 28399 + }, + { + "epoch": 0.03, + "learning_rate": 4.990123948535521e-05, + "loss": 1.0595, + "step": 28400 + }, + { + "epoch": 0.03, + "learning_rate": 4.990123251028962e-05, + "loss": 1.0672, + "step": 28401 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901225534978214e-05, + "loss": 1.0685, + "step": 28402 + }, + { + "epoch": 0.03, + "learning_rate": 4.990121855942099e-05, + "loss": 0.9695, + "step": 28403 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901211583617965e-05, + "loss": 1.1479, + "step": 28404 + }, + { + "epoch": 0.03, + "learning_rate": 4.990120460756912e-05, + "loss": 1.0288, + "step": 28405 + }, + { + "epoch": 0.03, + "learning_rate": 4.990119763127445e-05, + "loss": 1.0141, + "step": 28406 + }, + { + "epoch": 0.03, + "learning_rate": 4.990119065473398e-05, + "loss": 1.2768, + "step": 28407 + }, + { + "epoch": 0.03, + "learning_rate": 4.990118367794769e-05, + "loss": 1.0206, + "step": 28408 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901176700915587e-05, + "loss": 0.8942, + "step": 28409 + }, + { + "epoch": 0.03, + "learning_rate": 4.990116972363767e-05, + "loss": 1.1095, + "step": 28410 + }, + { + "epoch": 0.03, + "learning_rate": 4.990116274611394e-05, + "loss": 1.043, + "step": 28411 + }, + { + "epoch": 0.03, + "learning_rate": 4.99011557683444e-05, + "loss": 1.0484, + "step": 28412 + }, + { + "epoch": 0.03, + "learning_rate": 4.990114879032903e-05, + "loss": 0.9186, + "step": 28413 + }, + { + "epoch": 0.03, + "learning_rate": 4.990114181206786e-05, + "loss": 0.8102, + "step": 28414 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901134833560883e-05, + "loss": 0.9608, + "step": 28415 + }, + { + "epoch": 0.03, + "learning_rate": 4.990112785480808e-05, + "loss": 0.8926, + "step": 28416 + }, + { + "epoch": 0.03, + "learning_rate": 4.990112087580947e-05, + "loss": 1.1341, + "step": 28417 + }, + { + "epoch": 0.03, + "learning_rate": 4.990111389656505e-05, + "loss": 1.053, + "step": 28418 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901106917074805e-05, + "loss": 0.9904, + "step": 28419 + }, + { + "epoch": 0.03, + "learning_rate": 4.990109993733875e-05, + "loss": 0.5957, + "step": 28420 + }, + { + "epoch": 0.03, + "learning_rate": 4.990109295735689e-05, + "loss": 1.2327, + "step": 28421 + }, + { + "epoch": 0.03, + "learning_rate": 4.99010859771292e-05, + "loss": 1.0083, + "step": 28422 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901078996655716e-05, + "loss": 0.8457, + "step": 28423 + }, + { + "epoch": 0.03, + "learning_rate": 4.990107201593641e-05, + "loss": 0.9625, + "step": 28424 + }, + { + "epoch": 0.03, + "learning_rate": 4.990106503497129e-05, + "loss": 0.704, + "step": 28425 + }, + { + "epoch": 0.03, + "learning_rate": 4.990105805376036e-05, + "loss": 0.668, + "step": 28426 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901051072303615e-05, + "loss": 0.7635, + "step": 28427 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901044090601066e-05, + "loss": 1.2434, + "step": 28428 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901037108652694e-05, + "loss": 1.0575, + "step": 28429 + }, + { + "epoch": 0.03, + "learning_rate": 4.990103012645851e-05, + "loss": 0.8439, + "step": 28430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901023144018523e-05, + "loss": 1.1202, + "step": 28431 + }, + { + "epoch": 0.03, + "learning_rate": 4.990101616133271e-05, + "loss": 1.0784, + "step": 28432 + }, + { + "epoch": 0.03, + "learning_rate": 4.990100917840109e-05, + "loss": 1.2409, + "step": 28433 + }, + { + "epoch": 0.03, + "learning_rate": 4.990100219522367e-05, + "loss": 0.9861, + "step": 28434 + }, + { + "epoch": 0.03, + "learning_rate": 4.990099521180042e-05, + "loss": 0.6059, + "step": 28435 + }, + { + "epoch": 0.03, + "learning_rate": 4.990098822813136e-05, + "loss": 0.7963, + "step": 28436 + }, + { + "epoch": 0.03, + "learning_rate": 4.99009812442165e-05, + "loss": 0.9145, + "step": 28437 + }, + { + "epoch": 0.03, + "learning_rate": 4.990097426005582e-05, + "loss": 1.3814, + "step": 28438 + }, + { + "epoch": 0.03, + "learning_rate": 4.990096727564933e-05, + "loss": 0.9499, + "step": 28439 + }, + { + "epoch": 0.03, + "learning_rate": 4.990096029099702e-05, + "loss": 0.7156, + "step": 28440 + }, + { + "epoch": 0.03, + "learning_rate": 4.990095330609891e-05, + "loss": 0.9307, + "step": 28441 + }, + { + "epoch": 0.03, + "learning_rate": 4.990094632095499e-05, + "loss": 1.083, + "step": 28442 + }, + { + "epoch": 0.03, + "learning_rate": 4.990093933556524e-05, + "loss": 0.6087, + "step": 28443 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900932349929696e-05, + "loss": 0.931, + "step": 28444 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900925364048334e-05, + "loss": 0.992, + "step": 28445 + }, + { + "epoch": 0.03, + "learning_rate": 4.990091837792117e-05, + "loss": 1.1452, + "step": 28446 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900911391548175e-05, + "loss": 0.8142, + "step": 28447 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900904404929385e-05, + "loss": 0.9016, + "step": 28448 + }, + { + "epoch": 0.03, + "learning_rate": 4.990089741806478e-05, + "loss": 0.913, + "step": 28449 + }, + { + "epoch": 0.03, + "learning_rate": 4.990089043095436e-05, + "loss": 0.6357, + "step": 28450 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900883443598136e-05, + "loss": 0.8098, + "step": 28451 + }, + { + "epoch": 0.03, + "learning_rate": 4.990087645599609e-05, + "loss": 1.1466, + "step": 28452 + }, + { + "epoch": 0.03, + "learning_rate": 4.990086946814824e-05, + "loss": 0.9825, + "step": 28453 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900862480054586e-05, + "loss": 0.997, + "step": 28454 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900855491715107e-05, + "loss": 0.8618, + "step": 28455 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900848503129825e-05, + "loss": 1.2629, + "step": 28456 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900841514298735e-05, + "loss": 0.8893, + "step": 28457 + }, + { + "epoch": 0.03, + "learning_rate": 4.990083452522183e-05, + "loss": 0.3115, + "step": 28458 + }, + { + "epoch": 0.03, + "learning_rate": 4.990082753589911e-05, + "loss": 1.0696, + "step": 28459 + }, + { + "epoch": 0.03, + "learning_rate": 4.990082054633058e-05, + "loss": 1.4425, + "step": 28460 + }, + { + "epoch": 0.03, + "learning_rate": 4.990081355651625e-05, + "loss": 0.4861, + "step": 28461 + }, + { + "epoch": 0.03, + "learning_rate": 4.990080656645611e-05, + "loss": 0.2454, + "step": 28462 + }, + { + "epoch": 0.03, + "learning_rate": 4.990079957615015e-05, + "loss": 0.6129, + "step": 28463 + }, + { + "epoch": 0.03, + "learning_rate": 4.990079258559838e-05, + "loss": 0.6831, + "step": 28464 + }, + { + "epoch": 0.03, + "learning_rate": 4.99007855948008e-05, + "loss": 1.0326, + "step": 28465 + }, + { + "epoch": 0.03, + "learning_rate": 4.990077860375742e-05, + "loss": 1.0479, + "step": 28466 + }, + { + "epoch": 0.03, + "learning_rate": 4.990077161246822e-05, + "loss": 1.1652, + "step": 28467 + }, + { + "epoch": 0.03, + "learning_rate": 4.990076462093321e-05, + "loss": 0.8019, + "step": 28468 + }, + { + "epoch": 0.03, + "learning_rate": 4.99007576291524e-05, + "loss": 0.8394, + "step": 28469 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900750637125774e-05, + "loss": 0.3869, + "step": 28470 + }, + { + "epoch": 0.03, + "learning_rate": 4.990074364485334e-05, + "loss": 1.0521, + "step": 28471 + }, + { + "epoch": 0.03, + "learning_rate": 4.990073665233509e-05, + "loss": 0.6032, + "step": 28472 + }, + { + "epoch": 0.03, + "learning_rate": 4.990072965957103e-05, + "loss": 1.002, + "step": 28473 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900722666561175e-05, + "loss": 1.3056, + "step": 28474 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900715673305496e-05, + "loss": 1.0339, + "step": 28475 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900708679804015e-05, + "loss": 1.0378, + "step": 28476 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900701686056725e-05, + "loss": 0.8763, + "step": 28477 + }, + { + "epoch": 0.03, + "learning_rate": 4.990069469206362e-05, + "loss": 0.3617, + "step": 28478 + }, + { + "epoch": 0.03, + "learning_rate": 4.990068769782471e-05, + "loss": 0.7455, + "step": 28479 + }, + { + "epoch": 0.03, + "learning_rate": 4.990068070333999e-05, + "loss": 0.8597, + "step": 28480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900673708609455e-05, + "loss": 0.6916, + "step": 28481 + }, + { + "epoch": 0.03, + "learning_rate": 4.990066671363312e-05, + "loss": 0.9329, + "step": 28482 + }, + { + "epoch": 0.03, + "learning_rate": 4.990065971841097e-05, + "loss": 1.4452, + "step": 28483 + }, + { + "epoch": 0.03, + "learning_rate": 4.990065272294302e-05, + "loss": 1.1547, + "step": 28484 + }, + { + "epoch": 0.03, + "learning_rate": 4.990064572722925e-05, + "loss": 1.1403, + "step": 28485 + }, + { + "epoch": 0.03, + "learning_rate": 4.990063873126968e-05, + "loss": 0.9198, + "step": 28486 + }, + { + "epoch": 0.03, + "learning_rate": 4.99006317350643e-05, + "loss": 1.2678, + "step": 28487 + }, + { + "epoch": 0.03, + "learning_rate": 4.99006247386131e-05, + "loss": 0.9227, + "step": 28488 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900617741916105e-05, + "loss": 1.1111, + "step": 28489 + }, + { + "epoch": 0.03, + "learning_rate": 4.99006107449733e-05, + "loss": 0.7573, + "step": 28490 + }, + { + "epoch": 0.03, + "learning_rate": 4.990060374778468e-05, + "loss": 1.0136, + "step": 28491 + }, + { + "epoch": 0.03, + "learning_rate": 4.990059675035026e-05, + "loss": 0.8462, + "step": 28492 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900589752670026e-05, + "loss": 1.2104, + "step": 28493 + }, + { + "epoch": 0.03, + "learning_rate": 4.990058275474398e-05, + "loss": 1.0044, + "step": 28494 + }, + { + "epoch": 0.03, + "learning_rate": 4.990057575657214e-05, + "loss": 0.2718, + "step": 28495 + }, + { + "epoch": 0.03, + "learning_rate": 4.990056875815448e-05, + "loss": 0.5345, + "step": 28496 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900561759491016e-05, + "loss": 0.8885, + "step": 28497 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900554760581745e-05, + "loss": 0.9176, + "step": 28498 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900547761426664e-05, + "loss": 0.7901, + "step": 28499 + }, + { + "epoch": 0.03, + "learning_rate": 4.990054076202577e-05, + "loss": 0.8011, + "step": 28500 + }, + { + "epoch": 0.03, + "eval_loss": 1.059694766998291, + "eval_runtime": 126.0487, + "eval_samples_per_second": 10.988, + "eval_steps_per_second": 5.498, + "step": 28500 + }, + { + "epoch": 0.03, + "learning_rate": 4.990053376237908e-05, + "loss": 0.9402, + "step": 28501 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900526762486576e-05, + "loss": 0.8917, + "step": 28502 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900519762348267e-05, + "loss": 0.9597, + "step": 28503 + }, + { + "epoch": 0.03, + "learning_rate": 4.990051276196415e-05, + "loss": 0.7764, + "step": 28504 + }, + { + "epoch": 0.03, + "learning_rate": 4.990050576133423e-05, + "loss": 0.9763, + "step": 28505 + }, + { + "epoch": 0.03, + "learning_rate": 4.990049876045849e-05, + "loss": 0.9869, + "step": 28506 + }, + { + "epoch": 0.03, + "learning_rate": 4.990049175933695e-05, + "loss": 1.2697, + "step": 28507 + }, + { + "epoch": 0.03, + "learning_rate": 4.99004847579696e-05, + "loss": 0.7036, + "step": 28508 + }, + { + "epoch": 0.03, + "learning_rate": 4.990047775635644e-05, + "loss": 1.0836, + "step": 28509 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900470754497483e-05, + "loss": 0.8644, + "step": 28510 + }, + { + "epoch": 0.03, + "learning_rate": 4.990046375239272e-05, + "loss": 1.0702, + "step": 28511 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900456750042146e-05, + "loss": 0.9568, + "step": 28512 + }, + { + "epoch": 0.03, + "learning_rate": 4.990044974744577e-05, + "loss": 0.8329, + "step": 28513 + }, + { + "epoch": 0.03, + "learning_rate": 4.990044274460357e-05, + "loss": 1.0985, + "step": 28514 + }, + { + "epoch": 0.03, + "learning_rate": 4.990043574151558e-05, + "loss": 1.0644, + "step": 28515 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900428738181784e-05, + "loss": 0.9356, + "step": 28516 + }, + { + "epoch": 0.03, + "learning_rate": 4.990042173460216e-05, + "loss": 0.974, + "step": 28517 + }, + { + "epoch": 0.03, + "learning_rate": 4.990041473077676e-05, + "loss": 1.0704, + "step": 28518 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900407726705534e-05, + "loss": 0.9438, + "step": 28519 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900400722388506e-05, + "loss": 0.822, + "step": 28520 + }, + { + "epoch": 0.03, + "learning_rate": 4.990039371782567e-05, + "loss": 0.8225, + "step": 28521 + }, + { + "epoch": 0.03, + "learning_rate": 4.990038671301703e-05, + "loss": 0.7054, + "step": 28522 + }, + { + "epoch": 0.03, + "learning_rate": 4.990037970796259e-05, + "loss": 0.8314, + "step": 28523 + }, + { + "epoch": 0.03, + "learning_rate": 4.990037270266233e-05, + "loss": 0.7516, + "step": 28524 + }, + { + "epoch": 0.03, + "learning_rate": 4.990036569711627e-05, + "loss": 0.9527, + "step": 28525 + }, + { + "epoch": 0.03, + "learning_rate": 4.990035869132441e-05, + "loss": 1.0401, + "step": 28526 + }, + { + "epoch": 0.03, + "learning_rate": 4.990035168528674e-05, + "loss": 0.9183, + "step": 28527 + }, + { + "epoch": 0.03, + "learning_rate": 4.990034467900326e-05, + "loss": 0.9367, + "step": 28528 + }, + { + "epoch": 0.03, + "learning_rate": 4.990033767247399e-05, + "loss": 1.0987, + "step": 28529 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900330665698897e-05, + "loss": 1.0158, + "step": 28530 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900323658678004e-05, + "loss": 1.0933, + "step": 28531 + }, + { + "epoch": 0.03, + "learning_rate": 4.99003166514113e-05, + "loss": 1.021, + "step": 28532 + }, + { + "epoch": 0.03, + "learning_rate": 4.99003096438988e-05, + "loss": 1.1417, + "step": 28533 + }, + { + "epoch": 0.03, + "learning_rate": 4.990030263614049e-05, + "loss": 1.0498, + "step": 28534 + }, + { + "epoch": 0.03, + "learning_rate": 4.990029562813638e-05, + "loss": 0.9506, + "step": 28535 + }, + { + "epoch": 0.03, + "learning_rate": 4.990028861988646e-05, + "loss": 1.035, + "step": 28536 + }, + { + "epoch": 0.03, + "learning_rate": 4.990028161139073e-05, + "loss": 1.0413, + "step": 28537 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900274602649206e-05, + "loss": 0.7374, + "step": 28538 + }, + { + "epoch": 0.03, + "learning_rate": 4.990026759366187e-05, + "loss": 0.7415, + "step": 28539 + }, + { + "epoch": 0.03, + "learning_rate": 4.990026058442872e-05, + "loss": 0.987, + "step": 28540 + }, + { + "epoch": 0.03, + "learning_rate": 4.990025357494978e-05, + "loss": 1.2085, + "step": 28541 + }, + { + "epoch": 0.03, + "learning_rate": 4.990024656522504e-05, + "loss": 1.0309, + "step": 28542 + }, + { + "epoch": 0.03, + "learning_rate": 4.990023955525448e-05, + "loss": 0.5912, + "step": 28543 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900232545038123e-05, + "loss": 0.8013, + "step": 28544 + }, + { + "epoch": 0.03, + "learning_rate": 4.990022553457596e-05, + "loss": 0.8033, + "step": 28545 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900218523867994e-05, + "loss": 0.6054, + "step": 28546 + }, + { + "epoch": 0.03, + "learning_rate": 4.990021151291422e-05, + "loss": 0.362, + "step": 28547 + }, + { + "epoch": 0.03, + "learning_rate": 4.990020450171464e-05, + "loss": 0.5153, + "step": 28548 + }, + { + "epoch": 0.03, + "learning_rate": 4.990019749026926e-05, + "loss": 0.1314, + "step": 28549 + }, + { + "epoch": 0.03, + "learning_rate": 4.990019047857808e-05, + "loss": 0.2516, + "step": 28550 + }, + { + "epoch": 0.03, + "learning_rate": 4.990018346664108e-05, + "loss": 0.4292, + "step": 28551 + }, + { + "epoch": 0.03, + "learning_rate": 4.990017645445829e-05, + "loss": 0.248, + "step": 28552 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900169442029696e-05, + "loss": 0.0982, + "step": 28553 + }, + { + "epoch": 0.03, + "learning_rate": 4.990016242935529e-05, + "loss": 0.1042, + "step": 28554 + }, + { + "epoch": 0.03, + "learning_rate": 4.990015541643509e-05, + "loss": 0.0644, + "step": 28555 + }, + { + "epoch": 0.03, + "learning_rate": 4.990014840326908e-05, + "loss": 0.3811, + "step": 28556 + }, + { + "epoch": 0.03, + "learning_rate": 4.990014138985726e-05, + "loss": 0.4002, + "step": 28557 + }, + { + "epoch": 0.03, + "learning_rate": 4.990013437619965e-05, + "loss": 0.287, + "step": 28558 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900127362296226e-05, + "loss": 0.1507, + "step": 28559 + }, + { + "epoch": 0.03, + "learning_rate": 4.990012034814701e-05, + "loss": 0.2797, + "step": 28560 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900113333751977e-05, + "loss": 0.0635, + "step": 28561 + }, + { + "epoch": 0.03, + "learning_rate": 4.990010631911114e-05, + "loss": 0.2525, + "step": 28562 + }, + { + "epoch": 0.03, + "learning_rate": 4.990009930422451e-05, + "loss": 0.2745, + "step": 28563 + }, + { + "epoch": 0.03, + "learning_rate": 4.990009228909207e-05, + "loss": 0.2307, + "step": 28564 + }, + { + "epoch": 0.03, + "learning_rate": 4.990008527371383e-05, + "loss": 0.1444, + "step": 28565 + }, + { + "epoch": 0.03, + "learning_rate": 4.990007825808979e-05, + "loss": 0.0858, + "step": 28566 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900071242219934e-05, + "loss": 0.0412, + "step": 28567 + }, + { + "epoch": 0.03, + "learning_rate": 4.990006422610429e-05, + "loss": 0.7176, + "step": 28568 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900057209742836e-05, + "loss": 0.6349, + "step": 28569 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900050193135584e-05, + "loss": 0.5989, + "step": 28570 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900043176282516e-05, + "loss": 0.6219, + "step": 28571 + }, + { + "epoch": 0.03, + "learning_rate": 4.990003615918366e-05, + "loss": 0.7263, + "step": 28572 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900029141838993e-05, + "loss": 1.0618, + "step": 28573 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900022124248526e-05, + "loss": 1.1109, + "step": 28574 + }, + { + "epoch": 0.03, + "learning_rate": 4.9900015106412256e-05, + "loss": 1.3556, + "step": 28575 + }, + { + "epoch": 0.03, + "learning_rate": 4.990000808833018e-05, + "loss": 1.0419, + "step": 28576 + }, + { + "epoch": 0.03, + "learning_rate": 4.990000107000231e-05, + "loss": 0.9642, + "step": 28577 + }, + { + "epoch": 0.03, + "learning_rate": 4.989999405142863e-05, + "loss": 1.1836, + "step": 28578 + }, + { + "epoch": 0.03, + "learning_rate": 4.989998703260916e-05, + "loss": 1.105, + "step": 28579 + }, + { + "epoch": 0.03, + "learning_rate": 4.989998001354388e-05, + "loss": 0.6727, + "step": 28580 + }, + { + "epoch": 0.03, + "learning_rate": 4.989997299423279e-05, + "loss": 0.8764, + "step": 28581 + }, + { + "epoch": 0.03, + "learning_rate": 4.989996597467591e-05, + "loss": 1.5111, + "step": 28582 + }, + { + "epoch": 0.03, + "learning_rate": 4.989995895487322e-05, + "loss": 0.8916, + "step": 28583 + }, + { + "epoch": 0.03, + "learning_rate": 4.989995193482473e-05, + "loss": 0.7059, + "step": 28584 + }, + { + "epoch": 0.03, + "learning_rate": 4.989994491453044e-05, + "loss": 0.8515, + "step": 28585 + }, + { + "epoch": 0.03, + "learning_rate": 4.989993789399035e-05, + "loss": 1.1137, + "step": 28586 + }, + { + "epoch": 0.03, + "learning_rate": 4.989993087320445e-05, + "loss": 0.9509, + "step": 28587 + }, + { + "epoch": 0.03, + "learning_rate": 4.989992385217276e-05, + "loss": 0.9518, + "step": 28588 + }, + { + "epoch": 0.03, + "learning_rate": 4.989991683089526e-05, + "loss": 0.5354, + "step": 28589 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899909809371956e-05, + "loss": 1.2061, + "step": 28590 + }, + { + "epoch": 0.03, + "learning_rate": 4.989990278760286e-05, + "loss": 1.2859, + "step": 28591 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899895765587964e-05, + "loss": 1.371, + "step": 28592 + }, + { + "epoch": 0.03, + "learning_rate": 4.989988874332726e-05, + "loss": 0.9958, + "step": 28593 + }, + { + "epoch": 0.03, + "learning_rate": 4.989988172082075e-05, + "loss": 0.5965, + "step": 28594 + }, + { + "epoch": 0.03, + "learning_rate": 4.989987469806845e-05, + "loss": 1.0089, + "step": 28595 + }, + { + "epoch": 0.03, + "learning_rate": 4.989986767507035e-05, + "loss": 0.9946, + "step": 28596 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899860651826436e-05, + "loss": 1.074, + "step": 28597 + }, + { + "epoch": 0.03, + "learning_rate": 4.989985362833672e-05, + "loss": 1.4328, + "step": 28598 + }, + { + "epoch": 0.03, + "learning_rate": 4.989984660460122e-05, + "loss": 1.0511, + "step": 28599 + }, + { + "epoch": 0.03, + "learning_rate": 4.989983958061991e-05, + "loss": 0.8966, + "step": 28600 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899832556392796e-05, + "loss": 1.0801, + "step": 28601 + }, + { + "epoch": 0.03, + "learning_rate": 4.989982553191989e-05, + "loss": 0.8664, + "step": 28602 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899818507201176e-05, + "loss": 0.3566, + "step": 28603 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899811482236656e-05, + "loss": 0.4538, + "step": 28604 + }, + { + "epoch": 0.03, + "learning_rate": 4.989980445702635e-05, + "loss": 1.1877, + "step": 28605 + }, + { + "epoch": 0.03, + "learning_rate": 4.989979743157023e-05, + "loss": 1.3525, + "step": 28606 + }, + { + "epoch": 0.03, + "learning_rate": 4.989979040586832e-05, + "loss": 0.6419, + "step": 28607 + }, + { + "epoch": 0.03, + "learning_rate": 4.989978337992061e-05, + "loss": 0.6325, + "step": 28608 + }, + { + "epoch": 0.03, + "learning_rate": 4.989977635372709e-05, + "loss": 0.575, + "step": 28609 + }, + { + "epoch": 0.03, + "learning_rate": 4.989976932728778e-05, + "loss": 0.56, + "step": 28610 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899762300602664e-05, + "loss": 0.4272, + "step": 28611 + }, + { + "epoch": 0.03, + "learning_rate": 4.989975527367175e-05, + "loss": 0.9889, + "step": 28612 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899748246495035e-05, + "loss": 0.9483, + "step": 28613 + }, + { + "epoch": 0.03, + "learning_rate": 4.989974121907253e-05, + "loss": 1.04, + "step": 28614 + }, + { + "epoch": 0.03, + "learning_rate": 4.989973419140421e-05, + "loss": 0.9122, + "step": 28615 + }, + { + "epoch": 0.03, + "learning_rate": 4.98997271634901e-05, + "loss": 1.1204, + "step": 28616 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899720135330184e-05, + "loss": 0.8129, + "step": 28617 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899713106924474e-05, + "loss": 1.0336, + "step": 28618 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899706078272955e-05, + "loss": 1.1311, + "step": 28619 + }, + { + "epoch": 0.03, + "learning_rate": 4.989969904937565e-05, + "loss": 1.2843, + "step": 28620 + }, + { + "epoch": 0.03, + "learning_rate": 4.989969202023254e-05, + "loss": 0.8608, + "step": 28621 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899684990843626e-05, + "loss": 1.4016, + "step": 28622 + }, + { + "epoch": 0.03, + "learning_rate": 4.989967796120892e-05, + "loss": 1.157, + "step": 28623 + }, + { + "epoch": 0.03, + "learning_rate": 4.989967093132841e-05, + "loss": 1.0876, + "step": 28624 + }, + { + "epoch": 0.03, + "learning_rate": 4.98996639012021e-05, + "loss": 1.2017, + "step": 28625 + }, + { + "epoch": 0.03, + "learning_rate": 4.989965687083e-05, + "loss": 1.1174, + "step": 28626 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899649840212096e-05, + "loss": 1.2353, + "step": 28627 + }, + { + "epoch": 0.03, + "learning_rate": 4.989964280934839e-05, + "loss": 1.0438, + "step": 28628 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899635778238884e-05, + "loss": 0.5082, + "step": 28629 + }, + { + "epoch": 0.03, + "learning_rate": 4.989962874688359e-05, + "loss": 0.8514, + "step": 28630 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899621715282485e-05, + "loss": 1.0902, + "step": 28631 + }, + { + "epoch": 0.03, + "learning_rate": 4.989961468343558e-05, + "loss": 1.0801, + "step": 28632 + }, + { + "epoch": 0.03, + "learning_rate": 4.989960765134288e-05, + "loss": 0.8906, + "step": 28633 + }, + { + "epoch": 0.03, + "learning_rate": 4.989960061900439e-05, + "loss": 0.7168, + "step": 28634 + }, + { + "epoch": 0.03, + "learning_rate": 4.989959358642009e-05, + "loss": 0.6165, + "step": 28635 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899586553590005e-05, + "loss": 0.7574, + "step": 28636 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899579520514114e-05, + "loss": 1.0258, + "step": 28637 + }, + { + "epoch": 0.03, + "learning_rate": 4.989957248719242e-05, + "loss": 0.9461, + "step": 28638 + }, + { + "epoch": 0.03, + "learning_rate": 4.989956545362494e-05, + "loss": 0.8615, + "step": 28639 + }, + { + "epoch": 0.03, + "learning_rate": 4.989955841981165e-05, + "loss": 0.8364, + "step": 28640 + }, + { + "epoch": 0.03, + "learning_rate": 4.989955138575257e-05, + "loss": 0.215, + "step": 28641 + }, + { + "epoch": 0.03, + "learning_rate": 4.989954435144769e-05, + "loss": 0.3466, + "step": 28642 + }, + { + "epoch": 0.03, + "learning_rate": 4.989953731689701e-05, + "loss": 1.0194, + "step": 28643 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899530282100536e-05, + "loss": 1.0064, + "step": 28644 + }, + { + "epoch": 0.03, + "learning_rate": 4.989952324705826e-05, + "loss": 1.6024, + "step": 28645 + }, + { + "epoch": 0.03, + "learning_rate": 4.989951621177019e-05, + "loss": 1.121, + "step": 28646 + }, + { + "epoch": 0.03, + "learning_rate": 4.989950917623632e-05, + "loss": 1.0352, + "step": 28647 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899502140456654e-05, + "loss": 0.8574, + "step": 28648 + }, + { + "epoch": 0.03, + "learning_rate": 4.989949510443119e-05, + "loss": 1.0526, + "step": 28649 + }, + { + "epoch": 0.03, + "learning_rate": 4.989948806815993e-05, + "loss": 2.3448, + "step": 28650 + }, + { + "epoch": 0.03, + "learning_rate": 4.989948103164287e-05, + "loss": 0.9199, + "step": 28651 + }, + { + "epoch": 0.03, + "learning_rate": 4.989947399488001e-05, + "loss": 0.9277, + "step": 28652 + }, + { + "epoch": 0.03, + "learning_rate": 4.989946695787137e-05, + "loss": 1.2516, + "step": 28653 + }, + { + "epoch": 0.03, + "learning_rate": 4.989945992061691e-05, + "loss": 1.3325, + "step": 28654 + }, + { + "epoch": 0.03, + "learning_rate": 4.989945288311667e-05, + "loss": 1.1112, + "step": 28655 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899445845370616e-05, + "loss": 1.0724, + "step": 28656 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899438807378784e-05, + "loss": 1.0268, + "step": 28657 + }, + { + "epoch": 0.03, + "learning_rate": 4.989943176914115e-05, + "loss": 0.7917, + "step": 28658 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899424730657706e-05, + "loss": 1.1986, + "step": 28659 + }, + { + "epoch": 0.03, + "learning_rate": 4.989941769192848e-05, + "loss": 1.1738, + "step": 28660 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899410652953446e-05, + "loss": 0.916, + "step": 28661 + }, + { + "epoch": 0.03, + "learning_rate": 4.989940361373263e-05, + "loss": 0.8998, + "step": 28662 + }, + { + "epoch": 0.03, + "learning_rate": 4.989939657426601e-05, + "loss": 0.8212, + "step": 28663 + }, + { + "epoch": 0.03, + "learning_rate": 4.989938953455359e-05, + "loss": 0.8998, + "step": 28664 + }, + { + "epoch": 0.03, + "learning_rate": 4.989938249459538e-05, + "loss": 1.2043, + "step": 28665 + }, + { + "epoch": 0.03, + "learning_rate": 4.989937545439137e-05, + "loss": 1.2122, + "step": 28666 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899368413941564e-05, + "loss": 0.9562, + "step": 28667 + }, + { + "epoch": 0.03, + "learning_rate": 4.989936137324597e-05, + "loss": 1.1825, + "step": 28668 + }, + { + "epoch": 0.03, + "learning_rate": 4.989935433230457e-05, + "loss": 1.4023, + "step": 28669 + }, + { + "epoch": 0.03, + "learning_rate": 4.989934729111737e-05, + "loss": 1.3786, + "step": 28670 + }, + { + "epoch": 0.03, + "learning_rate": 4.989934024968438e-05, + "loss": 1.295, + "step": 28671 + }, + { + "epoch": 0.03, + "learning_rate": 4.98993332080056e-05, + "loss": 1.1711, + "step": 28672 + }, + { + "epoch": 0.03, + "learning_rate": 4.989932616608102e-05, + "loss": 1.0478, + "step": 28673 + }, + { + "epoch": 0.03, + "learning_rate": 4.989931912391065e-05, + "loss": 0.9863, + "step": 28674 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899312081494474e-05, + "loss": 1.2418, + "step": 28675 + }, + { + "epoch": 0.03, + "learning_rate": 4.989930503883251e-05, + "loss": 1.0336, + "step": 28676 + }, + { + "epoch": 0.03, + "learning_rate": 4.989929799592474e-05, + "loss": 1.6233, + "step": 28677 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899290952771185e-05, + "loss": 1.2143, + "step": 28678 + }, + { + "epoch": 0.03, + "learning_rate": 4.989928390937183e-05, + "loss": 0.4854, + "step": 28679 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899276865726686e-05, + "loss": 0.392, + "step": 28680 + }, + { + "epoch": 0.03, + "learning_rate": 4.989926982183574e-05, + "loss": 0.5483, + "step": 28681 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899262777699e-05, + "loss": 0.9352, + "step": 28682 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899255733316465e-05, + "loss": 0.9551, + "step": 28683 + }, + { + "epoch": 0.03, + "learning_rate": 4.989924868868814e-05, + "loss": 1.1013, + "step": 28684 + }, + { + "epoch": 0.03, + "learning_rate": 4.989924164381401e-05, + "loss": 0.8728, + "step": 28685 + }, + { + "epoch": 0.03, + "learning_rate": 4.989923459869409e-05, + "loss": 0.9696, + "step": 28686 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899227553328385e-05, + "loss": 0.7358, + "step": 28687 + }, + { + "epoch": 0.03, + "learning_rate": 4.989922050771687e-05, + "loss": 1.0078, + "step": 28688 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899213461859575e-05, + "loss": 1.0475, + "step": 28689 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899206415756474e-05, + "loss": 0.9979, + "step": 28690 + }, + { + "epoch": 0.03, + "learning_rate": 4.989919936940758e-05, + "loss": 1.2319, + "step": 28691 + }, + { + "epoch": 0.03, + "learning_rate": 4.989919232281289e-05, + "loss": 1.2622, + "step": 28692 + }, + { + "epoch": 0.03, + "learning_rate": 4.989918527597242e-05, + "loss": 1.2747, + "step": 28693 + }, + { + "epoch": 0.03, + "learning_rate": 4.989917822888614e-05, + "loss": 1.1397, + "step": 28694 + }, + { + "epoch": 0.03, + "learning_rate": 4.989917118155407e-05, + "loss": 1.1862, + "step": 28695 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899164133976204e-05, + "loss": 1.2062, + "step": 28696 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899157086152555e-05, + "loss": 1.1237, + "step": 28697 + }, + { + "epoch": 0.03, + "learning_rate": 4.98991500380831e-05, + "loss": 0.841, + "step": 28698 + }, + { + "epoch": 0.03, + "learning_rate": 4.989914298976786e-05, + "loss": 1.1708, + "step": 28699 + }, + { + "epoch": 0.03, + "learning_rate": 4.989913594120682e-05, + "loss": 0.9753, + "step": 28700 + }, + { + "epoch": 0.03, + "learning_rate": 4.989912889239998e-05, + "loss": 1.0414, + "step": 28701 + }, + { + "epoch": 0.03, + "learning_rate": 4.989912184334736e-05, + "loss": 0.9227, + "step": 28702 + }, + { + "epoch": 0.03, + "learning_rate": 4.989911479404894e-05, + "loss": 1.1541, + "step": 28703 + }, + { + "epoch": 0.03, + "learning_rate": 4.989910774450472e-05, + "loss": 1.096, + "step": 28704 + }, + { + "epoch": 0.03, + "learning_rate": 4.989910069471472e-05, + "loss": 0.8285, + "step": 28705 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899093644678916e-05, + "loss": 1.3583, + "step": 28706 + }, + { + "epoch": 0.03, + "learning_rate": 4.989908659439733e-05, + "loss": 1.4777, + "step": 28707 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899079543869936e-05, + "loss": 1.2747, + "step": 28708 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899072493096755e-05, + "loss": 1.1566, + "step": 28709 + }, + { + "epoch": 0.03, + "learning_rate": 4.989906544207779e-05, + "loss": 0.9429, + "step": 28710 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899058390813016e-05, + "loss": 0.877, + "step": 28711 + }, + { + "epoch": 0.03, + "learning_rate": 4.989905133930246e-05, + "loss": 0.8357, + "step": 28712 + }, + { + "epoch": 0.03, + "learning_rate": 4.98990442875461e-05, + "loss": 1.1905, + "step": 28713 + }, + { + "epoch": 0.03, + "learning_rate": 4.989903723554396e-05, + "loss": 1.1011, + "step": 28714 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899030183296026e-05, + "loss": 1.0383, + "step": 28715 + }, + { + "epoch": 0.03, + "learning_rate": 4.989902313080229e-05, + "loss": 1.2375, + "step": 28716 + }, + { + "epoch": 0.03, + "learning_rate": 4.989901607806277e-05, + "loss": 1.1616, + "step": 28717 + }, + { + "epoch": 0.03, + "learning_rate": 4.9899009025077456e-05, + "loss": 1.1348, + "step": 28718 + }, + { + "epoch": 0.03, + "learning_rate": 4.989900197184634e-05, + "loss": 1.0037, + "step": 28719 + }, + { + "epoch": 0.03, + "learning_rate": 4.989899491836945e-05, + "loss": 0.648, + "step": 28720 + }, + { + "epoch": 0.03, + "learning_rate": 4.989898786464675e-05, + "loss": 1.338, + "step": 28721 + }, + { + "epoch": 0.03, + "learning_rate": 4.989898081067826e-05, + "loss": 1.1413, + "step": 28722 + }, + { + "epoch": 0.03, + "learning_rate": 4.989897375646399e-05, + "loss": 1.0504, + "step": 28723 + }, + { + "epoch": 0.03, + "learning_rate": 4.989896670200391e-05, + "loss": 1.1597, + "step": 28724 + }, + { + "epoch": 0.03, + "learning_rate": 4.989895964729805e-05, + "loss": 1.1147, + "step": 28725 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898952592346396e-05, + "loss": 1.2063, + "step": 28726 + }, + { + "epoch": 0.03, + "learning_rate": 4.989894553714895e-05, + "loss": 0.9647, + "step": 28727 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898938481705714e-05, + "loss": 1.1278, + "step": 28728 + }, + { + "epoch": 0.03, + "learning_rate": 4.989893142601668e-05, + "loss": 0.3865, + "step": 28729 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898924370081864e-05, + "loss": 0.858, + "step": 28730 + }, + { + "epoch": 0.03, + "learning_rate": 4.989891731390125e-05, + "loss": 1.045, + "step": 28731 + }, + { + "epoch": 0.03, + "learning_rate": 4.989891025747484e-05, + "loss": 0.9492, + "step": 28732 + }, + { + "epoch": 0.03, + "learning_rate": 4.989890320080265e-05, + "loss": 0.962, + "step": 28733 + }, + { + "epoch": 0.03, + "learning_rate": 4.989889614388465e-05, + "loss": 1.3121, + "step": 28734 + }, + { + "epoch": 0.03, + "learning_rate": 4.989888908672088e-05, + "loss": 1.5073, + "step": 28735 + }, + { + "epoch": 0.03, + "learning_rate": 4.989888202931131e-05, + "loss": 1.647, + "step": 28736 + }, + { + "epoch": 0.03, + "learning_rate": 4.989887497165594e-05, + "loss": 0.9382, + "step": 28737 + }, + { + "epoch": 0.03, + "learning_rate": 4.989886791375479e-05, + "loss": 1.0809, + "step": 28738 + }, + { + "epoch": 0.03, + "learning_rate": 4.989886085560785e-05, + "loss": 1.5419, + "step": 28739 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898853797215113e-05, + "loss": 1.507, + "step": 28740 + }, + { + "epoch": 0.03, + "learning_rate": 4.989884673857659e-05, + "loss": 0.8779, + "step": 28741 + }, + { + "epoch": 0.03, + "learning_rate": 4.989883967969227e-05, + "loss": 0.5897, + "step": 28742 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898832620562166e-05, + "loss": 0.5506, + "step": 28743 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898825561186256e-05, + "loss": 0.5394, + "step": 28744 + }, + { + "epoch": 0.03, + "learning_rate": 4.989881850156457e-05, + "loss": 1.1847, + "step": 28745 + }, + { + "epoch": 0.03, + "learning_rate": 4.989881144169709e-05, + "loss": 1.1404, + "step": 28746 + }, + { + "epoch": 0.03, + "learning_rate": 4.989880438158382e-05, + "loss": 0.9296, + "step": 28747 + }, + { + "epoch": 0.03, + "learning_rate": 4.989879732122475e-05, + "loss": 0.8682, + "step": 28748 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898790260619905e-05, + "loss": 1.2293, + "step": 28749 + }, + { + "epoch": 0.03, + "learning_rate": 4.989878319976926e-05, + "loss": 1.2358, + "step": 28750 + }, + { + "epoch": 0.03, + "learning_rate": 4.989877613867283e-05, + "loss": 1.027, + "step": 28751 + }, + { + "epoch": 0.03, + "learning_rate": 4.989876907733061e-05, + "loss": 0.8733, + "step": 28752 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898762015742595e-05, + "loss": 0.8829, + "step": 28753 + }, + { + "epoch": 0.03, + "learning_rate": 4.989875495390879e-05, + "loss": 0.8916, + "step": 28754 + }, + { + "epoch": 0.03, + "learning_rate": 4.98987478918292e-05, + "loss": 0.8475, + "step": 28755 + }, + { + "epoch": 0.03, + "learning_rate": 4.989874082950382e-05, + "loss": 0.7215, + "step": 28756 + }, + { + "epoch": 0.03, + "learning_rate": 4.989873376693264e-05, + "loss": 0.9166, + "step": 28757 + }, + { + "epoch": 0.03, + "learning_rate": 4.989872670411568e-05, + "loss": 1.3159, + "step": 28758 + }, + { + "epoch": 0.03, + "learning_rate": 4.989871964105293e-05, + "loss": 1.1122, + "step": 28759 + }, + { + "epoch": 0.03, + "learning_rate": 4.989871257774439e-05, + "loss": 1.1782, + "step": 28760 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898705514190054e-05, + "loss": 1.2824, + "step": 28761 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898698450389934e-05, + "loss": 0.8737, + "step": 28762 + }, + { + "epoch": 0.03, + "learning_rate": 4.989869138634402e-05, + "loss": 1.5407, + "step": 28763 + }, + { + "epoch": 0.03, + "learning_rate": 4.989868432205233e-05, + "loss": 1.0472, + "step": 28764 + }, + { + "epoch": 0.03, + "learning_rate": 4.989867725751483e-05, + "loss": 0.9275, + "step": 28765 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898670192731554e-05, + "loss": 1.0443, + "step": 28766 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898663127702484e-05, + "loss": 0.9821, + "step": 28767 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898656062427626e-05, + "loss": 1.0266, + "step": 28768 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898648996906986e-05, + "loss": 1.3478, + "step": 28769 + }, + { + "epoch": 0.03, + "learning_rate": 4.989864193114054e-05, + "loss": 1.7217, + "step": 28770 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898634865128326e-05, + "loss": 1.4757, + "step": 28771 + }, + { + "epoch": 0.03, + "learning_rate": 4.989862779887031e-05, + "loss": 0.886, + "step": 28772 + }, + { + "epoch": 0.03, + "learning_rate": 4.989862073236651e-05, + "loss": 0.6483, + "step": 28773 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898613665616914e-05, + "loss": 0.7847, + "step": 28774 + }, + { + "epoch": 0.03, + "learning_rate": 4.989860659862154e-05, + "loss": 0.9859, + "step": 28775 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898599531380374e-05, + "loss": 0.8349, + "step": 28776 + }, + { + "epoch": 0.03, + "learning_rate": 4.989859246389341e-05, + "loss": 1.1458, + "step": 28777 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898585396160666e-05, + "loss": 0.9526, + "step": 28778 + }, + { + "epoch": 0.03, + "learning_rate": 4.989857832818214e-05, + "loss": 0.8921, + "step": 28779 + }, + { + "epoch": 0.03, + "learning_rate": 4.989857125995782e-05, + "loss": 0.6175, + "step": 28780 + }, + { + "epoch": 0.03, + "learning_rate": 4.989856419148771e-05, + "loss": 0.7263, + "step": 28781 + }, + { + "epoch": 0.03, + "learning_rate": 4.989855712277181e-05, + "loss": 1.053, + "step": 28782 + }, + { + "epoch": 0.03, + "learning_rate": 4.989855005381012e-05, + "loss": 0.8914, + "step": 28783 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898542984602656e-05, + "loss": 0.9494, + "step": 28784 + }, + { + "epoch": 0.03, + "learning_rate": 4.989853591514939e-05, + "loss": 1.071, + "step": 28785 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898528845450344e-05, + "loss": 1.0856, + "step": 28786 + }, + { + "epoch": 0.03, + "learning_rate": 4.98985217755055e-05, + "loss": 0.908, + "step": 28787 + }, + { + "epoch": 0.03, + "learning_rate": 4.989851470531488e-05, + "loss": 0.9302, + "step": 28788 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898507634878464e-05, + "loss": 1.0564, + "step": 28789 + }, + { + "epoch": 0.03, + "learning_rate": 4.989850056419627e-05, + "loss": 1.0, + "step": 28790 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898493493268286e-05, + "loss": 0.8991, + "step": 28791 + }, + { + "epoch": 0.03, + "learning_rate": 4.989848642209451e-05, + "loss": 0.8118, + "step": 28792 + }, + { + "epoch": 0.03, + "learning_rate": 4.989847935067494e-05, + "loss": 0.9091, + "step": 28793 + }, + { + "epoch": 0.03, + "learning_rate": 4.98984722790096e-05, + "loss": 1.3065, + "step": 28794 + }, + { + "epoch": 0.03, + "learning_rate": 4.989846520709846e-05, + "loss": 0.9606, + "step": 28795 + }, + { + "epoch": 0.03, + "learning_rate": 4.989845813494154e-05, + "loss": 1.0161, + "step": 28796 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898451062538824e-05, + "loss": 1.1581, + "step": 28797 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898443989890327e-05, + "loss": 1.1614, + "step": 28798 + }, + { + "epoch": 0.03, + "learning_rate": 4.989843691699604e-05, + "loss": 0.9223, + "step": 28799 + }, + { + "epoch": 0.03, + "learning_rate": 4.989842984385597e-05, + "loss": 0.9396, + "step": 28800 + }, + { + "epoch": 0.03, + "learning_rate": 4.989842277047011e-05, + "loss": 0.8434, + "step": 28801 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898415696838465e-05, + "loss": 0.9033, + "step": 28802 + }, + { + "epoch": 0.03, + "learning_rate": 4.989840862296103e-05, + "loss": 0.998, + "step": 28803 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898401548837816e-05, + "loss": 0.5246, + "step": 28804 + }, + { + "epoch": 0.03, + "learning_rate": 4.989839447446881e-05, + "loss": 0.834, + "step": 28805 + }, + { + "epoch": 0.03, + "learning_rate": 4.989838739985402e-05, + "loss": 1.177, + "step": 28806 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898380324993445e-05, + "loss": 1.0343, + "step": 28807 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898373249887075e-05, + "loss": 0.9242, + "step": 28808 + }, + { + "epoch": 0.03, + "learning_rate": 4.989836617453493e-05, + "loss": 1.132, + "step": 28809 + }, + { + "epoch": 0.03, + "learning_rate": 4.989835909893698e-05, + "loss": 0.9288, + "step": 28810 + }, + { + "epoch": 0.03, + "learning_rate": 4.989835202309327e-05, + "loss": 1.4192, + "step": 28811 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898344947003764e-05, + "loss": 0.874, + "step": 28812 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898337870668464e-05, + "loss": 1.0229, + "step": 28813 + }, + { + "epoch": 0.03, + "learning_rate": 4.989833079408738e-05, + "loss": 1.0179, + "step": 28814 + }, + { + "epoch": 0.03, + "learning_rate": 4.989832371726052e-05, + "loss": 1.1, + "step": 28815 + }, + { + "epoch": 0.03, + "learning_rate": 4.989831664018787e-05, + "loss": 0.829, + "step": 28816 + }, + { + "epoch": 0.03, + "learning_rate": 4.989830956286943e-05, + "loss": 1.0219, + "step": 28817 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898302485305205e-05, + "loss": 0.4041, + "step": 28818 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898295407495194e-05, + "loss": 0.6985, + "step": 28819 + }, + { + "epoch": 0.03, + "learning_rate": 4.98982883294394e-05, + "loss": 0.8285, + "step": 28820 + }, + { + "epoch": 0.03, + "learning_rate": 4.989828125113782e-05, + "loss": 1.019, + "step": 28821 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898274172590456e-05, + "loss": 1.0239, + "step": 28822 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898267093797304e-05, + "loss": 1.0249, + "step": 28823 + }, + { + "epoch": 0.03, + "learning_rate": 4.989826001475837e-05, + "loss": 1.2123, + "step": 28824 + }, + { + "epoch": 0.03, + "learning_rate": 4.989825293547364e-05, + "loss": 1.2797, + "step": 28825 + }, + { + "epoch": 0.03, + "learning_rate": 4.989824585594314e-05, + "loss": 1.1551, + "step": 28826 + }, + { + "epoch": 0.03, + "learning_rate": 4.989823877616685e-05, + "loss": 1.0152, + "step": 28827 + }, + { + "epoch": 0.03, + "learning_rate": 4.989823169614477e-05, + "loss": 0.6396, + "step": 28828 + }, + { + "epoch": 0.03, + "learning_rate": 4.989822461587691e-05, + "loss": 1.0515, + "step": 28829 + }, + { + "epoch": 0.03, + "learning_rate": 4.989821753536327e-05, + "loss": 0.978, + "step": 28830 + }, + { + "epoch": 0.03, + "learning_rate": 4.989821045460383e-05, + "loss": 0.6876, + "step": 28831 + }, + { + "epoch": 0.03, + "learning_rate": 4.989820337359862e-05, + "loss": 0.9216, + "step": 28832 + }, + { + "epoch": 0.03, + "learning_rate": 4.989819629234762e-05, + "loss": 0.9109, + "step": 28833 + }, + { + "epoch": 0.03, + "learning_rate": 4.989818921085084e-05, + "loss": 0.6072, + "step": 28834 + }, + { + "epoch": 0.03, + "learning_rate": 4.989818212910827e-05, + "loss": 1.0917, + "step": 28835 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898175047119915e-05, + "loss": 1.0272, + "step": 28836 + }, + { + "epoch": 0.03, + "learning_rate": 4.989816796488578e-05, + "loss": 0.8012, + "step": 28837 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898160882405856e-05, + "loss": 0.9323, + "step": 28838 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898153799680154e-05, + "loss": 1.0394, + "step": 28839 + }, + { + "epoch": 0.03, + "learning_rate": 4.989814671670866e-05, + "loss": 1.4442, + "step": 28840 + }, + { + "epoch": 0.03, + "learning_rate": 4.989813963349139e-05, + "loss": 0.9732, + "step": 28841 + }, + { + "epoch": 0.03, + "learning_rate": 4.989813255002833e-05, + "loss": 0.3551, + "step": 28842 + }, + { + "epoch": 0.03, + "learning_rate": 4.989812546631949e-05, + "loss": 0.3674, + "step": 28843 + }, + { + "epoch": 0.03, + "learning_rate": 4.989811838236487e-05, + "loss": 0.3411, + "step": 28844 + }, + { + "epoch": 0.03, + "learning_rate": 4.989811129816446e-05, + "loss": 0.4558, + "step": 28845 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898104213718264e-05, + "loss": 0.2535, + "step": 28846 + }, + { + "epoch": 0.03, + "learning_rate": 4.989809712902629e-05, + "loss": 0.1645, + "step": 28847 + }, + { + "epoch": 0.03, + "learning_rate": 4.989809004408853e-05, + "loss": 0.0995, + "step": 28848 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898082958904986e-05, + "loss": 0.072, + "step": 28849 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898075873475657e-05, + "loss": 0.0719, + "step": 28850 + }, + { + "epoch": 0.03, + "learning_rate": 4.989806878780055e-05, + "loss": 0.0329, + "step": 28851 + }, + { + "epoch": 0.03, + "learning_rate": 4.989806170187966e-05, + "loss": 0.0611, + "step": 28852 + }, + { + "epoch": 0.03, + "learning_rate": 4.989805461571298e-05, + "loss": 0.0598, + "step": 28853 + }, + { + "epoch": 0.03, + "learning_rate": 4.989804752930052e-05, + "loss": 0.0613, + "step": 28854 + }, + { + "epoch": 0.03, + "learning_rate": 4.989804044264228e-05, + "loss": 0.198, + "step": 28855 + }, + { + "epoch": 0.03, + "learning_rate": 4.989803335573825e-05, + "loss": 0.0957, + "step": 28856 + }, + { + "epoch": 0.03, + "learning_rate": 4.989802626858844e-05, + "loss": 0.049, + "step": 28857 + }, + { + "epoch": 0.03, + "learning_rate": 4.989801918119286e-05, + "loss": 0.0785, + "step": 28858 + }, + { + "epoch": 0.03, + "learning_rate": 4.9898012093551484e-05, + "loss": 1.0949, + "step": 28859 + }, + { + "epoch": 0.03, + "learning_rate": 4.989800500566432e-05, + "loss": 1.0429, + "step": 28860 + }, + { + "epoch": 0.03, + "learning_rate": 4.989799791753138e-05, + "loss": 1.2065, + "step": 28861 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897990829152666e-05, + "loss": 1.178, + "step": 28862 + }, + { + "epoch": 0.03, + "learning_rate": 4.989798374052816e-05, + "loss": 0.927, + "step": 28863 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897976651657876e-05, + "loss": 1.1034, + "step": 28864 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897969562541805e-05, + "loss": 0.6125, + "step": 28865 + }, + { + "epoch": 0.03, + "learning_rate": 4.989796247317995e-05, + "loss": 0.9577, + "step": 28866 + }, + { + "epoch": 0.03, + "learning_rate": 4.989795538357232e-05, + "loss": 0.7278, + "step": 28867 + }, + { + "epoch": 0.03, + "learning_rate": 4.98979482937189e-05, + "loss": 0.3926, + "step": 28868 + }, + { + "epoch": 0.03, + "learning_rate": 4.98979412036197e-05, + "loss": 0.3831, + "step": 28869 + }, + { + "epoch": 0.03, + "learning_rate": 4.989793411327473e-05, + "loss": 0.9334, + "step": 28870 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897927022683965e-05, + "loss": 1.0732, + "step": 28871 + }, + { + "epoch": 0.03, + "learning_rate": 4.989791993184742e-05, + "loss": 0.9936, + "step": 28872 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897912840765095e-05, + "loss": 1.0296, + "step": 28873 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897905749436994e-05, + "loss": 0.9023, + "step": 28874 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897898657863104e-05, + "loss": 1.0704, + "step": 28875 + }, + { + "epoch": 0.03, + "learning_rate": 4.989789156604343e-05, + "loss": 1.033, + "step": 28876 + }, + { + "epoch": 0.03, + "learning_rate": 4.989788447397798e-05, + "loss": 0.9554, + "step": 28877 + }, + { + "epoch": 0.03, + "learning_rate": 4.989787738166674e-05, + "loss": 1.0071, + "step": 28878 + }, + { + "epoch": 0.03, + "learning_rate": 4.989787028910973e-05, + "loss": 1.0048, + "step": 28879 + }, + { + "epoch": 0.03, + "learning_rate": 4.989786319630694e-05, + "loss": 0.5843, + "step": 28880 + }, + { + "epoch": 0.03, + "learning_rate": 4.989785610325836e-05, + "loss": 0.3442, + "step": 28881 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897849009964004e-05, + "loss": 0.4914, + "step": 28882 + }, + { + "epoch": 0.03, + "learning_rate": 4.989784191642386e-05, + "loss": 0.3567, + "step": 28883 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897834822637946e-05, + "loss": 0.3282, + "step": 28884 + }, + { + "epoch": 0.03, + "learning_rate": 4.989782772860624e-05, + "loss": 0.307, + "step": 28885 + }, + { + "epoch": 0.03, + "learning_rate": 4.989782063432876e-05, + "loss": 0.369, + "step": 28886 + }, + { + "epoch": 0.03, + "learning_rate": 4.98978135398055e-05, + "loss": 0.5162, + "step": 28887 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897806445036455e-05, + "loss": 1.1952, + "step": 28888 + }, + { + "epoch": 0.03, + "learning_rate": 4.989779935002163e-05, + "loss": 1.0199, + "step": 28889 + }, + { + "epoch": 0.03, + "learning_rate": 4.989779225476103e-05, + "loss": 0.9132, + "step": 28890 + }, + { + "epoch": 0.03, + "learning_rate": 4.989778515925464e-05, + "loss": 1.1699, + "step": 28891 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897778063502475e-05, + "loss": 0.8231, + "step": 28892 + }, + { + "epoch": 0.03, + "learning_rate": 4.989777096750453e-05, + "loss": 1.1207, + "step": 28893 + }, + { + "epoch": 0.03, + "learning_rate": 4.989776387126081e-05, + "loss": 1.0462, + "step": 28894 + }, + { + "epoch": 0.03, + "learning_rate": 4.98977567747713e-05, + "loss": 0.434, + "step": 28895 + }, + { + "epoch": 0.03, + "learning_rate": 4.989774967803601e-05, + "loss": 0.2738, + "step": 28896 + }, + { + "epoch": 0.03, + "learning_rate": 4.989774258105495e-05, + "loss": 0.8974, + "step": 28897 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897735483828104e-05, + "loss": 1.1216, + "step": 28898 + }, + { + "epoch": 0.03, + "learning_rate": 4.989772838635547e-05, + "loss": 1.1644, + "step": 28899 + }, + { + "epoch": 0.03, + "learning_rate": 4.989772128863707e-05, + "loss": 1.8721, + "step": 28900 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897714190672876e-05, + "loss": 1.4062, + "step": 28901 + }, + { + "epoch": 0.03, + "learning_rate": 4.989770709246292e-05, + "loss": 1.3411, + "step": 28902 + }, + { + "epoch": 0.03, + "learning_rate": 4.989769999400717e-05, + "loss": 0.4413, + "step": 28903 + }, + { + "epoch": 0.03, + "learning_rate": 4.989769289530564e-05, + "loss": 1.0114, + "step": 28904 + }, + { + "epoch": 0.03, + "learning_rate": 4.989768579635834e-05, + "loss": 1.4632, + "step": 28905 + }, + { + "epoch": 0.03, + "learning_rate": 4.989767869716525e-05, + "loss": 0.9671, + "step": 28906 + }, + { + "epoch": 0.03, + "learning_rate": 4.989767159772639e-05, + "loss": 0.9991, + "step": 28907 + }, + { + "epoch": 0.03, + "learning_rate": 4.989766449804175e-05, + "loss": 0.0565, + "step": 28908 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897657398111324e-05, + "loss": 0.1174, + "step": 28909 + }, + { + "epoch": 0.03, + "learning_rate": 4.989765029793512e-05, + "loss": 0.0573, + "step": 28910 + }, + { + "epoch": 0.03, + "learning_rate": 4.989764319751315e-05, + "loss": 0.793, + "step": 28911 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897636096845376e-05, + "loss": 0.9251, + "step": 28912 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897628995931844e-05, + "loss": 1.1071, + "step": 28913 + }, + { + "epoch": 0.03, + "learning_rate": 4.989762189477253e-05, + "loss": 1.2617, + "step": 28914 + }, + { + "epoch": 0.03, + "learning_rate": 4.989761479336743e-05, + "loss": 0.634, + "step": 28915 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897607691716555e-05, + "loss": 0.9604, + "step": 28916 + }, + { + "epoch": 0.03, + "learning_rate": 4.98976005898199e-05, + "loss": 0.9846, + "step": 28917 + }, + { + "epoch": 0.03, + "learning_rate": 4.989759348767747e-05, + "loss": 1.0023, + "step": 28918 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897586385289257e-05, + "loss": 0.9485, + "step": 28919 + }, + { + "epoch": 0.03, + "learning_rate": 4.989757928265527e-05, + "loss": 1.1136, + "step": 28920 + }, + { + "epoch": 0.03, + "learning_rate": 4.98975721797755e-05, + "loss": 0.798, + "step": 28921 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897565076649955e-05, + "loss": 0.9756, + "step": 28922 + }, + { + "epoch": 0.03, + "learning_rate": 4.989755797327863e-05, + "loss": 1.1705, + "step": 28923 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897550869661525e-05, + "loss": 0.9846, + "step": 28924 + }, + { + "epoch": 0.03, + "learning_rate": 4.989754376579865e-05, + "loss": 0.6421, + "step": 28925 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897536661689995e-05, + "loss": 1.0426, + "step": 28926 + }, + { + "epoch": 0.03, + "learning_rate": 4.989752955733556e-05, + "loss": 1.1374, + "step": 28927 + }, + { + "epoch": 0.03, + "learning_rate": 4.989752245273534e-05, + "loss": 0.9853, + "step": 28928 + }, + { + "epoch": 0.03, + "learning_rate": 4.989751534788934e-05, + "loss": 0.7698, + "step": 28929 + }, + { + "epoch": 0.03, + "learning_rate": 4.989750824279757e-05, + "loss": 0.9847, + "step": 28930 + }, + { + "epoch": 0.03, + "learning_rate": 4.989750113746002e-05, + "loss": 1.0451, + "step": 28931 + }, + { + "epoch": 0.03, + "learning_rate": 4.98974940318767e-05, + "loss": 1.205, + "step": 28932 + }, + { + "epoch": 0.03, + "learning_rate": 4.98974869260476e-05, + "loss": 0.7799, + "step": 28933 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897479819972715e-05, + "loss": 1.0969, + "step": 28934 + }, + { + "epoch": 0.03, + "learning_rate": 4.989747271365206e-05, + "loss": 1.1205, + "step": 28935 + }, + { + "epoch": 0.03, + "learning_rate": 4.989746560708562e-05, + "loss": 1.1791, + "step": 28936 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897458500273406e-05, + "loss": 0.8873, + "step": 28937 + }, + { + "epoch": 0.03, + "learning_rate": 4.989745139321542e-05, + "loss": 1.1595, + "step": 28938 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897444285911656e-05, + "loss": 0.8781, + "step": 28939 + }, + { + "epoch": 0.03, + "learning_rate": 4.989743717836211e-05, + "loss": 1.0265, + "step": 28940 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897430070566786e-05, + "loss": 1.5526, + "step": 28941 + }, + { + "epoch": 0.03, + "learning_rate": 4.989742296252569e-05, + "loss": 0.9612, + "step": 28942 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897415854238814e-05, + "loss": 0.8811, + "step": 28943 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897408745706166e-05, + "loss": 1.1159, + "step": 28944 + }, + { + "epoch": 0.03, + "learning_rate": 4.989740163692773e-05, + "loss": 0.9269, + "step": 28945 + }, + { + "epoch": 0.03, + "learning_rate": 4.989739452790353e-05, + "loss": 1.131, + "step": 28946 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897387418633544e-05, + "loss": 0.9195, + "step": 28947 + }, + { + "epoch": 0.03, + "learning_rate": 4.989738030911779e-05, + "loss": 0.8737, + "step": 28948 + }, + { + "epoch": 0.03, + "learning_rate": 4.989737319935625e-05, + "loss": 0.6232, + "step": 28949 + }, + { + "epoch": 0.03, + "learning_rate": 4.989736608934894e-05, + "loss": 1.0677, + "step": 28950 + }, + { + "epoch": 0.03, + "learning_rate": 4.989735897909585e-05, + "loss": 1.4946, + "step": 28951 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897351868596983e-05, + "loss": 1.1452, + "step": 28952 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897344757852346e-05, + "loss": 0.9629, + "step": 28953 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897337646861933e-05, + "loss": 1.0514, + "step": 28954 + }, + { + "epoch": 0.03, + "learning_rate": 4.989733053562574e-05, + "loss": 1.3857, + "step": 28955 + }, + { + "epoch": 0.03, + "learning_rate": 4.989732342414377e-05, + "loss": 1.0388, + "step": 28956 + }, + { + "epoch": 0.03, + "learning_rate": 4.989731631241603e-05, + "loss": 0.9176, + "step": 28957 + }, + { + "epoch": 0.03, + "learning_rate": 4.989730920044251e-05, + "loss": 0.8916, + "step": 28958 + }, + { + "epoch": 0.03, + "learning_rate": 4.989730208822321e-05, + "loss": 1.6187, + "step": 28959 + }, + { + "epoch": 0.03, + "learning_rate": 4.989729497575815e-05, + "loss": 1.1341, + "step": 28960 + }, + { + "epoch": 0.03, + "learning_rate": 4.989728786304729e-05, + "loss": 1.3322, + "step": 28961 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897280750090676e-05, + "loss": 1.1932, + "step": 28962 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897273636888275e-05, + "loss": 1.1081, + "step": 28963 + }, + { + "epoch": 0.03, + "learning_rate": 4.98972665234401e-05, + "loss": 1.035, + "step": 28964 + }, + { + "epoch": 0.03, + "learning_rate": 4.989725940974615e-05, + "loss": 0.8887, + "step": 28965 + }, + { + "epoch": 0.03, + "learning_rate": 4.989725229580643e-05, + "loss": 0.6033, + "step": 28966 + }, + { + "epoch": 0.03, + "learning_rate": 4.989724518162093e-05, + "loss": 1.0705, + "step": 28967 + }, + { + "epoch": 0.03, + "learning_rate": 4.989723806718965e-05, + "loss": 1.3072, + "step": 28968 + }, + { + "epoch": 0.03, + "learning_rate": 4.989723095251261e-05, + "loss": 0.5436, + "step": 28969 + }, + { + "epoch": 0.03, + "learning_rate": 4.989722383758979e-05, + "loss": 0.637, + "step": 28970 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897216722421185e-05, + "loss": 0.6695, + "step": 28971 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897209607006814e-05, + "loss": 1.0799, + "step": 28972 + }, + { + "epoch": 0.03, + "learning_rate": 4.989720249134666e-05, + "loss": 1.1542, + "step": 28973 + }, + { + "epoch": 0.03, + "learning_rate": 4.989719537544074e-05, + "loss": 1.1452, + "step": 28974 + }, + { + "epoch": 0.03, + "learning_rate": 4.989718825928904e-05, + "loss": 0.9649, + "step": 28975 + }, + { + "epoch": 0.03, + "learning_rate": 4.989718114289157e-05, + "loss": 1.1745, + "step": 28976 + }, + { + "epoch": 0.03, + "learning_rate": 4.989717402624832e-05, + "loss": 1.3928, + "step": 28977 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897166909359306e-05, + "loss": 1.8586, + "step": 28978 + }, + { + "epoch": 0.03, + "learning_rate": 4.989715979222451e-05, + "loss": 1.354, + "step": 28979 + }, + { + "epoch": 0.03, + "learning_rate": 4.989715267484394e-05, + "loss": 1.279, + "step": 28980 + }, + { + "epoch": 0.03, + "learning_rate": 4.989714555721759e-05, + "loss": 1.0072, + "step": 28981 + }, + { + "epoch": 0.03, + "learning_rate": 4.989713843934548e-05, + "loss": 0.8671, + "step": 28982 + }, + { + "epoch": 0.03, + "learning_rate": 4.989713132122759e-05, + "loss": 1.1895, + "step": 28983 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897124202863916e-05, + "loss": 1.0972, + "step": 28984 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897117084254476e-05, + "loss": 1.119, + "step": 28985 + }, + { + "epoch": 0.03, + "learning_rate": 4.989710996539926e-05, + "loss": 1.0334, + "step": 28986 + }, + { + "epoch": 0.03, + "learning_rate": 4.989710284629827e-05, + "loss": 1.1169, + "step": 28987 + }, + { + "epoch": 0.03, + "learning_rate": 4.989709572695151e-05, + "loss": 1.0835, + "step": 28988 + }, + { + "epoch": 0.03, + "learning_rate": 4.989708860735898e-05, + "loss": 1.0788, + "step": 28989 + }, + { + "epoch": 0.03, + "learning_rate": 4.989708148752067e-05, + "loss": 1.0233, + "step": 28990 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897074367436586e-05, + "loss": 0.7061, + "step": 28991 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897067247106734e-05, + "loss": 0.7283, + "step": 28992 + }, + { + "epoch": 0.03, + "learning_rate": 4.98970601265311e-05, + "loss": 1.3084, + "step": 28993 + }, + { + "epoch": 0.03, + "learning_rate": 4.98970530057097e-05, + "loss": 0.9021, + "step": 28994 + }, + { + "epoch": 0.03, + "learning_rate": 4.989704588464252e-05, + "loss": 0.9384, + "step": 28995 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897038763329576e-05, + "loss": 1.0217, + "step": 28996 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897031641770855e-05, + "loss": 0.9787, + "step": 28997 + }, + { + "epoch": 0.03, + "learning_rate": 4.989702451996636e-05, + "loss": 1.0799, + "step": 28998 + }, + { + "epoch": 0.03, + "learning_rate": 4.989701739791609e-05, + "loss": 0.9109, + "step": 28999 + }, + { + "epoch": 0.03, + "learning_rate": 4.989701027562005e-05, + "loss": 1.0983, + "step": 29000 + }, + { + "epoch": 0.03, + "eval_loss": 1.0596212148666382, + "eval_runtime": 126.066, + "eval_samples_per_second": 10.986, + "eval_steps_per_second": 5.497, + "step": 29000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9897003153078235e-05, + "loss": 1.2042, + "step": 29001 + }, + { + "epoch": 0.03, + "learning_rate": 4.989699603029065e-05, + "loss": 0.9688, + "step": 29002 + }, + { + "epoch": 0.03, + "learning_rate": 4.989698890725729e-05, + "loss": 0.929, + "step": 29003 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896981783978156e-05, + "loss": 0.8213, + "step": 29004 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896974660453255e-05, + "loss": 0.849, + "step": 29005 + }, + { + "epoch": 0.03, + "learning_rate": 4.989696753668258e-05, + "loss": 0.8995, + "step": 29006 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896960412666135e-05, + "loss": 1.3146, + "step": 29007 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896953288403915e-05, + "loss": 0.8484, + "step": 29008 + }, + { + "epoch": 0.03, + "learning_rate": 4.989694616389592e-05, + "loss": 1.1348, + "step": 29009 + }, + { + "epoch": 0.03, + "learning_rate": 4.989693903914215e-05, + "loss": 1.0171, + "step": 29010 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896931914142614e-05, + "loss": 1.271, + "step": 29011 + }, + { + "epoch": 0.03, + "learning_rate": 4.98969247888973e-05, + "loss": 1.4952, + "step": 29012 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896917663406226e-05, + "loss": 1.0631, + "step": 29013 + }, + { + "epoch": 0.03, + "learning_rate": 4.989691053766937e-05, + "loss": 1.0919, + "step": 29014 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896903411686745e-05, + "loss": 1.0492, + "step": 29015 + }, + { + "epoch": 0.03, + "learning_rate": 4.989689628545835e-05, + "loss": 1.1008, + "step": 29016 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896889158984184e-05, + "loss": 0.6918, + "step": 29017 + }, + { + "epoch": 0.03, + "learning_rate": 4.989688203226424e-05, + "loss": 0.7526, + "step": 29018 + }, + { + "epoch": 0.03, + "learning_rate": 4.989687490529853e-05, + "loss": 1.0927, + "step": 29019 + }, + { + "epoch": 0.03, + "learning_rate": 4.989686777808704e-05, + "loss": 0.9819, + "step": 29020 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896860650629795e-05, + "loss": 1.0669, + "step": 29021 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896853522926766e-05, + "loss": 0.9665, + "step": 29022 + }, + { + "epoch": 0.03, + "learning_rate": 4.989684639497797e-05, + "loss": 1.0373, + "step": 29023 + }, + { + "epoch": 0.03, + "learning_rate": 4.98968392667834e-05, + "loss": 1.1351, + "step": 29024 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896832138343066e-05, + "loss": 1.082, + "step": 29025 + }, + { + "epoch": 0.03, + "learning_rate": 4.989682500965695e-05, + "loss": 1.2862, + "step": 29026 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896817880725065e-05, + "loss": 0.8061, + "step": 29027 + }, + { + "epoch": 0.03, + "learning_rate": 4.989681075154742e-05, + "loss": 1.0785, + "step": 29028 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896803622124e-05, + "loss": 1.1208, + "step": 29029 + }, + { + "epoch": 0.03, + "learning_rate": 4.98967964924548e-05, + "loss": 1.0553, + "step": 29030 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896789362539835e-05, + "loss": 0.8657, + "step": 29031 + }, + { + "epoch": 0.03, + "learning_rate": 4.98967822323791e-05, + "loss": 0.6038, + "step": 29032 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896775101972594e-05, + "loss": 0.9803, + "step": 29033 + }, + { + "epoch": 0.03, + "learning_rate": 4.989676797132032e-05, + "loss": 0.9201, + "step": 29034 + }, + { + "epoch": 0.03, + "learning_rate": 4.989676084042227e-05, + "loss": 0.8263, + "step": 29035 + }, + { + "epoch": 0.03, + "learning_rate": 4.989675370927845e-05, + "loss": 1.341, + "step": 29036 + }, + { + "epoch": 0.03, + "learning_rate": 4.989674657788887e-05, + "loss": 0.8832, + "step": 29037 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896739446253514e-05, + "loss": 1.0133, + "step": 29038 + }, + { + "epoch": 0.03, + "learning_rate": 4.989673231437238e-05, + "loss": 1.0844, + "step": 29039 + }, + { + "epoch": 0.03, + "learning_rate": 4.989672518224548e-05, + "loss": 0.9587, + "step": 29040 + }, + { + "epoch": 0.03, + "learning_rate": 4.989671804987282e-05, + "loss": 1.1303, + "step": 29041 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896710917254384e-05, + "loss": 0.9136, + "step": 29042 + }, + { + "epoch": 0.03, + "learning_rate": 4.989670378439017e-05, + "loss": 0.6165, + "step": 29043 + }, + { + "epoch": 0.03, + "learning_rate": 4.989669665128019e-05, + "loss": 0.6482, + "step": 29044 + }, + { + "epoch": 0.03, + "learning_rate": 4.989668951792446e-05, + "loss": 0.9987, + "step": 29045 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896682384322935e-05, + "loss": 0.9547, + "step": 29046 + }, + { + "epoch": 0.03, + "learning_rate": 4.989667525047565e-05, + "loss": 1.2747, + "step": 29047 + }, + { + "epoch": 0.03, + "learning_rate": 4.98966681163826e-05, + "loss": 1.3389, + "step": 29048 + }, + { + "epoch": 0.03, + "learning_rate": 4.989666098204377e-05, + "loss": 1.036, + "step": 29049 + }, + { + "epoch": 0.03, + "learning_rate": 4.989665384745917e-05, + "loss": 1.073, + "step": 29050 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896646712628814e-05, + "loss": 0.926, + "step": 29051 + }, + { + "epoch": 0.03, + "learning_rate": 4.989663957755268e-05, + "loss": 0.8147, + "step": 29052 + }, + { + "epoch": 0.03, + "learning_rate": 4.989663244223078e-05, + "loss": 0.9254, + "step": 29053 + }, + { + "epoch": 0.03, + "learning_rate": 4.98966253066631e-05, + "loss": 0.8017, + "step": 29054 + }, + { + "epoch": 0.03, + "learning_rate": 4.989661817084967e-05, + "loss": 0.6406, + "step": 29055 + }, + { + "epoch": 0.03, + "learning_rate": 4.989661103479046e-05, + "loss": 1.5227, + "step": 29056 + }, + { + "epoch": 0.03, + "learning_rate": 4.989660389848548e-05, + "loss": 0.8772, + "step": 29057 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896596761934734e-05, + "loss": 0.354, + "step": 29058 + }, + { + "epoch": 0.03, + "learning_rate": 4.989658962513822e-05, + "loss": 0.7528, + "step": 29059 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896582488095936e-05, + "loss": 1.3361, + "step": 29060 + }, + { + "epoch": 0.03, + "learning_rate": 4.989657535080788e-05, + "loss": 1.1203, + "step": 29061 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896568213274065e-05, + "loss": 1.0425, + "step": 29062 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896561075494474e-05, + "loss": 1.0864, + "step": 29063 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896553937469114e-05, + "loss": 1.0477, + "step": 29064 + }, + { + "epoch": 0.03, + "learning_rate": 4.989654679919799e-05, + "loss": 0.8958, + "step": 29065 + }, + { + "epoch": 0.03, + "learning_rate": 4.98965396606811e-05, + "loss": 0.3584, + "step": 29066 + }, + { + "epoch": 0.03, + "learning_rate": 4.989653252191844e-05, + "loss": 0.3127, + "step": 29067 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896525382910006e-05, + "loss": 0.6144, + "step": 29068 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896518243655805e-05, + "loss": 1.0873, + "step": 29069 + }, + { + "epoch": 0.03, + "learning_rate": 4.989651110415584e-05, + "loss": 1.0176, + "step": 29070 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896503964410105e-05, + "loss": 1.0428, + "step": 29071 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896496824418606e-05, + "loss": 1.0524, + "step": 29072 + }, + { + "epoch": 0.03, + "learning_rate": 4.989648968418134e-05, + "loss": 0.8031, + "step": 29073 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896482543698295e-05, + "loss": 0.8666, + "step": 29074 + }, + { + "epoch": 0.03, + "learning_rate": 4.989647540296949e-05, + "loss": 1.1541, + "step": 29075 + }, + { + "epoch": 0.03, + "learning_rate": 4.989646826199492e-05, + "loss": 1.0675, + "step": 29076 + }, + { + "epoch": 0.03, + "learning_rate": 4.989646112077458e-05, + "loss": 0.8997, + "step": 29077 + }, + { + "epoch": 0.03, + "learning_rate": 4.989645397930847e-05, + "loss": 0.8984, + "step": 29078 + }, + { + "epoch": 0.03, + "learning_rate": 4.98964468375966e-05, + "loss": 1.1646, + "step": 29079 + }, + { + "epoch": 0.03, + "learning_rate": 4.989643969563895e-05, + "loss": 0.9267, + "step": 29080 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896432553435546e-05, + "loss": 0.7993, + "step": 29081 + }, + { + "epoch": 0.03, + "learning_rate": 4.989642541098637e-05, + "loss": 1.1879, + "step": 29082 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896418268291426e-05, + "loss": 0.6457, + "step": 29083 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896411125350714e-05, + "loss": 0.3846, + "step": 29084 + }, + { + "epoch": 0.03, + "learning_rate": 4.989640398216424e-05, + "loss": 0.7882, + "step": 29085 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896396838732e-05, + "loss": 0.8765, + "step": 29086 + }, + { + "epoch": 0.03, + "learning_rate": 4.989638969505399e-05, + "loss": 0.9096, + "step": 29087 + }, + { + "epoch": 0.03, + "learning_rate": 4.989638255113021e-05, + "loss": 0.8133, + "step": 29088 + }, + { + "epoch": 0.03, + "learning_rate": 4.989637540696066e-05, + "loss": 0.8176, + "step": 29089 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896368262545355e-05, + "loss": 1.0449, + "step": 29090 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896361117884285e-05, + "loss": 0.9024, + "step": 29091 + }, + { + "epoch": 0.03, + "learning_rate": 4.989635397297744e-05, + "loss": 0.9412, + "step": 29092 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896346827824826e-05, + "loss": 0.9131, + "step": 29093 + }, + { + "epoch": 0.03, + "learning_rate": 4.989633968242645e-05, + "loss": 1.0466, + "step": 29094 + }, + { + "epoch": 0.03, + "learning_rate": 4.989633253678231e-05, + "loss": 0.9202, + "step": 29095 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896325390892404e-05, + "loss": 1.0047, + "step": 29096 + }, + { + "epoch": 0.03, + "learning_rate": 4.989631824475673e-05, + "loss": 0.8466, + "step": 29097 + }, + { + "epoch": 0.03, + "learning_rate": 4.989631109837529e-05, + "loss": 0.867, + "step": 29098 + }, + { + "epoch": 0.03, + "learning_rate": 4.989630395174808e-05, + "loss": 0.5349, + "step": 29099 + }, + { + "epoch": 0.03, + "learning_rate": 4.989629680487511e-05, + "loss": 0.7237, + "step": 29100 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896289657756376e-05, + "loss": 0.7533, + "step": 29101 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896282510391875e-05, + "loss": 0.4468, + "step": 29102 + }, + { + "epoch": 0.03, + "learning_rate": 4.98962753627816e-05, + "loss": 1.0564, + "step": 29103 + }, + { + "epoch": 0.03, + "learning_rate": 4.989626821492557e-05, + "loss": 0.884, + "step": 29104 + }, + { + "epoch": 0.03, + "learning_rate": 4.989626106682377e-05, + "loss": 1.0782, + "step": 29105 + }, + { + "epoch": 0.03, + "learning_rate": 4.98962539184762e-05, + "loss": 1.3443, + "step": 29106 + }, + { + "epoch": 0.03, + "learning_rate": 4.989624676988287e-05, + "loss": 1.4795, + "step": 29107 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896239621043776e-05, + "loss": 0.9764, + "step": 29108 + }, + { + "epoch": 0.03, + "learning_rate": 4.989623247195892e-05, + "loss": 1.1921, + "step": 29109 + }, + { + "epoch": 0.03, + "learning_rate": 4.989622532262829e-05, + "loss": 1.174, + "step": 29110 + }, + { + "epoch": 0.03, + "learning_rate": 4.98962181730519e-05, + "loss": 1.2818, + "step": 29111 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896211023229745e-05, + "loss": 1.3059, + "step": 29112 + }, + { + "epoch": 0.03, + "learning_rate": 4.989620387316182e-05, + "loss": 0.9999, + "step": 29113 + }, + { + "epoch": 0.03, + "learning_rate": 4.989619672284814e-05, + "loss": 0.9788, + "step": 29114 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896189572288685e-05, + "loss": 1.0644, + "step": 29115 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896182421483475e-05, + "loss": 0.7423, + "step": 29116 + }, + { + "epoch": 0.03, + "learning_rate": 4.989617527043249e-05, + "loss": 0.901, + "step": 29117 + }, + { + "epoch": 0.03, + "learning_rate": 4.989616811913575e-05, + "loss": 1.496, + "step": 29118 + }, + { + "epoch": 0.03, + "learning_rate": 4.989616096759324e-05, + "loss": 0.9832, + "step": 29119 + }, + { + "epoch": 0.03, + "learning_rate": 4.989615381580497e-05, + "loss": 1.0821, + "step": 29120 + }, + { + "epoch": 0.03, + "learning_rate": 4.989614666377093e-05, + "loss": 1.0936, + "step": 29121 + }, + { + "epoch": 0.03, + "learning_rate": 4.989613951149112e-05, + "loss": 1.0122, + "step": 29122 + }, + { + "epoch": 0.03, + "learning_rate": 4.989613235896556e-05, + "loss": 1.0169, + "step": 29123 + }, + { + "epoch": 0.03, + "learning_rate": 4.989612520619423e-05, + "loss": 1.4078, + "step": 29124 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896118053177136e-05, + "loss": 1.3354, + "step": 29125 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896110899914284e-05, + "loss": 0.3098, + "step": 29126 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896103746405656e-05, + "loss": 0.3136, + "step": 29127 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896096592651275e-05, + "loss": 4.1156, + "step": 29128 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896089438651124e-05, + "loss": 1.9682, + "step": 29129 + }, + { + "epoch": 0.03, + "learning_rate": 4.989608228440521e-05, + "loss": 1.1782, + "step": 29130 + }, + { + "epoch": 0.03, + "learning_rate": 4.989607512991353e-05, + "loss": 0.883, + "step": 29131 + }, + { + "epoch": 0.03, + "learning_rate": 4.989606797517609e-05, + "loss": 0.8494, + "step": 29132 + }, + { + "epoch": 0.03, + "learning_rate": 4.989606082019289e-05, + "loss": 1.2184, + "step": 29133 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896053664963916e-05, + "loss": 0.9239, + "step": 29134 + }, + { + "epoch": 0.03, + "learning_rate": 4.989604650948919e-05, + "loss": 0.4045, + "step": 29135 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896039353768696e-05, + "loss": 0.1424, + "step": 29136 + }, + { + "epoch": 0.03, + "learning_rate": 4.989603219780243e-05, + "loss": 0.8851, + "step": 29137 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896025041590415e-05, + "loss": 1.1703, + "step": 29138 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896017885132636e-05, + "loss": 0.848, + "step": 29139 + }, + { + "epoch": 0.03, + "learning_rate": 4.989601072842909e-05, + "loss": 1.1787, + "step": 29140 + }, + { + "epoch": 0.03, + "learning_rate": 4.989600357147978e-05, + "loss": 0.9603, + "step": 29141 + }, + { + "epoch": 0.03, + "learning_rate": 4.989599641428471e-05, + "loss": 0.6911, + "step": 29142 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895989256843864e-05, + "loss": 0.1579, + "step": 29143 + }, + { + "epoch": 0.03, + "learning_rate": 4.989598209915727e-05, + "loss": 0.1521, + "step": 29144 + }, + { + "epoch": 0.03, + "learning_rate": 4.989597494122491e-05, + "loss": 0.3108, + "step": 29145 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895967783046785e-05, + "loss": 0.9406, + "step": 29146 + }, + { + "epoch": 0.03, + "learning_rate": 4.989596062462291e-05, + "loss": 0.8081, + "step": 29147 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895953465953254e-05, + "loss": 1.0443, + "step": 29148 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895946307037846e-05, + "loss": 1.1583, + "step": 29149 + }, + { + "epoch": 0.03, + "learning_rate": 4.989593914787667e-05, + "loss": 1.0134, + "step": 29150 + }, + { + "epoch": 0.03, + "learning_rate": 4.989593198846973e-05, + "loss": 1.0383, + "step": 29151 + }, + { + "epoch": 0.03, + "learning_rate": 4.989592482881704e-05, + "loss": 0.8247, + "step": 29152 + }, + { + "epoch": 0.03, + "learning_rate": 4.989591766891858e-05, + "loss": 1.0257, + "step": 29153 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895910508774357e-05, + "loss": 1.0188, + "step": 29154 + }, + { + "epoch": 0.03, + "learning_rate": 4.989590334838438e-05, + "loss": 1.0004, + "step": 29155 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895896187748634e-05, + "loss": 1.0485, + "step": 29156 + }, + { + "epoch": 0.03, + "learning_rate": 4.989588902686713e-05, + "loss": 1.0402, + "step": 29157 + }, + { + "epoch": 0.03, + "learning_rate": 4.989588186573986e-05, + "loss": 1.0171, + "step": 29158 + }, + { + "epoch": 0.03, + "learning_rate": 4.989587470436683e-05, + "loss": 1.0026, + "step": 29159 + }, + { + "epoch": 0.03, + "learning_rate": 4.989586754274803e-05, + "loss": 1.2145, + "step": 29160 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895860380883484e-05, + "loss": 1.0712, + "step": 29161 + }, + { + "epoch": 0.03, + "learning_rate": 4.989585321877317e-05, + "loss": 1.0376, + "step": 29162 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895846056417094e-05, + "loss": 0.9398, + "step": 29163 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895838893815256e-05, + "loss": 0.8942, + "step": 29164 + }, + { + "epoch": 0.03, + "learning_rate": 4.989583173096766e-05, + "loss": 2.1347, + "step": 29165 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895824567874296e-05, + "loss": 1.1978, + "step": 29166 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895817404535174e-05, + "loss": 0.7928, + "step": 29167 + }, + { + "epoch": 0.03, + "learning_rate": 4.989581024095029e-05, + "loss": 1.214, + "step": 29168 + }, + { + "epoch": 0.03, + "learning_rate": 4.989580307711965e-05, + "loss": 1.015, + "step": 29169 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895795913043245e-05, + "loss": 0.76, + "step": 29170 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895788748721084e-05, + "loss": 1.1821, + "step": 29171 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895781584153154e-05, + "loss": 0.4537, + "step": 29172 + }, + { + "epoch": 0.03, + "learning_rate": 4.989577441933947e-05, + "loss": 0.8343, + "step": 29173 + }, + { + "epoch": 0.03, + "learning_rate": 4.989576725428002e-05, + "loss": 1.0043, + "step": 29174 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895760088974816e-05, + "loss": 0.9862, + "step": 29175 + }, + { + "epoch": 0.03, + "learning_rate": 4.989575292342385e-05, + "loss": 1.0313, + "step": 29176 + }, + { + "epoch": 0.03, + "learning_rate": 4.989574575762712e-05, + "loss": 0.8568, + "step": 29177 + }, + { + "epoch": 0.03, + "learning_rate": 4.989573859158464e-05, + "loss": 1.2798, + "step": 29178 + }, + { + "epoch": 0.03, + "learning_rate": 4.989573142529639e-05, + "loss": 0.8715, + "step": 29179 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895724258762375e-05, + "loss": 0.8769, + "step": 29180 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895717091982605e-05, + "loss": 0.8463, + "step": 29181 + }, + { + "epoch": 0.03, + "learning_rate": 4.989570992495708e-05, + "loss": 1.1206, + "step": 29182 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895702757685794e-05, + "loss": 1.2376, + "step": 29183 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895695590168746e-05, + "loss": 1.3365, + "step": 29184 + }, + { + "epoch": 0.03, + "learning_rate": 4.989568842240594e-05, + "loss": 0.9084, + "step": 29185 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895681254397366e-05, + "loss": 0.9741, + "step": 29186 + }, + { + "epoch": 0.03, + "learning_rate": 4.989567408614304e-05, + "loss": 0.9876, + "step": 29187 + }, + { + "epoch": 0.03, + "learning_rate": 4.989566691764295e-05, + "loss": 0.7732, + "step": 29188 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895659748897104e-05, + "loss": 0.7497, + "step": 29189 + }, + { + "epoch": 0.03, + "learning_rate": 4.98956525799055e-05, + "loss": 0.9963, + "step": 29190 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895645410668136e-05, + "loss": 0.9801, + "step": 29191 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895638241185e-05, + "loss": 0.9852, + "step": 29192 + }, + { + "epoch": 0.03, + "learning_rate": 4.989563107145613e-05, + "loss": 1.5967, + "step": 29193 + }, + { + "epoch": 0.03, + "learning_rate": 4.989562390148148e-05, + "loss": 1.0507, + "step": 29194 + }, + { + "epoch": 0.03, + "learning_rate": 4.989561673126108e-05, + "loss": 0.865, + "step": 29195 + }, + { + "epoch": 0.03, + "learning_rate": 4.989560956079492e-05, + "loss": 0.9105, + "step": 29196 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895602390082995e-05, + "loss": 0.9884, + "step": 29197 + }, + { + "epoch": 0.03, + "learning_rate": 4.989559521912532e-05, + "loss": 0.8667, + "step": 29198 + }, + { + "epoch": 0.03, + "learning_rate": 4.989558804792188e-05, + "loss": 0.8648, + "step": 29199 + }, + { + "epoch": 0.03, + "learning_rate": 4.989558087647268e-05, + "loss": 0.9377, + "step": 29200 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895573704777724e-05, + "loss": 1.1753, + "step": 29201 + }, + { + "epoch": 0.03, + "learning_rate": 4.989556653283701e-05, + "loss": 0.8946, + "step": 29202 + }, + { + "epoch": 0.03, + "learning_rate": 4.989555936065054e-05, + "loss": 1.2819, + "step": 29203 + }, + { + "epoch": 0.03, + "learning_rate": 4.989555218821831e-05, + "loss": 0.7089, + "step": 29204 + }, + { + "epoch": 0.03, + "learning_rate": 4.989554501554032e-05, + "loss": 0.7565, + "step": 29205 + }, + { + "epoch": 0.03, + "learning_rate": 4.989553784261657e-05, + "loss": 1.1422, + "step": 29206 + }, + { + "epoch": 0.03, + "learning_rate": 4.989553066944707e-05, + "loss": 1.0805, + "step": 29207 + }, + { + "epoch": 0.03, + "learning_rate": 4.98955234960318e-05, + "loss": 1.0336, + "step": 29208 + }, + { + "epoch": 0.03, + "learning_rate": 4.989551632237078e-05, + "loss": 0.8886, + "step": 29209 + }, + { + "epoch": 0.03, + "learning_rate": 4.989550914846399e-05, + "loss": 0.8117, + "step": 29210 + }, + { + "epoch": 0.03, + "learning_rate": 4.989550197431145e-05, + "loss": 1.1089, + "step": 29211 + }, + { + "epoch": 0.03, + "learning_rate": 4.989549479991316e-05, + "loss": 1.0928, + "step": 29212 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895487625269105e-05, + "loss": 1.1038, + "step": 29213 + }, + { + "epoch": 0.03, + "learning_rate": 4.989548045037929e-05, + "loss": 0.8486, + "step": 29214 + }, + { + "epoch": 0.03, + "learning_rate": 4.989547327524372e-05, + "loss": 1.0194, + "step": 29215 + }, + { + "epoch": 0.03, + "learning_rate": 4.98954660998624e-05, + "loss": 0.9102, + "step": 29216 + }, + { + "epoch": 0.03, + "learning_rate": 4.989545892423531e-05, + "loss": 0.9763, + "step": 29217 + }, + { + "epoch": 0.03, + "learning_rate": 4.989545174836247e-05, + "loss": 0.9738, + "step": 29218 + }, + { + "epoch": 0.03, + "learning_rate": 4.989544457224387e-05, + "loss": 1.128, + "step": 29219 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895437395879514e-05, + "loss": 1.0363, + "step": 29220 + }, + { + "epoch": 0.03, + "learning_rate": 4.98954302192694e-05, + "loss": 1.3288, + "step": 29221 + }, + { + "epoch": 0.03, + "learning_rate": 4.989542304241353e-05, + "loss": 1.1105, + "step": 29222 + }, + { + "epoch": 0.03, + "learning_rate": 4.98954158653119e-05, + "loss": 0.7951, + "step": 29223 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895408687964515e-05, + "loss": 1.1573, + "step": 29224 + }, + { + "epoch": 0.03, + "learning_rate": 4.989540151037137e-05, + "loss": 0.7594, + "step": 29225 + }, + { + "epoch": 0.03, + "learning_rate": 4.989539433253247e-05, + "loss": 1.0222, + "step": 29226 + }, + { + "epoch": 0.03, + "learning_rate": 4.989538715444781e-05, + "loss": 0.7839, + "step": 29227 + }, + { + "epoch": 0.03, + "learning_rate": 4.989537997611739e-05, + "loss": 0.7927, + "step": 29228 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895372797541226e-05, + "loss": 1.049, + "step": 29229 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895365618719303e-05, + "loss": 0.9789, + "step": 29230 + }, + { + "epoch": 0.03, + "learning_rate": 4.989535843965162e-05, + "loss": 0.6173, + "step": 29231 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895351260338174e-05, + "loss": 0.8645, + "step": 29232 + }, + { + "epoch": 0.03, + "learning_rate": 4.989534408077898e-05, + "loss": 0.7539, + "step": 29233 + }, + { + "epoch": 0.03, + "learning_rate": 4.989533690097403e-05, + "loss": 0.5883, + "step": 29234 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895329720923324e-05, + "loss": 1.173, + "step": 29235 + }, + { + "epoch": 0.03, + "learning_rate": 4.989532254062685e-05, + "loss": 0.8586, + "step": 29236 + }, + { + "epoch": 0.03, + "learning_rate": 4.989531536008464e-05, + "loss": 0.8761, + "step": 29237 + }, + { + "epoch": 0.03, + "learning_rate": 4.989530817929666e-05, + "loss": 0.437, + "step": 29238 + }, + { + "epoch": 0.03, + "learning_rate": 4.989530099826293e-05, + "loss": 0.3464, + "step": 29239 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895293816983434e-05, + "loss": 0.2199, + "step": 29240 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895286635458196e-05, + "loss": 0.1708, + "step": 29241 + }, + { + "epoch": 0.03, + "learning_rate": 4.989527945368719e-05, + "loss": 0.1026, + "step": 29242 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895272271670436e-05, + "loss": 0.1849, + "step": 29243 + }, + { + "epoch": 0.03, + "learning_rate": 4.989526508940792e-05, + "loss": 0.9639, + "step": 29244 + }, + { + "epoch": 0.03, + "learning_rate": 4.989525790689965e-05, + "loss": 0.7573, + "step": 29245 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895250724145624e-05, + "loss": 0.8215, + "step": 29246 + }, + { + "epoch": 0.03, + "learning_rate": 4.989524354114585e-05, + "loss": 0.8524, + "step": 29247 + }, + { + "epoch": 0.03, + "learning_rate": 4.989523635790032e-05, + "loss": 1.6066, + "step": 29248 + }, + { + "epoch": 0.03, + "learning_rate": 4.989522917440903e-05, + "loss": 1.6906, + "step": 29249 + }, + { + "epoch": 0.03, + "learning_rate": 4.989522199067198e-05, + "loss": 0.4315, + "step": 29250 + }, + { + "epoch": 0.03, + "learning_rate": 4.989521480668918e-05, + "loss": 0.177, + "step": 29251 + }, + { + "epoch": 0.03, + "learning_rate": 4.989520762246063e-05, + "loss": 0.119, + "step": 29252 + }, + { + "epoch": 0.03, + "learning_rate": 4.989520043798632e-05, + "loss": 0.0758, + "step": 29253 + }, + { + "epoch": 0.03, + "learning_rate": 4.989519325326626e-05, + "loss": 0.0594, + "step": 29254 + }, + { + "epoch": 0.03, + "learning_rate": 4.989518606830044e-05, + "loss": 0.0302, + "step": 29255 + }, + { + "epoch": 0.03, + "learning_rate": 4.989517888308886e-05, + "loss": 0.4339, + "step": 29256 + }, + { + "epoch": 0.03, + "learning_rate": 4.989517169763153e-05, + "loss": 0.9214, + "step": 29257 + }, + { + "epoch": 0.03, + "learning_rate": 4.989516451192845e-05, + "loss": 1.5124, + "step": 29258 + }, + { + "epoch": 0.03, + "learning_rate": 4.989515732597961e-05, + "loss": 1.1203, + "step": 29259 + }, + { + "epoch": 0.03, + "learning_rate": 4.989515013978502e-05, + "loss": 1.3431, + "step": 29260 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895142953344673e-05, + "loss": 1.1845, + "step": 29261 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895135766658565e-05, + "loss": 0.9739, + "step": 29262 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895128579726716e-05, + "loss": 1.1506, + "step": 29263 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895121392549105e-05, + "loss": 0.7151, + "step": 29264 + }, + { + "epoch": 0.03, + "learning_rate": 4.989511420512574e-05, + "loss": 0.7792, + "step": 29265 + }, + { + "epoch": 0.03, + "learning_rate": 4.989510701745662e-05, + "loss": 0.752, + "step": 29266 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895099829541744e-05, + "loss": 0.7978, + "step": 29267 + }, + { + "epoch": 0.03, + "learning_rate": 4.989509264138113e-05, + "loss": 0.205, + "step": 29268 + }, + { + "epoch": 0.03, + "learning_rate": 4.989508545297474e-05, + "loss": 0.1651, + "step": 29269 + }, + { + "epoch": 0.03, + "learning_rate": 4.989507826432261e-05, + "loss": 0.1175, + "step": 29270 + }, + { + "epoch": 0.03, + "learning_rate": 4.989507107542472e-05, + "loss": 0.1591, + "step": 29271 + }, + { + "epoch": 0.03, + "learning_rate": 4.989506388628108e-05, + "loss": 0.6633, + "step": 29272 + }, + { + "epoch": 0.03, + "learning_rate": 4.9895056696891684e-05, + "loss": 1.0926, + "step": 29273 + }, + { + "epoch": 0.03, + "learning_rate": 4.989504950725654e-05, + "loss": 0.9763, + "step": 29274 + }, + { + "epoch": 0.03, + "learning_rate": 4.989504231737564e-05, + "loss": 1.3217, + "step": 29275 + }, + { + "epoch": 0.03, + "learning_rate": 4.989503512724898e-05, + "loss": 1.1074, + "step": 29276 + }, + { + "epoch": 0.03, + "learning_rate": 4.989502793687657e-05, + "loss": 0.849, + "step": 29277 + }, + { + "epoch": 0.03, + "learning_rate": 4.989502074625841e-05, + "loss": 1.3484, + "step": 29278 + }, + { + "epoch": 0.03, + "learning_rate": 4.98950135553945e-05, + "loss": 1.0595, + "step": 29279 + }, + { + "epoch": 0.03, + "learning_rate": 4.989500636428483e-05, + "loss": 1.0959, + "step": 29280 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894999172929406e-05, + "loss": 1.2172, + "step": 29281 + }, + { + "epoch": 0.03, + "learning_rate": 4.989499198132823e-05, + "loss": 1.1578, + "step": 29282 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894984789481306e-05, + "loss": 1.1875, + "step": 29283 + }, + { + "epoch": 0.03, + "learning_rate": 4.989497759738863e-05, + "loss": 0.8455, + "step": 29284 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894970405050194e-05, + "loss": 1.1169, + "step": 29285 + }, + { + "epoch": 0.03, + "learning_rate": 4.989496321246601e-05, + "loss": 0.8254, + "step": 29286 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894956019636076e-05, + "loss": 0.9896, + "step": 29287 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894948826560385e-05, + "loss": 1.0347, + "step": 29288 + }, + { + "epoch": 0.03, + "learning_rate": 4.989494163323894e-05, + "loss": 0.7141, + "step": 29289 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894934439671745e-05, + "loss": 1.2202, + "step": 29290 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894927245858804e-05, + "loss": 0.9508, + "step": 29291 + }, + { + "epoch": 0.03, + "learning_rate": 4.98949200518001e-05, + "loss": 0.8317, + "step": 29292 + }, + { + "epoch": 0.03, + "learning_rate": 4.989491285749565e-05, + "loss": 0.8957, + "step": 29293 + }, + { + "epoch": 0.03, + "learning_rate": 4.989490566294544e-05, + "loss": 0.6932, + "step": 29294 + }, + { + "epoch": 0.03, + "learning_rate": 4.989489846814949e-05, + "loss": 1.2097, + "step": 29295 + }, + { + "epoch": 0.03, + "learning_rate": 4.989489127310778e-05, + "loss": 1.155, + "step": 29296 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894884077820324e-05, + "loss": 0.8829, + "step": 29297 + }, + { + "epoch": 0.03, + "learning_rate": 4.989487688228711e-05, + "loss": 1.0818, + "step": 29298 + }, + { + "epoch": 0.03, + "learning_rate": 4.989486968650815e-05, + "loss": 1.4986, + "step": 29299 + }, + { + "epoch": 0.03, + "learning_rate": 4.989486249048343e-05, + "loss": 0.9708, + "step": 29300 + }, + { + "epoch": 0.03, + "learning_rate": 4.989485529421296e-05, + "loss": 1.179, + "step": 29301 + }, + { + "epoch": 0.03, + "learning_rate": 4.989484809769675e-05, + "loss": 1.2555, + "step": 29302 + }, + { + "epoch": 0.03, + "learning_rate": 4.989484090093478e-05, + "loss": 0.3215, + "step": 29303 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894833703927055e-05, + "loss": 0.9909, + "step": 29304 + }, + { + "epoch": 0.03, + "learning_rate": 4.989482650667359e-05, + "loss": 0.7572, + "step": 29305 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894819309174365e-05, + "loss": 0.9826, + "step": 29306 + }, + { + "epoch": 0.03, + "learning_rate": 4.989481211142939e-05, + "loss": 0.9851, + "step": 29307 + }, + { + "epoch": 0.03, + "learning_rate": 4.989480491343866e-05, + "loss": 1.0092, + "step": 29308 + }, + { + "epoch": 0.03, + "learning_rate": 4.989479771520219e-05, + "loss": 1.1625, + "step": 29309 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894790516719955e-05, + "loss": 0.9372, + "step": 29310 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894783317991986e-05, + "loss": 0.9822, + "step": 29311 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894776119018256e-05, + "loss": 1.0692, + "step": 29312 + }, + { + "epoch": 0.03, + "learning_rate": 4.989476891979877e-05, + "loss": 1.051, + "step": 29313 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894761720333544e-05, + "loss": 0.9704, + "step": 29314 + }, + { + "epoch": 0.03, + "learning_rate": 4.989475452062256e-05, + "loss": 1.0557, + "step": 29315 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894747320665834e-05, + "loss": 0.7574, + "step": 29316 + }, + { + "epoch": 0.03, + "learning_rate": 4.989474012046335e-05, + "loss": 1.152, + "step": 29317 + }, + { + "epoch": 0.03, + "learning_rate": 4.989473292001512e-05, + "loss": 1.0923, + "step": 29318 + }, + { + "epoch": 0.03, + "learning_rate": 4.989472571932113e-05, + "loss": 1.1042, + "step": 29319 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894718518381404e-05, + "loss": 0.6948, + "step": 29320 + }, + { + "epoch": 0.03, + "learning_rate": 4.989471131719592e-05, + "loss": 0.9491, + "step": 29321 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894704115764684e-05, + "loss": 0.9543, + "step": 29322 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894696914087705e-05, + "loss": 0.7987, + "step": 29323 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894689712164965e-05, + "loss": 1.0699, + "step": 29324 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894682509996484e-05, + "loss": 0.9408, + "step": 29325 + }, + { + "epoch": 0.03, + "learning_rate": 4.989467530758225e-05, + "loss": 1.0876, + "step": 29326 + }, + { + "epoch": 0.03, + "learning_rate": 4.989466810492227e-05, + "loss": 1.0953, + "step": 29327 + }, + { + "epoch": 0.03, + "learning_rate": 4.989466090201654e-05, + "loss": 0.7599, + "step": 29328 + }, + { + "epoch": 0.03, + "learning_rate": 4.989465369886506e-05, + "loss": 0.4469, + "step": 29329 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894646495467824e-05, + "loss": 0.806, + "step": 29330 + }, + { + "epoch": 0.03, + "learning_rate": 4.989463929182484e-05, + "loss": 0.6327, + "step": 29331 + }, + { + "epoch": 0.03, + "learning_rate": 4.989463208793611e-05, + "loss": 0.9187, + "step": 29332 + }, + { + "epoch": 0.03, + "learning_rate": 4.989462488380163e-05, + "loss": 1.053, + "step": 29333 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894617679421405e-05, + "loss": 0.8819, + "step": 29334 + }, + { + "epoch": 0.03, + "learning_rate": 4.989461047479543e-05, + "loss": 0.98, + "step": 29335 + }, + { + "epoch": 0.03, + "learning_rate": 4.98946032699237e-05, + "loss": 1.0498, + "step": 29336 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894596064806224e-05, + "loss": 0.8612, + "step": 29337 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894588859443e-05, + "loss": 1.035, + "step": 29338 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894581653834025e-05, + "loss": 0.96, + "step": 29339 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894574447979304e-05, + "loss": 0.8642, + "step": 29340 + }, + { + "epoch": 0.03, + "learning_rate": 4.989456724187883e-05, + "loss": 0.7683, + "step": 29341 + }, + { + "epoch": 0.03, + "learning_rate": 4.989456003553261e-05, + "loss": 0.7102, + "step": 29342 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894552828940646e-05, + "loss": 0.8897, + "step": 29343 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894545622102926e-05, + "loss": 0.9385, + "step": 29344 + }, + { + "epoch": 0.03, + "learning_rate": 4.989453841501946e-05, + "loss": 1.0232, + "step": 29345 + }, + { + "epoch": 0.03, + "learning_rate": 4.989453120769024e-05, + "loss": 1.014, + "step": 29346 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894524000115285e-05, + "loss": 0.8275, + "step": 29347 + }, + { + "epoch": 0.03, + "learning_rate": 4.989451679229457e-05, + "loss": 0.9315, + "step": 29348 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894509584228113e-05, + "loss": 1.2444, + "step": 29349 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894502375915906e-05, + "loss": 1.0169, + "step": 29350 + }, + { + "epoch": 0.03, + "learning_rate": 4.989449516735794e-05, + "loss": 0.8404, + "step": 29351 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894487958554246e-05, + "loss": 0.6489, + "step": 29352 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894480749504794e-05, + "loss": 1.3753, + "step": 29353 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894473540209595e-05, + "loss": 0.8628, + "step": 29354 + }, + { + "epoch": 0.03, + "learning_rate": 4.989446633066865e-05, + "loss": 0.9753, + "step": 29355 + }, + { + "epoch": 0.03, + "learning_rate": 4.989445912088195e-05, + "loss": 1.0818, + "step": 29356 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894451910849515e-05, + "loss": 1.1394, + "step": 29357 + }, + { + "epoch": 0.03, + "learning_rate": 4.989444470057132e-05, + "loss": 0.8914, + "step": 29358 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894437490047384e-05, + "loss": 1.003, + "step": 29359 + }, + { + "epoch": 0.03, + "learning_rate": 4.98944302792777e-05, + "loss": 0.7021, + "step": 29360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894423068262274e-05, + "loss": 0.8251, + "step": 29361 + }, + { + "epoch": 0.03, + "learning_rate": 4.989441585700109e-05, + "loss": 0.4326, + "step": 29362 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894408645494166e-05, + "loss": 0.3051, + "step": 29363 + }, + { + "epoch": 0.03, + "learning_rate": 4.989440143374149e-05, + "loss": 0.2911, + "step": 29364 + }, + { + "epoch": 0.03, + "learning_rate": 4.989439422174307e-05, + "loss": 0.2458, + "step": 29365 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894387009498904e-05, + "loss": 0.2318, + "step": 29366 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894379797008994e-05, + "loss": 0.351, + "step": 29367 + }, + { + "epoch": 0.03, + "learning_rate": 4.989437258427333e-05, + "loss": 0.2303, + "step": 29368 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894365371291924e-05, + "loss": 0.2579, + "step": 29369 + }, + { + "epoch": 0.03, + "learning_rate": 4.989435815806477e-05, + "loss": 0.2376, + "step": 29370 + }, + { + "epoch": 0.03, + "learning_rate": 4.989435094459187e-05, + "loss": 0.2198, + "step": 29371 + }, + { + "epoch": 0.03, + "learning_rate": 4.989434373087322e-05, + "loss": 1.1669, + "step": 29372 + }, + { + "epoch": 0.03, + "learning_rate": 4.989433651690882e-05, + "loss": 1.2567, + "step": 29373 + }, + { + "epoch": 0.03, + "learning_rate": 4.989432930269868e-05, + "loss": 0.7841, + "step": 29374 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894322088242796e-05, + "loss": 1.3161, + "step": 29375 + }, + { + "epoch": 0.03, + "learning_rate": 4.989431487354117e-05, + "loss": 0.882, + "step": 29376 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894307658593785e-05, + "loss": 0.739, + "step": 29377 + }, + { + "epoch": 0.03, + "learning_rate": 4.989430044340067e-05, + "loss": 0.9347, + "step": 29378 + }, + { + "epoch": 0.03, + "learning_rate": 4.989429322796179e-05, + "loss": 1.0013, + "step": 29379 + }, + { + "epoch": 0.03, + "learning_rate": 4.989428601227717e-05, + "loss": 1.3179, + "step": 29380 + }, + { + "epoch": 0.03, + "learning_rate": 4.989427879634681e-05, + "loss": 0.9084, + "step": 29381 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894271580170706e-05, + "loss": 1.1538, + "step": 29382 + }, + { + "epoch": 0.03, + "learning_rate": 4.989426436374884e-05, + "loss": 1.0696, + "step": 29383 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894257147081244e-05, + "loss": 1.0778, + "step": 29384 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894249930167904e-05, + "loss": 0.8512, + "step": 29385 + }, + { + "epoch": 0.03, + "learning_rate": 4.989424271300881e-05, + "loss": 1.043, + "step": 29386 + }, + { + "epoch": 0.03, + "learning_rate": 4.989423549560397e-05, + "loss": 0.8696, + "step": 29387 + }, + { + "epoch": 0.03, + "learning_rate": 4.989422827795339e-05, + "loss": 0.7935, + "step": 29388 + }, + { + "epoch": 0.03, + "learning_rate": 4.989422106005706e-05, + "loss": 0.5885, + "step": 29389 + }, + { + "epoch": 0.03, + "learning_rate": 4.989421384191499e-05, + "loss": 0.9029, + "step": 29390 + }, + { + "epoch": 0.03, + "learning_rate": 4.989420662352717e-05, + "loss": 0.6442, + "step": 29391 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894199404893605e-05, + "loss": 0.5084, + "step": 29392 + }, + { + "epoch": 0.03, + "learning_rate": 4.98941921860143e-05, + "loss": 0.4674, + "step": 29393 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894184966889245e-05, + "loss": 0.6737, + "step": 29394 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894177747518446e-05, + "loss": 0.9451, + "step": 29395 + }, + { + "epoch": 0.03, + "learning_rate": 4.98941705279019e-05, + "loss": 1.289, + "step": 29396 + }, + { + "epoch": 0.03, + "learning_rate": 4.989416330803962e-05, + "loss": 0.9939, + "step": 29397 + }, + { + "epoch": 0.03, + "learning_rate": 4.989415608793158e-05, + "loss": 0.8534, + "step": 29398 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894148867577804e-05, + "loss": 1.1014, + "step": 29399 + }, + { + "epoch": 0.03, + "learning_rate": 4.989414164697828e-05, + "loss": 0.7335, + "step": 29400 + }, + { + "epoch": 0.03, + "learning_rate": 4.989413442613301e-05, + "loss": 0.9387, + "step": 29401 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894127205042005e-05, + "loss": 0.4847, + "step": 29402 + }, + { + "epoch": 0.03, + "learning_rate": 4.989411998370525e-05, + "loss": 1.0982, + "step": 29403 + }, + { + "epoch": 0.03, + "learning_rate": 4.989411276212275e-05, + "loss": 0.8505, + "step": 29404 + }, + { + "epoch": 0.03, + "learning_rate": 4.989410554029451e-05, + "loss": 1.1021, + "step": 29405 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894098318220515e-05, + "loss": 1.0102, + "step": 29406 + }, + { + "epoch": 0.03, + "learning_rate": 4.989409109590079e-05, + "loss": 1.0886, + "step": 29407 + }, + { + "epoch": 0.03, + "learning_rate": 4.989408387333531e-05, + "loss": 1.1411, + "step": 29408 + }, + { + "epoch": 0.03, + "learning_rate": 4.989407665052409e-05, + "loss": 1.0579, + "step": 29409 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894069427467126e-05, + "loss": 0.9267, + "step": 29410 + }, + { + "epoch": 0.03, + "learning_rate": 4.989406220416443e-05, + "loss": 0.9765, + "step": 29411 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894054980615974e-05, + "loss": 0.5028, + "step": 29412 + }, + { + "epoch": 0.03, + "learning_rate": 4.989404775682178e-05, + "loss": 0.7599, + "step": 29413 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894040532781836e-05, + "loss": 0.8948, + "step": 29414 + }, + { + "epoch": 0.03, + "learning_rate": 4.989403330849616e-05, + "loss": 1.1558, + "step": 29415 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894026083964734e-05, + "loss": 0.6147, + "step": 29416 + }, + { + "epoch": 0.03, + "learning_rate": 4.989401885918756e-05, + "loss": 1.056, + "step": 29417 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894011634164654e-05, + "loss": 1.095, + "step": 29418 + }, + { + "epoch": 0.03, + "learning_rate": 4.9894004408896e-05, + "loss": 0.8667, + "step": 29419 + }, + { + "epoch": 0.03, + "learning_rate": 4.98939971833816e-05, + "loss": 0.929, + "step": 29420 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893989957621464e-05, + "loss": 0.8799, + "step": 29421 + }, + { + "epoch": 0.03, + "learning_rate": 4.989398273161557e-05, + "loss": 1.1629, + "step": 29422 + }, + { + "epoch": 0.03, + "learning_rate": 4.989397550536395e-05, + "loss": 0.8678, + "step": 29423 + }, + { + "epoch": 0.03, + "learning_rate": 4.989396827886658e-05, + "loss": 0.9394, + "step": 29424 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893961052123474e-05, + "loss": 0.9161, + "step": 29425 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893953825134616e-05, + "loss": 1.0829, + "step": 29426 + }, + { + "epoch": 0.03, + "learning_rate": 4.989394659790002e-05, + "loss": 0.9521, + "step": 29427 + }, + { + "epoch": 0.03, + "learning_rate": 4.989393937041967e-05, + "loss": 0.8078, + "step": 29428 + }, + { + "epoch": 0.03, + "learning_rate": 4.98939321426936e-05, + "loss": 1.1222, + "step": 29429 + }, + { + "epoch": 0.03, + "learning_rate": 4.989392491472177e-05, + "loss": 1.047, + "step": 29430 + }, + { + "epoch": 0.03, + "learning_rate": 4.98939176865042e-05, + "loss": 0.9694, + "step": 29431 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893910458040894e-05, + "loss": 0.7196, + "step": 29432 + }, + { + "epoch": 0.03, + "learning_rate": 4.989390322933184e-05, + "loss": 0.6405, + "step": 29433 + }, + { + "epoch": 0.03, + "learning_rate": 4.989389600037705e-05, + "loss": 0.651, + "step": 29434 + }, + { + "epoch": 0.03, + "learning_rate": 4.989388877117651e-05, + "loss": 0.4802, + "step": 29435 + }, + { + "epoch": 0.03, + "learning_rate": 4.989388154173023e-05, + "loss": 0.5764, + "step": 29436 + }, + { + "epoch": 0.03, + "learning_rate": 4.989387431203821e-05, + "loss": 0.5383, + "step": 29437 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893867082100454e-05, + "loss": 0.6518, + "step": 29438 + }, + { + "epoch": 0.03, + "learning_rate": 4.989385985191695e-05, + "loss": 0.7855, + "step": 29439 + }, + { + "epoch": 0.03, + "learning_rate": 4.98938526214877e-05, + "loss": 0.9099, + "step": 29440 + }, + { + "epoch": 0.03, + "learning_rate": 4.989384539081272e-05, + "loss": 1.2821, + "step": 29441 + }, + { + "epoch": 0.03, + "learning_rate": 4.989383815989199e-05, + "loss": 0.9639, + "step": 29442 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893830928725516e-05, + "loss": 1.1148, + "step": 29443 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893823697313304e-05, + "loss": 0.9673, + "step": 29444 + }, + { + "epoch": 0.03, + "learning_rate": 4.989381646565535e-05, + "loss": 0.5411, + "step": 29445 + }, + { + "epoch": 0.03, + "learning_rate": 4.989380923375166e-05, + "loss": 1.6845, + "step": 29446 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893802001602226e-05, + "loss": 1.2677, + "step": 29447 + }, + { + "epoch": 0.03, + "learning_rate": 4.989379476920705e-05, + "loss": 0.7428, + "step": 29448 + }, + { + "epoch": 0.03, + "learning_rate": 4.989378753656613e-05, + "loss": 0.7558, + "step": 29449 + }, + { + "epoch": 0.03, + "learning_rate": 4.989378030367947e-05, + "loss": 0.4081, + "step": 29450 + }, + { + "epoch": 0.03, + "learning_rate": 4.989377307054707e-05, + "loss": 0.9302, + "step": 29451 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893765837168934e-05, + "loss": 0.9861, + "step": 29452 + }, + { + "epoch": 0.03, + "learning_rate": 4.989375860354505e-05, + "loss": 1.0523, + "step": 29453 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893751369675433e-05, + "loss": 0.6775, + "step": 29454 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893744135560075e-05, + "loss": 0.5099, + "step": 29455 + }, + { + "epoch": 0.03, + "learning_rate": 4.989373690119896e-05, + "loss": 1.2656, + "step": 29456 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893729666592126e-05, + "loss": 0.5586, + "step": 29457 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893722431739544e-05, + "loss": 0.9869, + "step": 29458 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893715196641213e-05, + "loss": 0.9622, + "step": 29459 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893707961297155e-05, + "loss": 1.0984, + "step": 29460 + }, + { + "epoch": 0.03, + "learning_rate": 4.989370072570735e-05, + "loss": 1.3423, + "step": 29461 + }, + { + "epoch": 0.03, + "learning_rate": 4.98936934898718e-05, + "loss": 1.3087, + "step": 29462 + }, + { + "epoch": 0.03, + "learning_rate": 4.989368625379052e-05, + "loss": 0.9516, + "step": 29463 + }, + { + "epoch": 0.03, + "learning_rate": 4.989367901746349e-05, + "loss": 0.7328, + "step": 29464 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893671780890727e-05, + "loss": 0.6059, + "step": 29465 + }, + { + "epoch": 0.03, + "learning_rate": 4.989366454407223e-05, + "loss": 1.2643, + "step": 29466 + }, + { + "epoch": 0.03, + "learning_rate": 4.989365730700798e-05, + "loss": 1.1962, + "step": 29467 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893650069697994e-05, + "loss": 0.9147, + "step": 29468 + }, + { + "epoch": 0.03, + "learning_rate": 4.989364283214227e-05, + "loss": 1.1372, + "step": 29469 + }, + { + "epoch": 0.03, + "learning_rate": 4.989363559434081e-05, + "loss": 0.9389, + "step": 29470 + }, + { + "epoch": 0.03, + "learning_rate": 4.989362835629361e-05, + "loss": 0.8516, + "step": 29471 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893621118000665e-05, + "loss": 0.9088, + "step": 29472 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893613879461985e-05, + "loss": 0.9734, + "step": 29473 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893606640677564e-05, + "loss": 0.8195, + "step": 29474 + }, + { + "epoch": 0.03, + "learning_rate": 4.989359940164741e-05, + "loss": 1.0494, + "step": 29475 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893592162371504e-05, + "loss": 1.0731, + "step": 29476 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893584922849866e-05, + "loss": 1.0533, + "step": 29477 + }, + { + "epoch": 0.03, + "learning_rate": 4.989357768308249e-05, + "loss": 0.932, + "step": 29478 + }, + { + "epoch": 0.03, + "learning_rate": 4.989357044306938e-05, + "loss": 0.7419, + "step": 29479 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893563202810525e-05, + "loss": 0.8148, + "step": 29480 + }, + { + "epoch": 0.03, + "learning_rate": 4.989355596230593e-05, + "loss": 1.437, + "step": 29481 + }, + { + "epoch": 0.03, + "learning_rate": 4.989354872155559e-05, + "loss": 1.2238, + "step": 29482 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893541480559526e-05, + "loss": 0.8273, + "step": 29483 + }, + { + "epoch": 0.03, + "learning_rate": 4.989353423931772e-05, + "loss": 0.7372, + "step": 29484 + }, + { + "epoch": 0.03, + "learning_rate": 4.989352699783017e-05, + "loss": 1.0574, + "step": 29485 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893519756096884e-05, + "loss": 1.2833, + "step": 29486 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893512514117854e-05, + "loss": 1.1901, + "step": 29487 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893505271893097e-05, + "loss": 1.1349, + "step": 29488 + }, + { + "epoch": 0.03, + "learning_rate": 4.98934980294226e-05, + "loss": 0.4467, + "step": 29489 + }, + { + "epoch": 0.03, + "learning_rate": 4.989349078670635e-05, + "loss": 0.9104, + "step": 29490 + }, + { + "epoch": 0.03, + "learning_rate": 4.989348354374438e-05, + "loss": 1.0715, + "step": 29491 + }, + { + "epoch": 0.03, + "learning_rate": 4.989347630053667e-05, + "loss": 1.1724, + "step": 29492 + }, + { + "epoch": 0.03, + "learning_rate": 4.989346905708321e-05, + "loss": 1.1202, + "step": 29493 + }, + { + "epoch": 0.03, + "learning_rate": 4.989346181338402e-05, + "loss": 1.0046, + "step": 29494 + }, + { + "epoch": 0.03, + "learning_rate": 4.989345456943909e-05, + "loss": 1.161, + "step": 29495 + }, + { + "epoch": 0.03, + "learning_rate": 4.989344732524843e-05, + "loss": 0.7851, + "step": 29496 + }, + { + "epoch": 0.03, + "learning_rate": 4.989344008081203e-05, + "loss": 0.9665, + "step": 29497 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893432836129885e-05, + "loss": 0.9875, + "step": 29498 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893425591202e-05, + "loss": 1.5392, + "step": 29499 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893418346028383e-05, + "loss": 1.1259, + "step": 29500 + }, + { + "epoch": 0.03, + "eval_loss": 1.0635902881622314, + "eval_runtime": 130.3328, + "eval_samples_per_second": 10.627, + "eval_steps_per_second": 5.317, + "step": 29500 + }, + { + "epoch": 0.03, + "learning_rate": 4.989341110060903e-05, + "loss": 1.2886, + "step": 29501 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893403854943944e-05, + "loss": 1.2668, + "step": 29502 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893396609033116e-05, + "loss": 1.3232, + "step": 29503 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893389362876554e-05, + "loss": 0.9349, + "step": 29504 + }, + { + "epoch": 0.03, + "learning_rate": 4.989338211647425e-05, + "loss": 1.0031, + "step": 29505 + }, + { + "epoch": 0.03, + "learning_rate": 4.989337486982621e-05, + "loss": 1.0684, + "step": 29506 + }, + { + "epoch": 0.03, + "learning_rate": 4.989336762293243e-05, + "loss": 1.2405, + "step": 29507 + }, + { + "epoch": 0.03, + "learning_rate": 4.989336037579292e-05, + "loss": 0.9994, + "step": 29508 + }, + { + "epoch": 0.03, + "learning_rate": 4.989335312840767e-05, + "loss": 1.1054, + "step": 29509 + }, + { + "epoch": 0.03, + "learning_rate": 4.989334588077669e-05, + "loss": 0.9814, + "step": 29510 + }, + { + "epoch": 0.03, + "learning_rate": 4.989333863289996e-05, + "loss": 0.9013, + "step": 29511 + }, + { + "epoch": 0.03, + "learning_rate": 4.98933313847775e-05, + "loss": 0.344, + "step": 29512 + }, + { + "epoch": 0.03, + "learning_rate": 4.98933241364093e-05, + "loss": 0.7135, + "step": 29513 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893316887795374e-05, + "loss": 0.8927, + "step": 29514 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893309638935706e-05, + "loss": 0.993, + "step": 29515 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893302389830296e-05, + "loss": 1.1201, + "step": 29516 + }, + { + "epoch": 0.03, + "learning_rate": 4.989329514047916e-05, + "loss": 1.2187, + "step": 29517 + }, + { + "epoch": 0.03, + "learning_rate": 4.989328789088228e-05, + "loss": 1.1157, + "step": 29518 + }, + { + "epoch": 0.03, + "learning_rate": 4.989328064103967e-05, + "loss": 1.2772, + "step": 29519 + }, + { + "epoch": 0.03, + "learning_rate": 4.989327339095132e-05, + "loss": 0.8622, + "step": 29520 + }, + { + "epoch": 0.03, + "learning_rate": 4.989326614061723e-05, + "loss": 0.9657, + "step": 29521 + }, + { + "epoch": 0.03, + "learning_rate": 4.989325889003741e-05, + "loss": 1.0221, + "step": 29522 + }, + { + "epoch": 0.03, + "learning_rate": 4.989325163921186e-05, + "loss": 1.0889, + "step": 29523 + }, + { + "epoch": 0.03, + "learning_rate": 4.989324438814056e-05, + "loss": 1.3228, + "step": 29524 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893237136823536e-05, + "loss": 0.7939, + "step": 29525 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893229885260775e-05, + "loss": 0.98, + "step": 29526 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893222633452274e-05, + "loss": 1.2033, + "step": 29527 + }, + { + "epoch": 0.03, + "learning_rate": 4.989321538139804e-05, + "loss": 1.0589, + "step": 29528 + }, + { + "epoch": 0.03, + "learning_rate": 4.989320812909807e-05, + "loss": 1.048, + "step": 29529 + }, + { + "epoch": 0.03, + "learning_rate": 4.989320087655236e-05, + "loss": 0.9237, + "step": 29530 + }, + { + "epoch": 0.03, + "learning_rate": 4.989319362376092e-05, + "loss": 1.0848, + "step": 29531 + }, + { + "epoch": 0.03, + "learning_rate": 4.989318637072374e-05, + "loss": 0.995, + "step": 29532 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893179117440835e-05, + "loss": 1.0574, + "step": 29533 + }, + { + "epoch": 0.03, + "learning_rate": 4.989317186391219e-05, + "loss": 1.1163, + "step": 29534 + }, + { + "epoch": 0.03, + "learning_rate": 4.989316461013781e-05, + "loss": 0.9879, + "step": 29535 + }, + { + "epoch": 0.03, + "learning_rate": 4.989315735611769e-05, + "loss": 1.0952, + "step": 29536 + }, + { + "epoch": 0.03, + "learning_rate": 4.989315010185185e-05, + "loss": 1.2547, + "step": 29537 + }, + { + "epoch": 0.03, + "learning_rate": 4.989314284734026e-05, + "loss": 0.833, + "step": 29538 + }, + { + "epoch": 0.03, + "learning_rate": 4.989313559258294e-05, + "loss": 0.9863, + "step": 29539 + }, + { + "epoch": 0.03, + "learning_rate": 4.989312833757989e-05, + "loss": 0.7732, + "step": 29540 + }, + { + "epoch": 0.03, + "learning_rate": 4.98931210823311e-05, + "loss": 0.9732, + "step": 29541 + }, + { + "epoch": 0.03, + "learning_rate": 4.989311382683658e-05, + "loss": 0.9168, + "step": 29542 + }, + { + "epoch": 0.03, + "learning_rate": 4.989310657109632e-05, + "loss": 0.8702, + "step": 29543 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893099315110326e-05, + "loss": 0.7823, + "step": 29544 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893092058878605e-05, + "loss": 0.5991, + "step": 29545 + }, + { + "epoch": 0.03, + "learning_rate": 4.989308480240115e-05, + "loss": 0.8633, + "step": 29546 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893077545677946e-05, + "loss": 1.0289, + "step": 29547 + }, + { + "epoch": 0.03, + "learning_rate": 4.989307028870902e-05, + "loss": 1.2133, + "step": 29548 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893063031494356e-05, + "loss": 0.8713, + "step": 29549 + }, + { + "epoch": 0.03, + "learning_rate": 4.989305577403396e-05, + "loss": 0.7791, + "step": 29550 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893048516327835e-05, + "loss": 1.1244, + "step": 29551 + }, + { + "epoch": 0.03, + "learning_rate": 4.989304125837597e-05, + "loss": 0.9407, + "step": 29552 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893034000178376e-05, + "loss": 0.7906, + "step": 29553 + }, + { + "epoch": 0.03, + "learning_rate": 4.989302674173505e-05, + "loss": 0.8822, + "step": 29554 + }, + { + "epoch": 0.03, + "learning_rate": 4.989301948304598e-05, + "loss": 1.1319, + "step": 29555 + }, + { + "epoch": 0.03, + "learning_rate": 4.9893012224111194e-05, + "loss": 0.8197, + "step": 29556 + }, + { + "epoch": 0.03, + "learning_rate": 4.989300496493066e-05, + "loss": 0.4789, + "step": 29557 + }, + { + "epoch": 0.03, + "learning_rate": 4.98929977055044e-05, + "loss": 0.7079, + "step": 29558 + }, + { + "epoch": 0.03, + "learning_rate": 4.98929904458324e-05, + "loss": 0.9878, + "step": 29559 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892983185914664e-05, + "loss": 0.9427, + "step": 29560 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892975925751205e-05, + "loss": 0.9449, + "step": 29561 + }, + { + "epoch": 0.03, + "learning_rate": 4.989296866534201e-05, + "loss": 0.9034, + "step": 29562 + }, + { + "epoch": 0.03, + "learning_rate": 4.989296140468708e-05, + "loss": 1.5288, + "step": 29563 + }, + { + "epoch": 0.03, + "learning_rate": 4.989295414378642e-05, + "loss": 1.3956, + "step": 29564 + }, + { + "epoch": 0.03, + "learning_rate": 4.989294688264002e-05, + "loss": 0.5595, + "step": 29565 + }, + { + "epoch": 0.03, + "learning_rate": 4.98929396212479e-05, + "loss": 2.1128, + "step": 29566 + }, + { + "epoch": 0.03, + "learning_rate": 4.989293235961004e-05, + "loss": 1.4597, + "step": 29567 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892925097726446e-05, + "loss": 0.8417, + "step": 29568 + }, + { + "epoch": 0.03, + "learning_rate": 4.989291783559712e-05, + "loss": 0.423, + "step": 29569 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892910573222065e-05, + "loss": 0.4306, + "step": 29570 + }, + { + "epoch": 0.03, + "learning_rate": 4.989290331060128e-05, + "loss": 1.1656, + "step": 29571 + }, + { + "epoch": 0.03, + "learning_rate": 4.989289604773475e-05, + "loss": 2.3191, + "step": 29572 + }, + { + "epoch": 0.03, + "learning_rate": 4.989288878462251e-05, + "loss": 2.3073, + "step": 29573 + }, + { + "epoch": 0.03, + "learning_rate": 4.989288152126451e-05, + "loss": 2.3022, + "step": 29574 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892874257660796e-05, + "loss": 2.3115, + "step": 29575 + }, + { + "epoch": 0.03, + "learning_rate": 4.989286699381135e-05, + "loss": 2.3062, + "step": 29576 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892859729716166e-05, + "loss": 2.296, + "step": 29577 + }, + { + "epoch": 0.03, + "learning_rate": 4.989285246537525e-05, + "loss": 2.2944, + "step": 29578 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892845200788605e-05, + "loss": 2.2922, + "step": 29579 + }, + { + "epoch": 0.03, + "learning_rate": 4.989283793595623e-05, + "loss": 2.2831, + "step": 29580 + }, + { + "epoch": 0.03, + "learning_rate": 4.989283067087812e-05, + "loss": 2.2653, + "step": 29581 + }, + { + "epoch": 0.03, + "learning_rate": 4.989282340555428e-05, + "loss": 2.2366, + "step": 29582 + }, + { + "epoch": 0.03, + "learning_rate": 4.989281613998471e-05, + "loss": 2.2177, + "step": 29583 + }, + { + "epoch": 0.03, + "learning_rate": 4.98928088741694e-05, + "loss": 2.1955, + "step": 29584 + }, + { + "epoch": 0.03, + "learning_rate": 4.989280160810837e-05, + "loss": 2.1125, + "step": 29585 + }, + { + "epoch": 0.03, + "learning_rate": 4.989279434180161e-05, + "loss": 1.15, + "step": 29586 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892787075249115e-05, + "loss": 0.1784, + "step": 29587 + }, + { + "epoch": 0.03, + "learning_rate": 4.989277980845088e-05, + "loss": 0.0015, + "step": 29588 + }, + { + "epoch": 0.03, + "learning_rate": 4.989277254140692e-05, + "loss": 0.0011, + "step": 29589 + }, + { + "epoch": 0.03, + "learning_rate": 4.989276527411724e-05, + "loss": 0.0037, + "step": 29590 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892758006581815e-05, + "loss": 0.0006, + "step": 29591 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892750738800664e-05, + "loss": 0.7902, + "step": 29592 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892743470773785e-05, + "loss": 1.4945, + "step": 29593 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892736202501166e-05, + "loss": 1.5566, + "step": 29594 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892728933982825e-05, + "loss": 0.6584, + "step": 29595 + }, + { + "epoch": 0.03, + "learning_rate": 4.989272166521875e-05, + "loss": 1.2504, + "step": 29596 + }, + { + "epoch": 0.03, + "learning_rate": 4.989271439620895e-05, + "loss": 1.072, + "step": 29597 + }, + { + "epoch": 0.03, + "learning_rate": 4.989270712695341e-05, + "loss": 0.9854, + "step": 29598 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892699857452145e-05, + "loss": 0.9972, + "step": 29599 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892692587705146e-05, + "loss": 0.9128, + "step": 29600 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892685317712426e-05, + "loss": 1.0996, + "step": 29601 + }, + { + "epoch": 0.03, + "learning_rate": 4.989267804747397e-05, + "loss": 0.8003, + "step": 29602 + }, + { + "epoch": 0.03, + "learning_rate": 4.989267077698978e-05, + "loss": 1.0765, + "step": 29603 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892663506259866e-05, + "loss": 0.962, + "step": 29604 + }, + { + "epoch": 0.03, + "learning_rate": 4.989265623528422e-05, + "loss": 1.5177, + "step": 29605 + }, + { + "epoch": 0.03, + "learning_rate": 4.989264896406285e-05, + "loss": 1.6021, + "step": 29606 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892641692595744e-05, + "loss": 0.8687, + "step": 29607 + }, + { + "epoch": 0.03, + "learning_rate": 4.989263442088291e-05, + "loss": 1.244, + "step": 29608 + }, + { + "epoch": 0.03, + "learning_rate": 4.989262714892434e-05, + "loss": 0.8656, + "step": 29609 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892619876720045e-05, + "loss": 0.8696, + "step": 29610 + }, + { + "epoch": 0.03, + "learning_rate": 4.989261260427003e-05, + "loss": 0.7742, + "step": 29611 + }, + { + "epoch": 0.03, + "learning_rate": 4.989260533157427e-05, + "loss": 1.1124, + "step": 29612 + }, + { + "epoch": 0.03, + "learning_rate": 4.989259805863279e-05, + "loss": 1.1395, + "step": 29613 + }, + { + "epoch": 0.03, + "learning_rate": 4.989259078544558e-05, + "loss": 1.173, + "step": 29614 + }, + { + "epoch": 0.03, + "learning_rate": 4.989258351201264e-05, + "loss": 0.8595, + "step": 29615 + }, + { + "epoch": 0.03, + "learning_rate": 4.989257623833397e-05, + "loss": 0.5478, + "step": 29616 + }, + { + "epoch": 0.03, + "learning_rate": 4.989256896440957e-05, + "loss": 0.6014, + "step": 29617 + }, + { + "epoch": 0.03, + "learning_rate": 4.989256169023945e-05, + "loss": 0.5618, + "step": 29618 + }, + { + "epoch": 0.03, + "learning_rate": 4.989255441582359e-05, + "loss": 0.46, + "step": 29619 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892547141162004e-05, + "loss": 0.5252, + "step": 29620 + }, + { + "epoch": 0.03, + "learning_rate": 4.989253986625469e-05, + "loss": 0.4168, + "step": 29621 + }, + { + "epoch": 0.03, + "learning_rate": 4.989253259110165e-05, + "loss": 0.3783, + "step": 29622 + }, + { + "epoch": 0.03, + "learning_rate": 4.989252531570287e-05, + "loss": 0.4579, + "step": 29623 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892518040058376e-05, + "loss": 0.3815, + "step": 29624 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892510764168145e-05, + "loss": 0.4469, + "step": 29625 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892503488032194e-05, + "loss": 0.4938, + "step": 29626 + }, + { + "epoch": 0.03, + "learning_rate": 4.989249621165051e-05, + "loss": 1.121, + "step": 29627 + }, + { + "epoch": 0.03, + "learning_rate": 4.989248893502309e-05, + "loss": 1.0638, + "step": 29628 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892481658149945e-05, + "loss": 1.3473, + "step": 29629 + }, + { + "epoch": 0.03, + "learning_rate": 4.989247438103108e-05, + "loss": 0.8908, + "step": 29630 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892467103666486e-05, + "loss": 1.2684, + "step": 29631 + }, + { + "epoch": 0.03, + "learning_rate": 4.989245982605616e-05, + "loss": 0.9739, + "step": 29632 + }, + { + "epoch": 0.03, + "learning_rate": 4.989245254820011e-05, + "loss": 0.903, + "step": 29633 + }, + { + "epoch": 0.03, + "learning_rate": 4.989244527009833e-05, + "loss": 1.1687, + "step": 29634 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892437991750815e-05, + "loss": 1.2772, + "step": 29635 + }, + { + "epoch": 0.03, + "learning_rate": 4.989243071315758e-05, + "loss": 1.2203, + "step": 29636 + }, + { + "epoch": 0.03, + "learning_rate": 4.989242343431862e-05, + "loss": 1.042, + "step": 29637 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892416155233926e-05, + "loss": 1.1775, + "step": 29638 + }, + { + "epoch": 0.03, + "learning_rate": 4.989240887590351e-05, + "loss": 1.2283, + "step": 29639 + }, + { + "epoch": 0.03, + "learning_rate": 4.989240159632736e-05, + "loss": 1.2469, + "step": 29640 + }, + { + "epoch": 0.03, + "learning_rate": 4.989239431650549e-05, + "loss": 1.0879, + "step": 29641 + }, + { + "epoch": 0.03, + "learning_rate": 4.989238703643788e-05, + "loss": 0.7164, + "step": 29642 + }, + { + "epoch": 0.03, + "learning_rate": 4.989237975612456e-05, + "loss": 0.4945, + "step": 29643 + }, + { + "epoch": 0.03, + "learning_rate": 4.98923724755655e-05, + "loss": 0.9401, + "step": 29644 + }, + { + "epoch": 0.03, + "learning_rate": 4.989236519476072e-05, + "loss": 0.7985, + "step": 29645 + }, + { + "epoch": 0.03, + "learning_rate": 4.989235791371021e-05, + "loss": 1.3772, + "step": 29646 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892350632413975e-05, + "loss": 1.0971, + "step": 29647 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892343350872015e-05, + "loss": 1.0775, + "step": 29648 + }, + { + "epoch": 0.03, + "learning_rate": 4.989233606908432e-05, + "loss": 0.4205, + "step": 29649 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892328787050905e-05, + "loss": 1.2014, + "step": 29650 + }, + { + "epoch": 0.03, + "learning_rate": 4.989232150477177e-05, + "loss": 1.2737, + "step": 29651 + }, + { + "epoch": 0.03, + "learning_rate": 4.989231422224689e-05, + "loss": 1.0253, + "step": 29652 + }, + { + "epoch": 0.03, + "learning_rate": 4.98923069394763e-05, + "loss": 1.1168, + "step": 29653 + }, + { + "epoch": 0.03, + "learning_rate": 4.989229965645998e-05, + "loss": 0.8531, + "step": 29654 + }, + { + "epoch": 0.03, + "learning_rate": 4.989229237319793e-05, + "loss": 0.874, + "step": 29655 + }, + { + "epoch": 0.03, + "learning_rate": 4.989228508969016e-05, + "loss": 0.1529, + "step": 29656 + }, + { + "epoch": 0.03, + "learning_rate": 4.989227780593666e-05, + "loss": 0.2259, + "step": 29657 + }, + { + "epoch": 0.03, + "learning_rate": 4.989227052193743e-05, + "loss": 0.8595, + "step": 29658 + }, + { + "epoch": 0.03, + "learning_rate": 4.989226323769248e-05, + "loss": 0.8849, + "step": 29659 + }, + { + "epoch": 0.03, + "learning_rate": 4.989225595320179e-05, + "loss": 0.9432, + "step": 29660 + }, + { + "epoch": 0.03, + "learning_rate": 4.989224866846539e-05, + "loss": 0.8398, + "step": 29661 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892241383483266e-05, + "loss": 0.9027, + "step": 29662 + }, + { + "epoch": 0.03, + "learning_rate": 4.989223409825541e-05, + "loss": 0.6957, + "step": 29663 + }, + { + "epoch": 0.03, + "learning_rate": 4.989222681278183e-05, + "loss": 0.809, + "step": 29664 + }, + { + "epoch": 0.03, + "learning_rate": 4.989221952706252e-05, + "loss": 1.0714, + "step": 29665 + }, + { + "epoch": 0.03, + "learning_rate": 4.989221224109749e-05, + "loss": 1.0642, + "step": 29666 + }, + { + "epoch": 0.03, + "learning_rate": 4.989220495488673e-05, + "loss": 1.0366, + "step": 29667 + }, + { + "epoch": 0.03, + "learning_rate": 4.989219766843024e-05, + "loss": 1.2124, + "step": 29668 + }, + { + "epoch": 0.03, + "learning_rate": 4.989219038172803e-05, + "loss": 1.3846, + "step": 29669 + }, + { + "epoch": 0.03, + "learning_rate": 4.98921830947801e-05, + "loss": 1.0565, + "step": 29670 + }, + { + "epoch": 0.03, + "learning_rate": 4.989217580758644e-05, + "loss": 1.0992, + "step": 29671 + }, + { + "epoch": 0.03, + "learning_rate": 4.989216852014706e-05, + "loss": 1.0438, + "step": 29672 + }, + { + "epoch": 0.03, + "learning_rate": 4.989216123246195e-05, + "loss": 0.7611, + "step": 29673 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892153944531116e-05, + "loss": 1.1699, + "step": 29674 + }, + { + "epoch": 0.03, + "learning_rate": 4.989214665635456e-05, + "loss": 0.9703, + "step": 29675 + }, + { + "epoch": 0.03, + "learning_rate": 4.989213936793227e-05, + "loss": 1.1886, + "step": 29676 + }, + { + "epoch": 0.03, + "learning_rate": 4.989213207926426e-05, + "loss": 0.661, + "step": 29677 + }, + { + "epoch": 0.03, + "learning_rate": 4.989212479035053e-05, + "loss": 1.1065, + "step": 29678 + }, + { + "epoch": 0.03, + "learning_rate": 4.989211750119107e-05, + "loss": 0.9948, + "step": 29679 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892110211785894e-05, + "loss": 0.9044, + "step": 29680 + }, + { + "epoch": 0.03, + "learning_rate": 4.989210292213499e-05, + "loss": 0.9745, + "step": 29681 + }, + { + "epoch": 0.03, + "learning_rate": 4.989209563223836e-05, + "loss": 0.8212, + "step": 29682 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892088342096e-05, + "loss": 0.8004, + "step": 29683 + }, + { + "epoch": 0.03, + "learning_rate": 4.989208105170792e-05, + "loss": 1.0106, + "step": 29684 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892073761074115e-05, + "loss": 0.5492, + "step": 29685 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892066470194586e-05, + "loss": 0.911, + "step": 29686 + }, + { + "epoch": 0.03, + "learning_rate": 4.989205917906934e-05, + "loss": 0.9088, + "step": 29687 + }, + { + "epoch": 0.03, + "learning_rate": 4.989205188769836e-05, + "loss": 0.8455, + "step": 29688 + }, + { + "epoch": 0.03, + "learning_rate": 4.989204459608166e-05, + "loss": 1.0994, + "step": 29689 + }, + { + "epoch": 0.03, + "learning_rate": 4.989203730421924e-05, + "loss": 1.1868, + "step": 29690 + }, + { + "epoch": 0.03, + "learning_rate": 4.989203001211109e-05, + "loss": 1.067, + "step": 29691 + }, + { + "epoch": 0.03, + "learning_rate": 4.989202271975722e-05, + "loss": 1.1097, + "step": 29692 + }, + { + "epoch": 0.03, + "learning_rate": 4.989201542715762e-05, + "loss": 0.8404, + "step": 29693 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892008134312305e-05, + "loss": 0.8055, + "step": 29694 + }, + { + "epoch": 0.03, + "learning_rate": 4.989200084122127e-05, + "loss": 1.1603, + "step": 29695 + }, + { + "epoch": 0.03, + "learning_rate": 4.989199354788451e-05, + "loss": 1.2454, + "step": 29696 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891986254302016e-05, + "loss": 0.9969, + "step": 29697 + }, + { + "epoch": 0.03, + "learning_rate": 4.989197896047381e-05, + "loss": 0.7465, + "step": 29698 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891971666399875e-05, + "loss": 0.4535, + "step": 29699 + }, + { + "epoch": 0.03, + "learning_rate": 4.989196437208021e-05, + "loss": 1.0462, + "step": 29700 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891957077514836e-05, + "loss": 1.1903, + "step": 29701 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891949782703726e-05, + "loss": 1.1135, + "step": 29702 + }, + { + "epoch": 0.03, + "learning_rate": 4.98919424876469e-05, + "loss": 1.1866, + "step": 29703 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891935192344355e-05, + "loss": 0.9945, + "step": 29704 + }, + { + "epoch": 0.03, + "learning_rate": 4.989192789679609e-05, + "loss": 0.8417, + "step": 29705 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891920601002094e-05, + "loss": 1.0148, + "step": 29706 + }, + { + "epoch": 0.03, + "learning_rate": 4.989191330496238e-05, + "loss": 0.8567, + "step": 29707 + }, + { + "epoch": 0.03, + "learning_rate": 4.989190600867694e-05, + "loss": 0.8136, + "step": 29708 + }, + { + "epoch": 0.03, + "learning_rate": 4.989189871214578e-05, + "loss": 0.8453, + "step": 29709 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891891415368895e-05, + "loss": 1.0365, + "step": 29710 + }, + { + "epoch": 0.03, + "learning_rate": 4.989188411834629e-05, + "loss": 1.1308, + "step": 29711 + }, + { + "epoch": 0.03, + "learning_rate": 4.989187682107796e-05, + "loss": 1.4532, + "step": 29712 + }, + { + "epoch": 0.03, + "learning_rate": 4.989186952356391e-05, + "loss": 0.9661, + "step": 29713 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891862225804135e-05, + "loss": 0.6396, + "step": 29714 + }, + { + "epoch": 0.03, + "learning_rate": 4.989185492779864e-05, + "loss": 0.854, + "step": 29715 + }, + { + "epoch": 0.03, + "learning_rate": 4.989184762954743e-05, + "loss": 0.9681, + "step": 29716 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891840331050486e-05, + "loss": 0.8403, + "step": 29717 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891833032307835e-05, + "loss": 0.9704, + "step": 29718 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891825733319456e-05, + "loss": 1.1809, + "step": 29719 + }, + { + "epoch": 0.03, + "learning_rate": 4.989181843408535e-05, + "loss": 0.9255, + "step": 29720 + }, + { + "epoch": 0.03, + "learning_rate": 4.989181113460552e-05, + "loss": 1.1357, + "step": 29721 + }, + { + "epoch": 0.03, + "learning_rate": 4.989180383487997e-05, + "loss": 1.1232, + "step": 29722 + }, + { + "epoch": 0.03, + "learning_rate": 4.989179653490871e-05, + "loss": 0.8862, + "step": 29723 + }, + { + "epoch": 0.03, + "learning_rate": 4.989178923469172e-05, + "loss": 0.681, + "step": 29724 + }, + { + "epoch": 0.03, + "learning_rate": 4.989178193422901e-05, + "loss": 1.0773, + "step": 29725 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891774633520584e-05, + "loss": 1.0559, + "step": 29726 + }, + { + "epoch": 0.03, + "learning_rate": 4.989176733256643e-05, + "loss": 0.9681, + "step": 29727 + }, + { + "epoch": 0.03, + "learning_rate": 4.989176003136655e-05, + "loss": 0.8999, + "step": 29728 + }, + { + "epoch": 0.03, + "learning_rate": 4.989175272992096e-05, + "loss": 1.0365, + "step": 29729 + }, + { + "epoch": 0.03, + "learning_rate": 4.989174542822964e-05, + "loss": 1.037, + "step": 29730 + }, + { + "epoch": 0.03, + "learning_rate": 4.989173812629261e-05, + "loss": 1.2131, + "step": 29731 + }, + { + "epoch": 0.03, + "learning_rate": 4.989173082410985e-05, + "loss": 0.8438, + "step": 29732 + }, + { + "epoch": 0.03, + "learning_rate": 4.989172352168138e-05, + "loss": 0.9713, + "step": 29733 + }, + { + "epoch": 0.03, + "learning_rate": 4.989171621900718e-05, + "loss": 1.2086, + "step": 29734 + }, + { + "epoch": 0.03, + "learning_rate": 4.989170891608725e-05, + "loss": 1.1198, + "step": 29735 + }, + { + "epoch": 0.03, + "learning_rate": 4.989170161292162e-05, + "loss": 1.0239, + "step": 29736 + }, + { + "epoch": 0.03, + "learning_rate": 4.989169430951026e-05, + "loss": 0.8525, + "step": 29737 + }, + { + "epoch": 0.03, + "learning_rate": 4.989168700585318e-05, + "loss": 0.9392, + "step": 29738 + }, + { + "epoch": 0.03, + "learning_rate": 4.989167970195038e-05, + "loss": 1.047, + "step": 29739 + }, + { + "epoch": 0.03, + "learning_rate": 4.989167239780186e-05, + "loss": 0.8601, + "step": 29740 + }, + { + "epoch": 0.03, + "learning_rate": 4.989166509340762e-05, + "loss": 1.0527, + "step": 29741 + }, + { + "epoch": 0.03, + "learning_rate": 4.989165778876766e-05, + "loss": 0.9109, + "step": 29742 + }, + { + "epoch": 0.03, + "learning_rate": 4.989165048388198e-05, + "loss": 0.59, + "step": 29743 + }, + { + "epoch": 0.03, + "learning_rate": 4.989164317875058e-05, + "loss": 0.9763, + "step": 29744 + }, + { + "epoch": 0.03, + "learning_rate": 4.989163587337346e-05, + "loss": 1.3292, + "step": 29745 + }, + { + "epoch": 0.03, + "learning_rate": 4.989162856775062e-05, + "loss": 1.0201, + "step": 29746 + }, + { + "epoch": 0.03, + "learning_rate": 4.989162126188206e-05, + "loss": 0.6978, + "step": 29747 + }, + { + "epoch": 0.03, + "learning_rate": 4.989161395576778e-05, + "loss": 0.788, + "step": 29748 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891606649407785e-05, + "loss": 1.0291, + "step": 29749 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891599342802066e-05, + "loss": 1.0938, + "step": 29750 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891592035950626e-05, + "loss": 1.0706, + "step": 29751 + }, + { + "epoch": 0.03, + "learning_rate": 4.989158472885347e-05, + "loss": 1.0645, + "step": 29752 + }, + { + "epoch": 0.03, + "learning_rate": 4.98915774215106e-05, + "loss": 1.081, + "step": 29753 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891570113922e-05, + "loss": 1.4387, + "step": 29754 + }, + { + "epoch": 0.03, + "learning_rate": 4.989156280608769e-05, + "loss": 0.8076, + "step": 29755 + }, + { + "epoch": 0.03, + "learning_rate": 4.989155549800765e-05, + "loss": 1.0695, + "step": 29756 + }, + { + "epoch": 0.03, + "learning_rate": 4.98915481896819e-05, + "loss": 0.9641, + "step": 29757 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891540881110436e-05, + "loss": 0.5471, + "step": 29758 + }, + { + "epoch": 0.03, + "learning_rate": 4.989153357229324e-05, + "loss": 1.2235, + "step": 29759 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891526263230335e-05, + "loss": 1.0957, + "step": 29760 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891518953921706e-05, + "loss": 0.9794, + "step": 29761 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891511644367364e-05, + "loss": 0.8785, + "step": 29762 + }, + { + "epoch": 0.03, + "learning_rate": 4.989150433456729e-05, + "loss": 0.4749, + "step": 29763 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891497024521516e-05, + "loss": 0.9446, + "step": 29764 + }, + { + "epoch": 0.03, + "learning_rate": 4.989148971423001e-05, + "loss": 0.952, + "step": 29765 + }, + { + "epoch": 0.03, + "learning_rate": 4.989148240369279e-05, + "loss": 0.9412, + "step": 29766 + }, + { + "epoch": 0.03, + "learning_rate": 4.989147509290986e-05, + "loss": 0.8863, + "step": 29767 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891467781881204e-05, + "loss": 0.9866, + "step": 29768 + }, + { + "epoch": 0.03, + "learning_rate": 4.989146047060683e-05, + "loss": 2.0329, + "step": 29769 + }, + { + "epoch": 0.03, + "learning_rate": 4.989145315908673e-05, + "loss": 1.9651, + "step": 29770 + }, + { + "epoch": 0.03, + "learning_rate": 4.989144584732093e-05, + "loss": 1.3415, + "step": 29771 + }, + { + "epoch": 0.03, + "learning_rate": 4.98914385353094e-05, + "loss": 1.1383, + "step": 29772 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891431223052154e-05, + "loss": 0.9418, + "step": 29773 + }, + { + "epoch": 0.03, + "learning_rate": 4.989142391054919e-05, + "loss": 1.0114, + "step": 29774 + }, + { + "epoch": 0.03, + "learning_rate": 4.989141659780051e-05, + "loss": 0.9288, + "step": 29775 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891409284806114e-05, + "loss": 0.929, + "step": 29776 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891401971566e-05, + "loss": 0.945, + "step": 29777 + }, + { + "epoch": 0.03, + "learning_rate": 4.989139465808016e-05, + "loss": 1.0671, + "step": 29778 + }, + { + "epoch": 0.03, + "learning_rate": 4.989138734434861e-05, + "loss": 0.8631, + "step": 29779 + }, + { + "epoch": 0.03, + "learning_rate": 4.989138003037134e-05, + "loss": 1.0465, + "step": 29780 + }, + { + "epoch": 0.03, + "learning_rate": 4.989137271614836e-05, + "loss": 1.1379, + "step": 29781 + }, + { + "epoch": 0.03, + "learning_rate": 4.989136540167966e-05, + "loss": 0.6224, + "step": 29782 + }, + { + "epoch": 0.03, + "learning_rate": 4.989135808696523e-05, + "loss": 1.0469, + "step": 29783 + }, + { + "epoch": 0.03, + "learning_rate": 4.98913507720051e-05, + "loss": 0.8027, + "step": 29784 + }, + { + "epoch": 0.03, + "learning_rate": 4.989134345679924e-05, + "loss": 0.9904, + "step": 29785 + }, + { + "epoch": 0.03, + "learning_rate": 4.989133614134768e-05, + "loss": 0.9246, + "step": 29786 + }, + { + "epoch": 0.03, + "learning_rate": 4.989132882565039e-05, + "loss": 1.187, + "step": 29787 + }, + { + "epoch": 0.03, + "learning_rate": 4.989132150970739e-05, + "loss": 1.1243, + "step": 29788 + }, + { + "epoch": 0.03, + "learning_rate": 4.989131419351867e-05, + "loss": 1.0792, + "step": 29789 + }, + { + "epoch": 0.03, + "learning_rate": 4.989130687708423e-05, + "loss": 0.8464, + "step": 29790 + }, + { + "epoch": 0.03, + "learning_rate": 4.989129956040408e-05, + "loss": 0.9706, + "step": 29791 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891292243478204e-05, + "loss": 0.8199, + "step": 29792 + }, + { + "epoch": 0.03, + "learning_rate": 4.989128492630662e-05, + "loss": 0.9078, + "step": 29793 + }, + { + "epoch": 0.03, + "learning_rate": 4.989127760888932e-05, + "loss": 1.1443, + "step": 29794 + }, + { + "epoch": 0.03, + "learning_rate": 4.98912702912263e-05, + "loss": 0.9329, + "step": 29795 + }, + { + "epoch": 0.03, + "learning_rate": 4.989126297331757e-05, + "loss": 0.7379, + "step": 29796 + }, + { + "epoch": 0.03, + "learning_rate": 4.989125565516312e-05, + "loss": 0.6958, + "step": 29797 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891248336762945e-05, + "loss": 0.9879, + "step": 29798 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891241018117066e-05, + "loss": 0.8614, + "step": 29799 + }, + { + "epoch": 0.03, + "learning_rate": 4.989123369922547e-05, + "loss": 0.7644, + "step": 29800 + }, + { + "epoch": 0.03, + "learning_rate": 4.989122638008815e-05, + "loss": 0.3576, + "step": 29801 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891219060705116e-05, + "loss": 0.6654, + "step": 29802 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891211741076374e-05, + "loss": 0.6553, + "step": 29803 + }, + { + "epoch": 0.03, + "learning_rate": 4.989120442120192e-05, + "loss": 0.8626, + "step": 29804 + }, + { + "epoch": 0.03, + "learning_rate": 4.989119710108175e-05, + "loss": 0.844, + "step": 29805 + }, + { + "epoch": 0.03, + "learning_rate": 4.989118978071585e-05, + "loss": 1.1689, + "step": 29806 + }, + { + "epoch": 0.03, + "learning_rate": 4.989118246010424e-05, + "loss": 0.9637, + "step": 29807 + }, + { + "epoch": 0.03, + "learning_rate": 4.989117513924692e-05, + "loss": 0.9207, + "step": 29808 + }, + { + "epoch": 0.03, + "learning_rate": 4.989116781814388e-05, + "loss": 1.0986, + "step": 29809 + }, + { + "epoch": 0.03, + "learning_rate": 4.989116049679513e-05, + "loss": 0.828, + "step": 29810 + }, + { + "epoch": 0.03, + "learning_rate": 4.989115317520065e-05, + "loss": 0.4433, + "step": 29811 + }, + { + "epoch": 0.03, + "learning_rate": 4.989114585336048e-05, + "loss": 0.4017, + "step": 29812 + }, + { + "epoch": 0.03, + "learning_rate": 4.989113853127458e-05, + "loss": 0.3764, + "step": 29813 + }, + { + "epoch": 0.03, + "learning_rate": 4.989113120894296e-05, + "loss": 0.4369, + "step": 29814 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891123886365644e-05, + "loss": 0.3908, + "step": 29815 + }, + { + "epoch": 0.03, + "learning_rate": 4.989111656354259e-05, + "loss": 0.2859, + "step": 29816 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891109240473845e-05, + "loss": 0.3888, + "step": 29817 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891101917159364e-05, + "loss": 0.3302, + "step": 29818 + }, + { + "epoch": 0.03, + "learning_rate": 4.989109459359918e-05, + "loss": 0.2882, + "step": 29819 + }, + { + "epoch": 0.03, + "learning_rate": 4.989108726979328e-05, + "loss": 0.257, + "step": 29820 + }, + { + "epoch": 0.03, + "learning_rate": 4.989107994574167e-05, + "loss": 0.1577, + "step": 29821 + }, + { + "epoch": 0.03, + "learning_rate": 4.989107262144434e-05, + "loss": 0.2615, + "step": 29822 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891065296901296e-05, + "loss": 0.3558, + "step": 29823 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891057972112544e-05, + "loss": 0.877, + "step": 29824 + }, + { + "epoch": 0.03, + "learning_rate": 4.989105064707807e-05, + "loss": 0.8381, + "step": 29825 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891043321797884e-05, + "loss": 0.8544, + "step": 29826 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891035996271983e-05, + "loss": 0.903, + "step": 29827 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891028670500375e-05, + "loss": 0.5066, + "step": 29828 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891021344483046e-05, + "loss": 0.6762, + "step": 29829 + }, + { + "epoch": 0.03, + "learning_rate": 4.989101401822e-05, + "loss": 0.8143, + "step": 29830 + }, + { + "epoch": 0.03, + "learning_rate": 4.989100669171125e-05, + "loss": 0.6154, + "step": 29831 + }, + { + "epoch": 0.03, + "learning_rate": 4.989099936495679e-05, + "loss": 0.5266, + "step": 29832 + }, + { + "epoch": 0.03, + "learning_rate": 4.989099203795661e-05, + "loss": 0.9403, + "step": 29833 + }, + { + "epoch": 0.03, + "learning_rate": 4.989098471071071e-05, + "loss": 0.6278, + "step": 29834 + }, + { + "epoch": 0.03, + "learning_rate": 4.989097738321911e-05, + "loss": 0.5365, + "step": 29835 + }, + { + "epoch": 0.03, + "learning_rate": 4.989097005548178e-05, + "loss": 1.166, + "step": 29836 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890962727498746e-05, + "loss": 1.0713, + "step": 29837 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890955399270004e-05, + "loss": 1.037, + "step": 29838 + }, + { + "epoch": 0.03, + "learning_rate": 4.989094807079554e-05, + "loss": 1.0923, + "step": 29839 + }, + { + "epoch": 0.03, + "learning_rate": 4.989094074207536e-05, + "loss": 0.8037, + "step": 29840 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890933413109485e-05, + "loss": 0.6719, + "step": 29841 + }, + { + "epoch": 0.03, + "learning_rate": 4.989092608389788e-05, + "loss": 1.012, + "step": 29842 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890918754440574e-05, + "loss": 1.1544, + "step": 29843 + }, + { + "epoch": 0.03, + "learning_rate": 4.989091142473755e-05, + "loss": 0.2698, + "step": 29844 + }, + { + "epoch": 0.03, + "learning_rate": 4.989090409478881e-05, + "loss": 0.1179, + "step": 29845 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890896764594364e-05, + "loss": 0.8736, + "step": 29846 + }, + { + "epoch": 0.03, + "learning_rate": 4.98908894341542e-05, + "loss": 1.1104, + "step": 29847 + }, + { + "epoch": 0.03, + "learning_rate": 4.989088210346833e-05, + "loss": 0.9849, + "step": 29848 + }, + { + "epoch": 0.03, + "learning_rate": 4.989087477253674e-05, + "loss": 1.4127, + "step": 29849 + }, + { + "epoch": 0.03, + "learning_rate": 4.989086744135944e-05, + "loss": 1.1487, + "step": 29850 + }, + { + "epoch": 0.03, + "learning_rate": 4.989086010993643e-05, + "loss": 0.8982, + "step": 29851 + }, + { + "epoch": 0.03, + "learning_rate": 4.989085277826771e-05, + "loss": 0.8402, + "step": 29852 + }, + { + "epoch": 0.03, + "learning_rate": 4.989084544635327e-05, + "loss": 1.1107, + "step": 29853 + }, + { + "epoch": 0.03, + "learning_rate": 4.989083811419313e-05, + "loss": 0.9165, + "step": 29854 + }, + { + "epoch": 0.03, + "learning_rate": 4.989083078178727e-05, + "loss": 1.0202, + "step": 29855 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890823449135706e-05, + "loss": 0.9647, + "step": 29856 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890816116238423e-05, + "loss": 1.1339, + "step": 29857 + }, + { + "epoch": 0.03, + "learning_rate": 4.989080878309542e-05, + "loss": 0.9164, + "step": 29858 + }, + { + "epoch": 0.03, + "learning_rate": 4.989080144970672e-05, + "loss": 0.9995, + "step": 29859 + }, + { + "epoch": 0.03, + "learning_rate": 4.98907941160723e-05, + "loss": 0.8818, + "step": 29860 + }, + { + "epoch": 0.03, + "learning_rate": 4.989078678219218e-05, + "loss": 1.1307, + "step": 29861 + }, + { + "epoch": 0.03, + "learning_rate": 4.989077944806634e-05, + "loss": 1.0852, + "step": 29862 + }, + { + "epoch": 0.03, + "learning_rate": 4.989077211369478e-05, + "loss": 1.1966, + "step": 29863 + }, + { + "epoch": 0.03, + "learning_rate": 4.989076477907753e-05, + "loss": 0.9594, + "step": 29864 + }, + { + "epoch": 0.03, + "learning_rate": 4.989075744421455e-05, + "loss": 1.0555, + "step": 29865 + }, + { + "epoch": 0.03, + "learning_rate": 4.989075010910586e-05, + "loss": 0.999, + "step": 29866 + }, + { + "epoch": 0.03, + "learning_rate": 4.989074277375147e-05, + "loss": 0.7432, + "step": 29867 + }, + { + "epoch": 0.03, + "learning_rate": 4.989073543815136e-05, + "loss": 1.222, + "step": 29868 + }, + { + "epoch": 0.03, + "learning_rate": 4.989072810230555e-05, + "loss": 0.9464, + "step": 29869 + }, + { + "epoch": 0.03, + "learning_rate": 4.989072076621402e-05, + "loss": 1.0938, + "step": 29870 + }, + { + "epoch": 0.03, + "learning_rate": 4.989071342987678e-05, + "loss": 0.9997, + "step": 29871 + }, + { + "epoch": 0.03, + "learning_rate": 4.989070609329383e-05, + "loss": 1.0004, + "step": 29872 + }, + { + "epoch": 0.03, + "learning_rate": 4.989069875646517e-05, + "loss": 0.7596, + "step": 29873 + }, + { + "epoch": 0.03, + "learning_rate": 4.98906914193908e-05, + "loss": 0.8992, + "step": 29874 + }, + { + "epoch": 0.03, + "learning_rate": 4.989068408207072e-05, + "loss": 1.2514, + "step": 29875 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890676744504936e-05, + "loss": 1.0526, + "step": 29876 + }, + { + "epoch": 0.03, + "learning_rate": 4.989066940669343e-05, + "loss": 0.9493, + "step": 29877 + }, + { + "epoch": 0.03, + "learning_rate": 4.989066206863622e-05, + "loss": 0.9147, + "step": 29878 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890654730333295e-05, + "loss": 1.3534, + "step": 29879 + }, + { + "epoch": 0.03, + "learning_rate": 4.989064739178467e-05, + "loss": 0.8961, + "step": 29880 + }, + { + "epoch": 0.03, + "learning_rate": 4.989064005299032e-05, + "loss": 1.0245, + "step": 29881 + }, + { + "epoch": 0.03, + "learning_rate": 4.989063271395027e-05, + "loss": 0.9607, + "step": 29882 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890625374664514e-05, + "loss": 0.8638, + "step": 29883 + }, + { + "epoch": 0.03, + "learning_rate": 4.989061803513304e-05, + "loss": 1.1458, + "step": 29884 + }, + { + "epoch": 0.03, + "learning_rate": 4.989061069535586e-05, + "loss": 1.1629, + "step": 29885 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890603355332965e-05, + "loss": 1.1175, + "step": 29886 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890596015064375e-05, + "loss": 0.9372, + "step": 29887 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890588674550056e-05, + "loss": 1.0803, + "step": 29888 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890581333790044e-05, + "loss": 0.6242, + "step": 29889 + }, + { + "epoch": 0.03, + "learning_rate": 4.989057399278432e-05, + "loss": 0.335, + "step": 29890 + }, + { + "epoch": 0.03, + "learning_rate": 4.989056665153288e-05, + "loss": 0.9443, + "step": 29891 + }, + { + "epoch": 0.03, + "learning_rate": 4.989055931003573e-05, + "loss": 1.096, + "step": 29892 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890551968292875e-05, + "loss": 1.0462, + "step": 29893 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890544626304306e-05, + "loss": 1.0887, + "step": 29894 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890537284070037e-05, + "loss": 0.9552, + "step": 29895 + }, + { + "epoch": 0.03, + "learning_rate": 4.989052994159005e-05, + "loss": 0.9143, + "step": 29896 + }, + { + "epoch": 0.03, + "learning_rate": 4.989052259886436e-05, + "loss": 1.0504, + "step": 29897 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890515255892964e-05, + "loss": 1.1351, + "step": 29898 + }, + { + "epoch": 0.03, + "learning_rate": 4.989050791267585e-05, + "loss": 1.5387, + "step": 29899 + }, + { + "epoch": 0.03, + "learning_rate": 4.989050056921304e-05, + "loss": 0.9371, + "step": 29900 + }, + { + "epoch": 0.03, + "learning_rate": 4.989049322550451e-05, + "loss": 1.3925, + "step": 29901 + }, + { + "epoch": 0.03, + "learning_rate": 4.989048588155028e-05, + "loss": 1.0329, + "step": 29902 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890478537350335e-05, + "loss": 0.8854, + "step": 29903 + }, + { + "epoch": 0.03, + "learning_rate": 4.989047119290468e-05, + "loss": 1.4424, + "step": 29904 + }, + { + "epoch": 0.03, + "learning_rate": 4.989046384821332e-05, + "loss": 0.6747, + "step": 29905 + }, + { + "epoch": 0.03, + "learning_rate": 4.989045650327625e-05, + "loss": 1.1638, + "step": 29906 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890449158093475e-05, + "loss": 1.1735, + "step": 29907 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890441812665e-05, + "loss": 0.6172, + "step": 29908 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890434466990804e-05, + "loss": 1.0204, + "step": 29909 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890427121070905e-05, + "loss": 0.8543, + "step": 29910 + }, + { + "epoch": 0.03, + "learning_rate": 4.98904197749053e-05, + "loss": 0.8891, + "step": 29911 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890412428493984e-05, + "loss": 0.9005, + "step": 29912 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890405081836955e-05, + "loss": 0.9355, + "step": 29913 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890397734934226e-05, + "loss": 1.0309, + "step": 29914 + }, + { + "epoch": 0.03, + "learning_rate": 4.989039038778579e-05, + "loss": 0.845, + "step": 29915 + }, + { + "epoch": 0.03, + "learning_rate": 4.989038304039164e-05, + "loss": 1.028, + "step": 29916 + }, + { + "epoch": 0.03, + "learning_rate": 4.989037569275179e-05, + "loss": 0.871, + "step": 29917 + }, + { + "epoch": 0.03, + "learning_rate": 4.989036834486624e-05, + "loss": 0.9336, + "step": 29918 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890360996734965e-05, + "loss": 0.5238, + "step": 29919 + }, + { + "epoch": 0.03, + "learning_rate": 4.989035364835799e-05, + "loss": 0.7927, + "step": 29920 + }, + { + "epoch": 0.03, + "learning_rate": 4.989034629973531e-05, + "loss": 0.8324, + "step": 29921 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890338950866925e-05, + "loss": 1.119, + "step": 29922 + }, + { + "epoch": 0.03, + "learning_rate": 4.989033160175283e-05, + "loss": 0.9068, + "step": 29923 + }, + { + "epoch": 0.03, + "learning_rate": 4.989032425239303e-05, + "loss": 0.7505, + "step": 29924 + }, + { + "epoch": 0.03, + "learning_rate": 4.989031690278752e-05, + "loss": 0.8844, + "step": 29925 + }, + { + "epoch": 0.03, + "learning_rate": 4.98903095529363e-05, + "loss": 0.9936, + "step": 29926 + }, + { + "epoch": 0.03, + "learning_rate": 4.989030220283938e-05, + "loss": 1.1153, + "step": 29927 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890294852496746e-05, + "loss": 0.8911, + "step": 29928 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890287501908414e-05, + "loss": 1.2275, + "step": 29929 + }, + { + "epoch": 0.03, + "learning_rate": 4.989028015107437e-05, + "loss": 0.9951, + "step": 29930 + }, + { + "epoch": 0.03, + "learning_rate": 4.989027279999462e-05, + "loss": 0.865, + "step": 29931 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890265448669175e-05, + "loss": 0.9361, + "step": 29932 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890258097098007e-05, + "loss": 1.0378, + "step": 29933 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890250745281145e-05, + "loss": 0.5105, + "step": 29934 + }, + { + "epoch": 0.03, + "learning_rate": 4.989024339321857e-05, + "loss": 0.3743, + "step": 29935 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890236040910286e-05, + "loss": 0.3252, + "step": 29936 + }, + { + "epoch": 0.03, + "learning_rate": 4.98902286883563e-05, + "loss": 0.1323, + "step": 29937 + }, + { + "epoch": 0.03, + "learning_rate": 4.989022133555661e-05, + "loss": 0.2022, + "step": 29938 + }, + { + "epoch": 0.03, + "learning_rate": 4.989021398251121e-05, + "loss": 0.1352, + "step": 29939 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890206629220114e-05, + "loss": 0.1152, + "step": 29940 + }, + { + "epoch": 0.03, + "learning_rate": 4.98901992756833e-05, + "loss": 0.5077, + "step": 29941 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890191921900794e-05, + "loss": 0.4188, + "step": 29942 + }, + { + "epoch": 0.03, + "learning_rate": 4.989018456787258e-05, + "loss": 0.8156, + "step": 29943 + }, + { + "epoch": 0.03, + "learning_rate": 4.989017721359865e-05, + "loss": 1.4582, + "step": 29944 + }, + { + "epoch": 0.03, + "learning_rate": 4.989016985907902e-05, + "loss": 0.8036, + "step": 29945 + }, + { + "epoch": 0.03, + "learning_rate": 4.989016250431369e-05, + "loss": 1.075, + "step": 29946 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890155149302644e-05, + "loss": 0.7815, + "step": 29947 + }, + { + "epoch": 0.03, + "learning_rate": 4.98901477940459e-05, + "loss": 1.0642, + "step": 29948 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890140438543456e-05, + "loss": 0.2043, + "step": 29949 + }, + { + "epoch": 0.03, + "learning_rate": 4.98901330827953e-05, + "loss": 0.1035, + "step": 29950 + }, + { + "epoch": 0.03, + "learning_rate": 4.989012572680143e-05, + "loss": 0.1032, + "step": 29951 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890118370561866e-05, + "loss": 0.1123, + "step": 29952 + }, + { + "epoch": 0.03, + "learning_rate": 4.98901110140766e-05, + "loss": 0.095, + "step": 29953 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890103657345624e-05, + "loss": 0.0672, + "step": 29954 + }, + { + "epoch": 0.03, + "learning_rate": 4.989009630036895e-05, + "loss": 0.0599, + "step": 29955 + }, + { + "epoch": 0.03, + "learning_rate": 4.989008894314656e-05, + "loss": 0.0678, + "step": 29956 + }, + { + "epoch": 0.03, + "learning_rate": 4.989008158567847e-05, + "loss": 0.0629, + "step": 29957 + }, + { + "epoch": 0.03, + "learning_rate": 4.989007422796468e-05, + "loss": 0.0768, + "step": 29958 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890066870005184e-05, + "loss": 0.0735, + "step": 29959 + }, + { + "epoch": 0.03, + "learning_rate": 4.989005951179999e-05, + "loss": 0.0684, + "step": 29960 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890052153349075e-05, + "loss": 0.0601, + "step": 29961 + }, + { + "epoch": 0.03, + "learning_rate": 4.989004479465247e-05, + "loss": 0.0544, + "step": 29962 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890037435710156e-05, + "loss": 0.0517, + "step": 29963 + }, + { + "epoch": 0.03, + "learning_rate": 4.989003007652214e-05, + "loss": 0.8741, + "step": 29964 + }, + { + "epoch": 0.03, + "learning_rate": 4.989002271708842e-05, + "loss": 1.17, + "step": 29965 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890015357408994e-05, + "loss": 1.0292, + "step": 29966 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890007997483865e-05, + "loss": 0.8326, + "step": 29967 + }, + { + "epoch": 0.03, + "learning_rate": 4.9890000637313036e-05, + "loss": 0.9014, + "step": 29968 + }, + { + "epoch": 0.03, + "learning_rate": 4.98899932768965e-05, + "loss": 1.0025, + "step": 29969 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889985916234256e-05, + "loss": 0.858, + "step": 29970 + }, + { + "epoch": 0.03, + "learning_rate": 4.988997855532631e-05, + "loss": 1.3052, + "step": 29971 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889971194172666e-05, + "loss": 0.9992, + "step": 29972 + }, + { + "epoch": 0.03, + "learning_rate": 4.988996383277332e-05, + "loss": 1.094, + "step": 29973 + }, + { + "epoch": 0.03, + "learning_rate": 4.988995647112826e-05, + "loss": 0.9455, + "step": 29974 + }, + { + "epoch": 0.03, + "learning_rate": 4.988994910923751e-05, + "loss": 1.3309, + "step": 29975 + }, + { + "epoch": 0.03, + "learning_rate": 4.988994174710105e-05, + "loss": 0.8915, + "step": 29976 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889934384718886e-05, + "loss": 1.1632, + "step": 29977 + }, + { + "epoch": 0.03, + "learning_rate": 4.988992702209102e-05, + "loss": 0.9303, + "step": 29978 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889919659217455e-05, + "loss": 0.6541, + "step": 29979 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889912296098186e-05, + "loss": 0.9076, + "step": 29980 + }, + { + "epoch": 0.03, + "learning_rate": 4.988990493273321e-05, + "loss": 1.3652, + "step": 29981 + }, + { + "epoch": 0.03, + "learning_rate": 4.988989756912253e-05, + "loss": 0.9591, + "step": 29982 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889890205266154e-05, + "loss": 0.9511, + "step": 29983 + }, + { + "epoch": 0.03, + "learning_rate": 4.988988284116407e-05, + "loss": 1.3999, + "step": 29984 + }, + { + "epoch": 0.03, + "learning_rate": 4.988987547681629e-05, + "loss": 1.8046, + "step": 29985 + }, + { + "epoch": 0.03, + "learning_rate": 4.98898681122228e-05, + "loss": 1.4365, + "step": 29986 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889860747383616e-05, + "loss": 0.665, + "step": 29987 + }, + { + "epoch": 0.03, + "learning_rate": 4.988985338229872e-05, + "loss": 0.7023, + "step": 29988 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889846016968134e-05, + "loss": 0.7238, + "step": 29989 + }, + { + "epoch": 0.03, + "learning_rate": 4.988983865139184e-05, + "loss": 1.0411, + "step": 29990 + }, + { + "epoch": 0.03, + "learning_rate": 4.988983128556984e-05, + "loss": 1.1723, + "step": 29991 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889823919502146e-05, + "loss": 3.1638, + "step": 29992 + }, + { + "epoch": 0.03, + "learning_rate": 4.988981655318874e-05, + "loss": 5.9895, + "step": 29993 + }, + { + "epoch": 0.03, + "learning_rate": 4.988980918662964e-05, + "loss": 5.9314, + "step": 29994 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889801819824833e-05, + "loss": 5.1085, + "step": 29995 + }, + { + "epoch": 0.03, + "learning_rate": 4.988979445277433e-05, + "loss": 1.0203, + "step": 29996 + }, + { + "epoch": 0.03, + "learning_rate": 4.988978708547812e-05, + "loss": 0.6293, + "step": 29997 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889779717936216e-05, + "loss": 1.1096, + "step": 29998 + }, + { + "epoch": 0.03, + "learning_rate": 4.98897723501486e-05, + "loss": 1.2676, + "step": 29999 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889764982115295e-05, + "loss": 0.9795, + "step": 30000 + }, + { + "epoch": 0.03, + "eval_loss": 1.067270040512085, + "eval_runtime": 125.8764, + "eval_samples_per_second": 11.003, + "eval_steps_per_second": 5.505, + "step": 30000 + }, + { + "epoch": 0.03, + "learning_rate": 4.988975761383628e-05, + "loss": 0.8561, + "step": 30001 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889750245311564e-05, + "loss": 0.7536, + "step": 30002 + }, + { + "epoch": 0.03, + "learning_rate": 4.988974287654115e-05, + "loss": 0.9649, + "step": 30003 + }, + { + "epoch": 0.03, + "learning_rate": 4.988973550752504e-05, + "loss": 0.6546, + "step": 30004 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889728138263215e-05, + "loss": 0.9403, + "step": 30005 + }, + { + "epoch": 0.03, + "learning_rate": 4.98897207687557e-05, + "loss": 0.992, + "step": 30006 + }, + { + "epoch": 0.03, + "learning_rate": 4.988971339900248e-05, + "loss": 1.0207, + "step": 30007 + }, + { + "epoch": 0.03, + "learning_rate": 4.988970602900356e-05, + "loss": 1.3156, + "step": 30008 + }, + { + "epoch": 0.03, + "learning_rate": 4.988969865875894e-05, + "loss": 1.1153, + "step": 30009 + }, + { + "epoch": 0.03, + "learning_rate": 4.988969128826862e-05, + "loss": 0.8449, + "step": 30010 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889683917532595e-05, + "loss": 0.9724, + "step": 30011 + }, + { + "epoch": 0.03, + "learning_rate": 4.988967654655088e-05, + "loss": 1.0259, + "step": 30012 + }, + { + "epoch": 0.03, + "learning_rate": 4.988966917532345e-05, + "loss": 0.9029, + "step": 30013 + }, + { + "epoch": 0.03, + "learning_rate": 4.988966180385033e-05, + "loss": 1.072, + "step": 30014 + }, + { + "epoch": 0.03, + "learning_rate": 4.98896544321315e-05, + "loss": 1.3351, + "step": 30015 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889647060166986e-05, + "loss": 1.1017, + "step": 30016 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889639687956756e-05, + "loss": 1.0928, + "step": 30017 + }, + { + "epoch": 0.03, + "learning_rate": 4.988963231550083e-05, + "loss": 0.9602, + "step": 30018 + }, + { + "epoch": 0.03, + "learning_rate": 4.988962494279922e-05, + "loss": 0.928, + "step": 30019 + }, + { + "epoch": 0.03, + "learning_rate": 4.988961756985189e-05, + "loss": 1.2747, + "step": 30020 + }, + { + "epoch": 0.03, + "learning_rate": 4.988961019665886e-05, + "loss": 0.977, + "step": 30021 + }, + { + "epoch": 0.03, + "learning_rate": 4.988960282322015e-05, + "loss": 1.3526, + "step": 30022 + }, + { + "epoch": 0.03, + "learning_rate": 4.988959544953572e-05, + "loss": 1.0309, + "step": 30023 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889588075605596e-05, + "loss": 0.9657, + "step": 30024 + }, + { + "epoch": 0.03, + "learning_rate": 4.988958070142978e-05, + "loss": 1.1161, + "step": 30025 + }, + { + "epoch": 0.03, + "learning_rate": 4.988957332700826e-05, + "loss": 1.197, + "step": 30026 + }, + { + "epoch": 0.03, + "learning_rate": 4.988956595234104e-05, + "loss": 1.6176, + "step": 30027 + }, + { + "epoch": 0.03, + "learning_rate": 4.988955857742812e-05, + "loss": 0.8076, + "step": 30028 + }, + { + "epoch": 0.03, + "learning_rate": 4.98895512022695e-05, + "loss": 1.0706, + "step": 30029 + }, + { + "epoch": 0.03, + "learning_rate": 4.988954382686518e-05, + "loss": 0.9241, + "step": 30030 + }, + { + "epoch": 0.03, + "learning_rate": 4.988953645121517e-05, + "loss": 0.7848, + "step": 30031 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889529075319454e-05, + "loss": 0.968, + "step": 30032 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889521699178035e-05, + "loss": 0.9676, + "step": 30033 + }, + { + "epoch": 0.03, + "learning_rate": 4.988951432279092e-05, + "loss": 0.635, + "step": 30034 + }, + { + "epoch": 0.03, + "learning_rate": 4.98895069461581e-05, + "loss": 0.6899, + "step": 30035 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889499569279597e-05, + "loss": 1.1009, + "step": 30036 + }, + { + "epoch": 0.03, + "learning_rate": 4.988949219215538e-05, + "loss": 1.3362, + "step": 30037 + }, + { + "epoch": 0.03, + "learning_rate": 4.988948481478548e-05, + "loss": 0.9635, + "step": 30038 + }, + { + "epoch": 0.03, + "learning_rate": 4.988947743716987e-05, + "loss": 0.8819, + "step": 30039 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889470059308565e-05, + "loss": 0.8034, + "step": 30040 + }, + { + "epoch": 0.03, + "learning_rate": 4.988946268120156e-05, + "loss": 0.9142, + "step": 30041 + }, + { + "epoch": 0.03, + "learning_rate": 4.988945530284886e-05, + "loss": 1.2261, + "step": 30042 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889447924250455e-05, + "loss": 1.7407, + "step": 30043 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889440545406365e-05, + "loss": 1.4434, + "step": 30044 + }, + { + "epoch": 0.03, + "learning_rate": 4.988943316631656e-05, + "loss": 1.3505, + "step": 30045 + }, + { + "epoch": 0.03, + "learning_rate": 4.988942578698107e-05, + "loss": 0.8519, + "step": 30046 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889418407399876e-05, + "loss": 1.3265, + "step": 30047 + }, + { + "epoch": 0.03, + "learning_rate": 4.988941102757298e-05, + "loss": 1.1079, + "step": 30048 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889403647500396e-05, + "loss": 1.2416, + "step": 30049 + }, + { + "epoch": 0.03, + "learning_rate": 4.988939626718211e-05, + "loss": 1.285, + "step": 30050 + }, + { + "epoch": 0.03, + "learning_rate": 4.988938888661813e-05, + "loss": 0.5974, + "step": 30051 + }, + { + "epoch": 0.03, + "learning_rate": 4.988938150580845e-05, + "loss": 0.1412, + "step": 30052 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889374124753065e-05, + "loss": 0.8011, + "step": 30053 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889366743451996e-05, + "loss": 1.0467, + "step": 30054 + }, + { + "epoch": 0.03, + "learning_rate": 4.988935936190522e-05, + "loss": 0.794, + "step": 30055 + }, + { + "epoch": 0.03, + "learning_rate": 4.988935198011275e-05, + "loss": 1.0841, + "step": 30056 + }, + { + "epoch": 0.03, + "learning_rate": 4.988934459807458e-05, + "loss": 0.871, + "step": 30057 + }, + { + "epoch": 0.03, + "learning_rate": 4.988933721579071e-05, + "loss": 1.0036, + "step": 30058 + }, + { + "epoch": 0.03, + "learning_rate": 4.988932983326116e-05, + "loss": 0.9519, + "step": 30059 + }, + { + "epoch": 0.03, + "learning_rate": 4.988932245048589e-05, + "loss": 0.9271, + "step": 30060 + }, + { + "epoch": 0.03, + "learning_rate": 4.988931506746494e-05, + "loss": 0.7764, + "step": 30061 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889307684198286e-05, + "loss": 0.9126, + "step": 30062 + }, + { + "epoch": 0.03, + "learning_rate": 4.988930030068594e-05, + "loss": 1.0509, + "step": 30063 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889292916927885e-05, + "loss": 0.8942, + "step": 30064 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889285532924144e-05, + "loss": 1.6981, + "step": 30065 + }, + { + "epoch": 0.03, + "learning_rate": 4.988927814867471e-05, + "loss": 0.997, + "step": 30066 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889270764179574e-05, + "loss": 0.942, + "step": 30067 + }, + { + "epoch": 0.03, + "learning_rate": 4.988926337943874e-05, + "loss": 0.7465, + "step": 30068 + }, + { + "epoch": 0.03, + "learning_rate": 4.988925599445221e-05, + "loss": 0.983, + "step": 30069 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889248609219985e-05, + "loss": 0.9202, + "step": 30070 + }, + { + "epoch": 0.03, + "learning_rate": 4.988924122374207e-05, + "loss": 1.1255, + "step": 30071 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889233838018443e-05, + "loss": 0.8071, + "step": 30072 + }, + { + "epoch": 0.03, + "learning_rate": 4.988922645204913e-05, + "loss": 1.0476, + "step": 30073 + }, + { + "epoch": 0.03, + "learning_rate": 4.988921906583413e-05, + "loss": 1.4412, + "step": 30074 + }, + { + "epoch": 0.03, + "learning_rate": 4.988921167937342e-05, + "loss": 0.7119, + "step": 30075 + }, + { + "epoch": 0.03, + "learning_rate": 4.988920429266702e-05, + "loss": 0.7946, + "step": 30076 + }, + { + "epoch": 0.03, + "learning_rate": 4.988919690571492e-05, + "loss": 0.7659, + "step": 30077 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889189518517134e-05, + "loss": 1.1851, + "step": 30078 + }, + { + "epoch": 0.03, + "learning_rate": 4.988918213107364e-05, + "loss": 1.2242, + "step": 30079 + }, + { + "epoch": 0.03, + "learning_rate": 4.988917474338446e-05, + "loss": 0.8172, + "step": 30080 + }, + { + "epoch": 0.03, + "learning_rate": 4.988916735544958e-05, + "loss": 0.5037, + "step": 30081 + }, + { + "epoch": 0.03, + "learning_rate": 4.988915996726901e-05, + "loss": 0.6559, + "step": 30082 + }, + { + "epoch": 0.03, + "learning_rate": 4.988915257884274e-05, + "loss": 0.6095, + "step": 30083 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889145190170775e-05, + "loss": 0.5905, + "step": 30084 + }, + { + "epoch": 0.03, + "learning_rate": 4.988913780125312e-05, + "loss": 0.6398, + "step": 30085 + }, + { + "epoch": 0.03, + "learning_rate": 4.988913041208976e-05, + "loss": 0.6047, + "step": 30086 + }, + { + "epoch": 0.03, + "learning_rate": 4.988912302268071e-05, + "loss": 0.7, + "step": 30087 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889115633025964e-05, + "loss": 0.5251, + "step": 30088 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889108243125525e-05, + "loss": 0.4846, + "step": 30089 + }, + { + "epoch": 0.03, + "learning_rate": 4.988910085297939e-05, + "loss": 0.9276, + "step": 30090 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889093462587566e-05, + "loss": 1.2737, + "step": 30091 + }, + { + "epoch": 0.03, + "learning_rate": 4.988908607195004e-05, + "loss": 1.2495, + "step": 30092 + }, + { + "epoch": 0.03, + "learning_rate": 4.988907868106682e-05, + "loss": 1.2025, + "step": 30093 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889071289937904e-05, + "loss": 1.408, + "step": 30094 + }, + { + "epoch": 0.03, + "learning_rate": 4.98890638985633e-05, + "loss": 1.1675, + "step": 30095 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889056506943e-05, + "loss": 1.1656, + "step": 30096 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889049115077005e-05, + "loss": 1.404, + "step": 30097 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889041722965316e-05, + "loss": 1.0117, + "step": 30098 + }, + { + "epoch": 0.03, + "learning_rate": 4.988903433060793e-05, + "loss": 1.0372, + "step": 30099 + }, + { + "epoch": 0.03, + "learning_rate": 4.988902693800485e-05, + "loss": 1.3365, + "step": 30100 + }, + { + "epoch": 0.03, + "learning_rate": 4.988901954515608e-05, + "loss": 1.1179, + "step": 30101 + }, + { + "epoch": 0.03, + "learning_rate": 4.988901215206161e-05, + "loss": 0.9046, + "step": 30102 + }, + { + "epoch": 0.03, + "learning_rate": 4.9889004758721455e-05, + "loss": 0.6692, + "step": 30103 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888997365135596e-05, + "loss": 0.8716, + "step": 30104 + }, + { + "epoch": 0.03, + "learning_rate": 4.988898997130405e-05, + "loss": 1.0247, + "step": 30105 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888982577226804e-05, + "loss": 1.0656, + "step": 30106 + }, + { + "epoch": 0.03, + "learning_rate": 4.988897518290387e-05, + "loss": 1.2995, + "step": 30107 + }, + { + "epoch": 0.03, + "learning_rate": 4.988896778833525e-05, + "loss": 0.8465, + "step": 30108 + }, + { + "epoch": 0.03, + "learning_rate": 4.988896039352092e-05, + "loss": 0.8802, + "step": 30109 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888952998460915e-05, + "loss": 0.6234, + "step": 30110 + }, + { + "epoch": 0.03, + "learning_rate": 4.98889456031552e-05, + "loss": 0.8553, + "step": 30111 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888938207603804e-05, + "loss": 0.7358, + "step": 30112 + }, + { + "epoch": 0.03, + "learning_rate": 4.988893081180671e-05, + "loss": 0.7124, + "step": 30113 + }, + { + "epoch": 0.03, + "learning_rate": 4.988892341576392e-05, + "loss": 1.1041, + "step": 30114 + }, + { + "epoch": 0.03, + "learning_rate": 4.988891601947544e-05, + "loss": 0.8704, + "step": 30115 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888908622941265e-05, + "loss": 0.892, + "step": 30116 + }, + { + "epoch": 0.03, + "learning_rate": 4.98889012261614e-05, + "loss": 1.0348, + "step": 30117 + }, + { + "epoch": 0.03, + "learning_rate": 4.988889382913584e-05, + "loss": 1.0378, + "step": 30118 + }, + { + "epoch": 0.03, + "learning_rate": 4.988888643186459e-05, + "loss": 1.0489, + "step": 30119 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888879034347645e-05, + "loss": 1.3287, + "step": 30120 + }, + { + "epoch": 0.03, + "learning_rate": 4.988887163658501e-05, + "loss": 0.9195, + "step": 30121 + }, + { + "epoch": 0.03, + "learning_rate": 4.988886423857668e-05, + "loss": 1.4397, + "step": 30122 + }, + { + "epoch": 0.03, + "learning_rate": 4.988885684032266e-05, + "loss": 1.2695, + "step": 30123 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888849441822946e-05, + "loss": 0.6715, + "step": 30124 + }, + { + "epoch": 0.03, + "learning_rate": 4.988884204307754e-05, + "loss": 1.033, + "step": 30125 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888834644086444e-05, + "loss": 1.0234, + "step": 30126 + }, + { + "epoch": 0.03, + "learning_rate": 4.988882724484965e-05, + "loss": 1.1189, + "step": 30127 + }, + { + "epoch": 0.03, + "learning_rate": 4.988881984536717e-05, + "loss": 1.0796, + "step": 30128 + }, + { + "epoch": 0.03, + "learning_rate": 4.988881244563899e-05, + "loss": 0.9846, + "step": 30129 + }, + { + "epoch": 0.03, + "learning_rate": 4.988880504566513e-05, + "loss": 0.564, + "step": 30130 + }, + { + "epoch": 0.03, + "learning_rate": 4.988879764544557e-05, + "loss": 0.5768, + "step": 30131 + }, + { + "epoch": 0.03, + "learning_rate": 4.988879024498032e-05, + "loss": 0.5614, + "step": 30132 + }, + { + "epoch": 0.03, + "learning_rate": 4.988878284426938e-05, + "loss": 0.6401, + "step": 30133 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888775443312746e-05, + "loss": 0.8961, + "step": 30134 + }, + { + "epoch": 0.03, + "learning_rate": 4.988876804211042e-05, + "loss": 1.0739, + "step": 30135 + }, + { + "epoch": 0.03, + "learning_rate": 4.98887606406624e-05, + "loss": 0.8521, + "step": 30136 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888753238968696e-05, + "loss": 0.6555, + "step": 30137 + }, + { + "epoch": 0.03, + "learning_rate": 4.98887458370293e-05, + "loss": 1.0566, + "step": 30138 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888738434844204e-05, + "loss": 0.9956, + "step": 30139 + }, + { + "epoch": 0.03, + "learning_rate": 4.988873103241343e-05, + "loss": 1.115, + "step": 30140 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888723629736957e-05, + "loss": 1.0433, + "step": 30141 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888716226814796e-05, + "loss": 1.0275, + "step": 30142 + }, + { + "epoch": 0.03, + "learning_rate": 4.988870882364695e-05, + "loss": 0.9016, + "step": 30143 + }, + { + "epoch": 0.03, + "learning_rate": 4.98887014202334e-05, + "loss": 1.2042, + "step": 30144 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888694016574164e-05, + "loss": 1.0766, + "step": 30145 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888686612669235e-05, + "loss": 1.0458, + "step": 30146 + }, + { + "epoch": 0.03, + "learning_rate": 4.988867920851862e-05, + "loss": 0.9327, + "step": 30147 + }, + { + "epoch": 0.03, + "learning_rate": 4.988867180412231e-05, + "loss": 1.0657, + "step": 30148 + }, + { + "epoch": 0.03, + "learning_rate": 4.988866439948031e-05, + "loss": 1.3534, + "step": 30149 + }, + { + "epoch": 0.03, + "learning_rate": 4.988865699459263e-05, + "loss": 1.3681, + "step": 30150 + }, + { + "epoch": 0.03, + "learning_rate": 4.988864958945924e-05, + "loss": 0.9059, + "step": 30151 + }, + { + "epoch": 0.03, + "learning_rate": 4.988864218408018e-05, + "loss": 0.777, + "step": 30152 + }, + { + "epoch": 0.03, + "learning_rate": 4.988863477845542e-05, + "loss": 1.0503, + "step": 30153 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888627372584975e-05, + "loss": 1.2866, + "step": 30154 + }, + { + "epoch": 0.03, + "learning_rate": 4.988861996646883e-05, + "loss": 1.1146, + "step": 30155 + }, + { + "epoch": 0.03, + "learning_rate": 4.988861256010701e-05, + "loss": 0.9585, + "step": 30156 + }, + { + "epoch": 0.03, + "learning_rate": 4.988860515349948e-05, + "loss": 0.7846, + "step": 30157 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888597746646276e-05, + "loss": 0.8215, + "step": 30158 + }, + { + "epoch": 0.03, + "learning_rate": 4.988859033954738e-05, + "loss": 0.9612, + "step": 30159 + }, + { + "epoch": 0.03, + "learning_rate": 4.988858293220279e-05, + "loss": 1.2376, + "step": 30160 + }, + { + "epoch": 0.03, + "learning_rate": 4.988857552461251e-05, + "loss": 0.7119, + "step": 30161 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888568116776544e-05, + "loss": 1.0639, + "step": 30162 + }, + { + "epoch": 0.03, + "learning_rate": 4.988856070869489e-05, + "loss": 1.3227, + "step": 30163 + }, + { + "epoch": 0.03, + "learning_rate": 4.988855330036754e-05, + "loss": 0.8215, + "step": 30164 + }, + { + "epoch": 0.03, + "learning_rate": 4.98885458917945e-05, + "loss": 0.6373, + "step": 30165 + }, + { + "epoch": 0.03, + "learning_rate": 4.988853848297578e-05, + "loss": 0.5602, + "step": 30166 + }, + { + "epoch": 0.03, + "learning_rate": 4.988853107391136e-05, + "loss": 0.4477, + "step": 30167 + }, + { + "epoch": 0.03, + "learning_rate": 4.988852366460126e-05, + "loss": 0.5147, + "step": 30168 + }, + { + "epoch": 0.03, + "learning_rate": 4.988851625504547e-05, + "loss": 0.5639, + "step": 30169 + }, + { + "epoch": 0.03, + "learning_rate": 4.988850884524399e-05, + "loss": 0.348, + "step": 30170 + }, + { + "epoch": 0.03, + "learning_rate": 4.988850143519682e-05, + "loss": 0.831, + "step": 30171 + }, + { + "epoch": 0.03, + "learning_rate": 4.988849402490397e-05, + "loss": 1.08, + "step": 30172 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888486614365414e-05, + "loss": 1.1954, + "step": 30173 + }, + { + "epoch": 0.03, + "learning_rate": 4.988847920358118e-05, + "loss": 1.1147, + "step": 30174 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888471792551255e-05, + "loss": 0.9505, + "step": 30175 + }, + { + "epoch": 0.03, + "learning_rate": 4.988846438127565e-05, + "loss": 1.1232, + "step": 30176 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888456969754335e-05, + "loss": 0.8701, + "step": 30177 + }, + { + "epoch": 0.03, + "learning_rate": 4.988844955798735e-05, + "loss": 0.9474, + "step": 30178 + }, + { + "epoch": 0.03, + "learning_rate": 4.988844214597467e-05, + "loss": 0.9673, + "step": 30179 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888434733716305e-05, + "loss": 0.8828, + "step": 30180 + }, + { + "epoch": 0.03, + "learning_rate": 4.988842732121225e-05, + "loss": 0.8849, + "step": 30181 + }, + { + "epoch": 0.03, + "learning_rate": 4.988841990846251e-05, + "loss": 1.1111, + "step": 30182 + }, + { + "epoch": 0.03, + "learning_rate": 4.988841249546708e-05, + "loss": 1.004, + "step": 30183 + }, + { + "epoch": 0.03, + "learning_rate": 4.988840508222596e-05, + "loss": 1.0423, + "step": 30184 + }, + { + "epoch": 0.03, + "learning_rate": 4.988839766873915e-05, + "loss": 0.8431, + "step": 30185 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888390255006656e-05, + "loss": 0.946, + "step": 30186 + }, + { + "epoch": 0.03, + "learning_rate": 4.988838284102848e-05, + "loss": 0.79, + "step": 30187 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888375426804615e-05, + "loss": 0.7173, + "step": 30188 + }, + { + "epoch": 0.03, + "learning_rate": 4.988836801233505e-05, + "loss": 1.0165, + "step": 30189 + }, + { + "epoch": 0.03, + "learning_rate": 4.988836059761981e-05, + "loss": 1.2401, + "step": 30190 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888353182658874e-05, + "loss": 0.8419, + "step": 30191 + }, + { + "epoch": 0.03, + "learning_rate": 4.988834576745226e-05, + "loss": 0.8808, + "step": 30192 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888338351999956e-05, + "loss": 0.8797, + "step": 30193 + }, + { + "epoch": 0.03, + "learning_rate": 4.988833093630195e-05, + "loss": 1.0948, + "step": 30194 + }, + { + "epoch": 0.03, + "learning_rate": 4.988832352035828e-05, + "loss": 1.037, + "step": 30195 + }, + { + "epoch": 0.03, + "learning_rate": 4.988831610416891e-05, + "loss": 0.9667, + "step": 30196 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888308687733856e-05, + "loss": 1.2562, + "step": 30197 + }, + { + "epoch": 0.03, + "learning_rate": 4.988830127105311e-05, + "loss": 1.1892, + "step": 30198 + }, + { + "epoch": 0.03, + "learning_rate": 4.988829385412668e-05, + "loss": 1.059, + "step": 30199 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888286436954566e-05, + "loss": 0.6695, + "step": 30200 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888279019536765e-05, + "loss": 0.7663, + "step": 30201 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888271601873276e-05, + "loss": 0.6876, + "step": 30202 + }, + { + "epoch": 0.03, + "learning_rate": 4.98882641839641e-05, + "loss": 0.9289, + "step": 30203 + }, + { + "epoch": 0.03, + "learning_rate": 4.988825676580924e-05, + "loss": 1.1614, + "step": 30204 + }, + { + "epoch": 0.03, + "learning_rate": 4.988824934740869e-05, + "loss": 0.1559, + "step": 30205 + }, + { + "epoch": 0.03, + "learning_rate": 4.988824192876246e-05, + "loss": 0.5756, + "step": 30206 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888234509870535e-05, + "loss": 1.3487, + "step": 30207 + }, + { + "epoch": 0.03, + "learning_rate": 4.988822709073293e-05, + "loss": 1.3009, + "step": 30208 + }, + { + "epoch": 0.03, + "learning_rate": 4.988821967134964e-05, + "loss": 1.135, + "step": 30209 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888212251720656e-05, + "loss": 0.7026, + "step": 30210 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888204831846e-05, + "loss": 1.128, + "step": 30211 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888197411725646e-05, + "loss": 1.335, + "step": 30212 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888189991359614e-05, + "loss": 0.7835, + "step": 30213 + }, + { + "epoch": 0.03, + "learning_rate": 4.988818257074789e-05, + "loss": 1.2684, + "step": 30214 + }, + { + "epoch": 0.03, + "learning_rate": 4.988817514989048e-05, + "loss": 0.8821, + "step": 30215 + }, + { + "epoch": 0.03, + "learning_rate": 4.988816772878738e-05, + "loss": 0.8863, + "step": 30216 + }, + { + "epoch": 0.03, + "learning_rate": 4.988816030743861e-05, + "loss": 0.783, + "step": 30217 + }, + { + "epoch": 0.03, + "learning_rate": 4.988815288584414e-05, + "loss": 0.9566, + "step": 30218 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888145464003995e-05, + "loss": 0.742, + "step": 30219 + }, + { + "epoch": 0.03, + "learning_rate": 4.988813804191816e-05, + "loss": 1.0893, + "step": 30220 + }, + { + "epoch": 0.03, + "learning_rate": 4.988813061958664e-05, + "loss": 1.2597, + "step": 30221 + }, + { + "epoch": 0.03, + "learning_rate": 4.988812319700943e-05, + "loss": 0.8772, + "step": 30222 + }, + { + "epoch": 0.03, + "learning_rate": 4.988811577418654e-05, + "loss": 0.3902, + "step": 30223 + }, + { + "epoch": 0.03, + "learning_rate": 4.988810835111797e-05, + "loss": 1.0076, + "step": 30224 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888100927803706e-05, + "loss": 1.2914, + "step": 30225 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888093504243764e-05, + "loss": 1.1535, + "step": 30226 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888086080438134e-05, + "loss": 0.8601, + "step": 30227 + }, + { + "epoch": 0.03, + "learning_rate": 4.988807865638682e-05, + "loss": 0.5766, + "step": 30228 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888071232089814e-05, + "loss": 0.7543, + "step": 30229 + }, + { + "epoch": 0.03, + "learning_rate": 4.988806380754714e-05, + "loss": 0.5239, + "step": 30230 + }, + { + "epoch": 0.03, + "learning_rate": 4.988805638275877e-05, + "loss": 0.5673, + "step": 30231 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888048957724716e-05, + "loss": 0.8708, + "step": 30232 + }, + { + "epoch": 0.03, + "learning_rate": 4.988804153244498e-05, + "loss": 1.587, + "step": 30233 + }, + { + "epoch": 0.03, + "learning_rate": 4.988803410691956e-05, + "loss": 0.7112, + "step": 30234 + }, + { + "epoch": 0.03, + "learning_rate": 4.988802668114845e-05, + "loss": 1.0927, + "step": 30235 + }, + { + "epoch": 0.03, + "learning_rate": 4.988801925513166e-05, + "loss": 1.027, + "step": 30236 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888011828869184e-05, + "loss": 0.9851, + "step": 30237 + }, + { + "epoch": 0.03, + "learning_rate": 4.988800440236103e-05, + "loss": 1.024, + "step": 30238 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887996975607185e-05, + "loss": 0.8952, + "step": 30239 + }, + { + "epoch": 0.03, + "learning_rate": 4.988798954860766e-05, + "loss": 1.168, + "step": 30240 + }, + { + "epoch": 0.03, + "learning_rate": 4.988798212136245e-05, + "loss": 1.1928, + "step": 30241 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887974693871565e-05, + "loss": 1.0965, + "step": 30242 + }, + { + "epoch": 0.03, + "learning_rate": 4.988796726613499e-05, + "loss": 0.8589, + "step": 30243 + }, + { + "epoch": 0.03, + "learning_rate": 4.988795983815273e-05, + "loss": 0.9743, + "step": 30244 + }, + { + "epoch": 0.03, + "learning_rate": 4.988795240992479e-05, + "loss": 1.1529, + "step": 30245 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887944981451154e-05, + "loss": 1.2218, + "step": 30246 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887937552731846e-05, + "loss": 0.8461, + "step": 30247 + }, + { + "epoch": 0.03, + "learning_rate": 4.988793012376686e-05, + "loss": 0.8287, + "step": 30248 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887922694556175e-05, + "loss": 1.0073, + "step": 30249 + }, + { + "epoch": 0.03, + "learning_rate": 4.988791526509981e-05, + "loss": 0.8315, + "step": 30250 + }, + { + "epoch": 0.03, + "learning_rate": 4.988790783539777e-05, + "loss": 0.9716, + "step": 30251 + }, + { + "epoch": 0.03, + "learning_rate": 4.988790040545005e-05, + "loss": 1.0512, + "step": 30252 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887892975256644e-05, + "loss": 0.9775, + "step": 30253 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887885544817554e-05, + "loss": 0.744, + "step": 30254 + }, + { + "epoch": 0.03, + "learning_rate": 4.988787811413278e-05, + "loss": 1.4164, + "step": 30255 + }, + { + "epoch": 0.03, + "learning_rate": 4.988787068320232e-05, + "loss": 1.6704, + "step": 30256 + }, + { + "epoch": 0.03, + "learning_rate": 4.988786325202618e-05, + "loss": 1.2049, + "step": 30257 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887855820604365e-05, + "loss": 1.2972, + "step": 30258 + }, + { + "epoch": 0.03, + "learning_rate": 4.988784838893686e-05, + "loss": 0.9573, + "step": 30259 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887840957023675e-05, + "loss": 0.9949, + "step": 30260 + }, + { + "epoch": 0.03, + "learning_rate": 4.98878335248648e-05, + "loss": 1.1524, + "step": 30261 + }, + { + "epoch": 0.03, + "learning_rate": 4.988782609246026e-05, + "loss": 0.9001, + "step": 30262 + }, + { + "epoch": 0.03, + "learning_rate": 4.988781865981002e-05, + "loss": 1.1154, + "step": 30263 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887811226914106e-05, + "loss": 1.31, + "step": 30264 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887803793772514e-05, + "loss": 1.0048, + "step": 30265 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887796360385234e-05, + "loss": 1.419, + "step": 30266 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887788926752274e-05, + "loss": 1.0197, + "step": 30267 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887781492873634e-05, + "loss": 1.3006, + "step": 30268 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887774058749307e-05, + "loss": 1.4185, + "step": 30269 + }, + { + "epoch": 0.03, + "learning_rate": 4.988776662437929e-05, + "loss": 1.0588, + "step": 30270 + }, + { + "epoch": 0.03, + "learning_rate": 4.988775918976361e-05, + "loss": 0.9786, + "step": 30271 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887751754902244e-05, + "loss": 0.867, + "step": 30272 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887744319795196e-05, + "loss": 1.1618, + "step": 30273 + }, + { + "epoch": 0.03, + "learning_rate": 4.988773688444246e-05, + "loss": 1.2939, + "step": 30274 + }, + { + "epoch": 0.03, + "learning_rate": 4.988772944884405e-05, + "loss": 0.9406, + "step": 30275 + }, + { + "epoch": 0.03, + "learning_rate": 4.988772201299996e-05, + "loss": 0.8931, + "step": 30276 + }, + { + "epoch": 0.03, + "learning_rate": 4.988771457691018e-05, + "loss": 1.3811, + "step": 30277 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887707140574726e-05, + "loss": 1.0532, + "step": 30278 + }, + { + "epoch": 0.03, + "learning_rate": 4.988769970399358e-05, + "loss": 0.9081, + "step": 30279 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887692267166766e-05, + "loss": 0.3936, + "step": 30280 + }, + { + "epoch": 0.03, + "learning_rate": 4.988768483009427e-05, + "loss": 0.2737, + "step": 30281 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887677392776087e-05, + "loss": 0.7237, + "step": 30282 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887669955212236e-05, + "loss": 1.9378, + "step": 30283 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887662517402686e-05, + "loss": 0.97, + "step": 30284 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887655079347475e-05, + "loss": 1.0471, + "step": 30285 + }, + { + "epoch": 0.03, + "learning_rate": 4.988764764104657e-05, + "loss": 1.0743, + "step": 30286 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887640202499986e-05, + "loss": 1.0297, + "step": 30287 + }, + { + "epoch": 0.03, + "learning_rate": 4.988763276370773e-05, + "loss": 0.964, + "step": 30288 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887625324669784e-05, + "loss": 0.9504, + "step": 30289 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887617885386165e-05, + "loss": 1.1814, + "step": 30290 + }, + { + "epoch": 0.03, + "learning_rate": 4.988761044585686e-05, + "loss": 0.9997, + "step": 30291 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887603006081874e-05, + "loss": 0.9505, + "step": 30292 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887595566061215e-05, + "loss": 0.81, + "step": 30293 + }, + { + "epoch": 0.03, + "learning_rate": 4.988758812579487e-05, + "loss": 0.7603, + "step": 30294 + }, + { + "epoch": 0.03, + "learning_rate": 4.988758068528285e-05, + "loss": 0.9263, + "step": 30295 + }, + { + "epoch": 0.03, + "learning_rate": 4.988757324452514e-05, + "loss": 0.5048, + "step": 30296 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887565803521763e-05, + "loss": 0.7699, + "step": 30297 + }, + { + "epoch": 0.03, + "learning_rate": 4.98875583622727e-05, + "loss": 0.9971, + "step": 30298 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887550920777956e-05, + "loss": 1.2109, + "step": 30299 + }, + { + "epoch": 0.03, + "learning_rate": 4.988754347903755e-05, + "loss": 0.9692, + "step": 30300 + }, + { + "epoch": 0.03, + "learning_rate": 4.988753603705144e-05, + "loss": 1.2812, + "step": 30301 + }, + { + "epoch": 0.03, + "learning_rate": 4.988752859481967e-05, + "loss": 1.3916, + "step": 30302 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887521152342207e-05, + "loss": 0.8942, + "step": 30303 + }, + { + "epoch": 0.03, + "learning_rate": 4.988751370961907e-05, + "loss": 1.3513, + "step": 30304 + }, + { + "epoch": 0.03, + "learning_rate": 4.988750626665025e-05, + "loss": 0.8628, + "step": 30305 + }, + { + "epoch": 0.03, + "learning_rate": 4.988749882343576e-05, + "loss": 1.2382, + "step": 30306 + }, + { + "epoch": 0.03, + "learning_rate": 4.988749137997558e-05, + "loss": 1.1532, + "step": 30307 + }, + { + "epoch": 0.03, + "learning_rate": 4.988748393626973e-05, + "loss": 0.9638, + "step": 30308 + }, + { + "epoch": 0.03, + "learning_rate": 4.98874764923182e-05, + "loss": 1.3169, + "step": 30309 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887469048120994e-05, + "loss": 1.0043, + "step": 30310 + }, + { + "epoch": 0.03, + "learning_rate": 4.98874616036781e-05, + "loss": 0.8142, + "step": 30311 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887454158989535e-05, + "loss": 1.01, + "step": 30312 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887446714055285e-05, + "loss": 1.4629, + "step": 30313 + }, + { + "epoch": 0.03, + "learning_rate": 4.988743926887537e-05, + "loss": 0.9614, + "step": 30314 + }, + { + "epoch": 0.03, + "learning_rate": 4.988743182344976e-05, + "loss": 1.092, + "step": 30315 + }, + { + "epoch": 0.03, + "learning_rate": 4.988742437777848e-05, + "loss": 0.9066, + "step": 30316 + }, + { + "epoch": 0.03, + "learning_rate": 4.988741693186152e-05, + "loss": 1.1068, + "step": 30317 + }, + { + "epoch": 0.03, + "learning_rate": 4.988740948569889e-05, + "loss": 1.0629, + "step": 30318 + }, + { + "epoch": 0.03, + "learning_rate": 4.988740203929057e-05, + "loss": 0.796, + "step": 30319 + }, + { + "epoch": 0.03, + "learning_rate": 4.988739459263658e-05, + "loss": 0.9685, + "step": 30320 + }, + { + "epoch": 0.03, + "learning_rate": 4.988738714573691e-05, + "loss": 0.3947, + "step": 30321 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887379698591564e-05, + "loss": 0.2907, + "step": 30322 + }, + { + "epoch": 0.03, + "learning_rate": 4.988737225120054e-05, + "loss": 1.1765, + "step": 30323 + }, + { + "epoch": 0.03, + "learning_rate": 4.988736480356383e-05, + "loss": 1.0742, + "step": 30324 + }, + { + "epoch": 0.03, + "learning_rate": 4.988735735568145e-05, + "loss": 1.1261, + "step": 30325 + }, + { + "epoch": 0.03, + "learning_rate": 4.988734990755339e-05, + "loss": 1.1172, + "step": 30326 + }, + { + "epoch": 0.03, + "learning_rate": 4.988734245917965e-05, + "loss": 1.0789, + "step": 30327 + }, + { + "epoch": 0.03, + "learning_rate": 4.988733501056024e-05, + "loss": 1.1008, + "step": 30328 + }, + { + "epoch": 0.03, + "learning_rate": 4.988732756169515e-05, + "loss": 1.0198, + "step": 30329 + }, + { + "epoch": 0.03, + "learning_rate": 4.988732011258438e-05, + "loss": 1.0322, + "step": 30330 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887312663227935e-05, + "loss": 0.8964, + "step": 30331 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887305213625815e-05, + "loss": 0.6941, + "step": 30332 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887297763778016e-05, + "loss": 1.0092, + "step": 30333 + }, + { + "epoch": 0.03, + "learning_rate": 4.988729031368453e-05, + "loss": 0.958, + "step": 30334 + }, + { + "epoch": 0.03, + "learning_rate": 4.988728286334538e-05, + "loss": 1.2494, + "step": 30335 + }, + { + "epoch": 0.03, + "learning_rate": 4.988727541276055e-05, + "loss": 1.1272, + "step": 30336 + }, + { + "epoch": 0.03, + "learning_rate": 4.988726796193005e-05, + "loss": 1.1704, + "step": 30337 + }, + { + "epoch": 0.03, + "learning_rate": 4.988726051085386e-05, + "loss": 0.9745, + "step": 30338 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887253059532e-05, + "loss": 1.1959, + "step": 30339 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887245607964466e-05, + "loss": 1.0604, + "step": 30340 + }, + { + "epoch": 0.03, + "learning_rate": 4.988723815615125e-05, + "loss": 0.9175, + "step": 30341 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887230704092364e-05, + "loss": 1.0695, + "step": 30342 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887223251787796e-05, + "loss": 1.019, + "step": 30343 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887215799237555e-05, + "loss": 1.1072, + "step": 30344 + }, + { + "epoch": 0.03, + "learning_rate": 4.988720834644164e-05, + "loss": 1.0298, + "step": 30345 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887200893400045e-05, + "loss": 0.8788, + "step": 30346 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887193440112776e-05, + "loss": 0.7512, + "step": 30347 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887185986579834e-05, + "loss": 0.7697, + "step": 30348 + }, + { + "epoch": 0.03, + "learning_rate": 4.988717853280121e-05, + "loss": 1.3264, + "step": 30349 + }, + { + "epoch": 0.03, + "learning_rate": 4.988717107877691e-05, + "loss": 0.8916, + "step": 30350 + }, + { + "epoch": 0.03, + "learning_rate": 4.988716362450694e-05, + "loss": 1.1013, + "step": 30351 + }, + { + "epoch": 0.03, + "learning_rate": 4.988715616999129e-05, + "loss": 1.1604, + "step": 30352 + }, + { + "epoch": 0.03, + "learning_rate": 4.988714871522997e-05, + "loss": 1.1834, + "step": 30353 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887141260222966e-05, + "loss": 0.7964, + "step": 30354 + }, + { + "epoch": 0.03, + "learning_rate": 4.988713380497029e-05, + "loss": 0.9598, + "step": 30355 + }, + { + "epoch": 0.03, + "learning_rate": 4.988712634947195e-05, + "loss": 1.1074, + "step": 30356 + }, + { + "epoch": 0.03, + "learning_rate": 4.988711889372792e-05, + "loss": 1.3051, + "step": 30357 + }, + { + "epoch": 0.03, + "learning_rate": 4.988711143773822e-05, + "loss": 0.7754, + "step": 30358 + }, + { + "epoch": 0.03, + "learning_rate": 4.988710398150284e-05, + "loss": 0.861, + "step": 30359 + }, + { + "epoch": 0.03, + "learning_rate": 4.988709652502179e-05, + "loss": 1.189, + "step": 30360 + }, + { + "epoch": 0.03, + "learning_rate": 4.988708906829507e-05, + "loss": 1.0141, + "step": 30361 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887081611322665e-05, + "loss": 0.897, + "step": 30362 + }, + { + "epoch": 0.03, + "learning_rate": 4.988707415410459e-05, + "loss": 0.8908, + "step": 30363 + }, + { + "epoch": 0.03, + "learning_rate": 4.988706669664084e-05, + "loss": 1.6998, + "step": 30364 + }, + { + "epoch": 0.03, + "learning_rate": 4.988705923893141e-05, + "loss": 1.6928, + "step": 30365 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887051780976315e-05, + "loss": 1.213, + "step": 30366 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887044322775544e-05, + "loss": 1.1294, + "step": 30367 + }, + { + "epoch": 0.03, + "learning_rate": 4.988703686432909e-05, + "loss": 0.9543, + "step": 30368 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887029405636975e-05, + "loss": 1.3386, + "step": 30369 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887021946699177e-05, + "loss": 0.9312, + "step": 30370 + }, + { + "epoch": 0.03, + "learning_rate": 4.9887014487515705e-05, + "loss": 0.9858, + "step": 30371 + }, + { + "epoch": 0.03, + "learning_rate": 4.988700702808656e-05, + "loss": 1.2342, + "step": 30372 + }, + { + "epoch": 0.03, + "learning_rate": 4.988699956841174e-05, + "loss": 0.9419, + "step": 30373 + }, + { + "epoch": 0.03, + "learning_rate": 4.988699210849125e-05, + "loss": 1.0845, + "step": 30374 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886984648325084e-05, + "loss": 1.1061, + "step": 30375 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886977187913245e-05, + "loss": 1.157, + "step": 30376 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886969727255726e-05, + "loss": 1.0333, + "step": 30377 + }, + { + "epoch": 0.03, + "learning_rate": 4.988696226635254e-05, + "loss": 0.8983, + "step": 30378 + }, + { + "epoch": 0.03, + "learning_rate": 4.988695480520368e-05, + "loss": 1.323, + "step": 30379 + }, + { + "epoch": 0.03, + "learning_rate": 4.988694734380914e-05, + "loss": 1.1321, + "step": 30380 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886939882168936e-05, + "loss": 1.1689, + "step": 30381 + }, + { + "epoch": 0.03, + "learning_rate": 4.988693242028306e-05, + "loss": 0.5773, + "step": 30382 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886924958151504e-05, + "loss": 0.629, + "step": 30383 + }, + { + "epoch": 0.03, + "learning_rate": 4.988691749577427e-05, + "loss": 0.8304, + "step": 30384 + }, + { + "epoch": 0.03, + "learning_rate": 4.988691003315137e-05, + "loss": 1.2591, + "step": 30385 + }, + { + "epoch": 0.03, + "learning_rate": 4.98869025702828e-05, + "loss": 0.7578, + "step": 30386 + }, + { + "epoch": 0.03, + "learning_rate": 4.988689510716855e-05, + "loss": 1.0838, + "step": 30387 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886887643808624e-05, + "loss": 1.2589, + "step": 30388 + }, + { + "epoch": 0.03, + "learning_rate": 4.988688018020303e-05, + "loss": 1.2296, + "step": 30389 + }, + { + "epoch": 0.03, + "learning_rate": 4.988687271635177e-05, + "loss": 1.0462, + "step": 30390 + }, + { + "epoch": 0.03, + "learning_rate": 4.988686525225483e-05, + "loss": 1.3683, + "step": 30391 + }, + { + "epoch": 0.03, + "learning_rate": 4.988685778791222e-05, + "loss": 0.8464, + "step": 30392 + }, + { + "epoch": 0.03, + "learning_rate": 4.988685032332393e-05, + "loss": 1.131, + "step": 30393 + }, + { + "epoch": 0.03, + "learning_rate": 4.988684285848998e-05, + "loss": 1.151, + "step": 30394 + }, + { + "epoch": 0.03, + "learning_rate": 4.988683539341035e-05, + "loss": 0.9147, + "step": 30395 + }, + { + "epoch": 0.03, + "learning_rate": 4.988682792808505e-05, + "loss": 0.7759, + "step": 30396 + }, + { + "epoch": 0.03, + "learning_rate": 4.988682046251407e-05, + "loss": 0.798, + "step": 30397 + }, + { + "epoch": 0.03, + "learning_rate": 4.988681299669743e-05, + "loss": 1.1345, + "step": 30398 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886805530635105e-05, + "loss": 1.1985, + "step": 30399 + }, + { + "epoch": 0.03, + "learning_rate": 4.988679806432712e-05, + "loss": 0.8181, + "step": 30400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886790597773456e-05, + "loss": 0.7605, + "step": 30401 + }, + { + "epoch": 0.03, + "learning_rate": 4.988678313097412e-05, + "loss": 0.8699, + "step": 30402 + }, + { + "epoch": 0.03, + "learning_rate": 4.988677566392912e-05, + "loss": 0.8725, + "step": 30403 + }, + { + "epoch": 0.03, + "learning_rate": 4.988676819663843e-05, + "loss": 0.85, + "step": 30404 + }, + { + "epoch": 0.03, + "learning_rate": 4.988676072910209e-05, + "loss": 0.9312, + "step": 30405 + }, + { + "epoch": 0.03, + "learning_rate": 4.988675326132007e-05, + "loss": 0.7508, + "step": 30406 + }, + { + "epoch": 0.03, + "learning_rate": 4.988674579329238e-05, + "loss": 1.189, + "step": 30407 + }, + { + "epoch": 0.03, + "learning_rate": 4.988673832501901e-05, + "loss": 1.0089, + "step": 30408 + }, + { + "epoch": 0.03, + "learning_rate": 4.988673085649998e-05, + "loss": 1.2847, + "step": 30409 + }, + { + "epoch": 0.03, + "learning_rate": 4.988672338773527e-05, + "loss": 0.8953, + "step": 30410 + }, + { + "epoch": 0.03, + "learning_rate": 4.988671591872489e-05, + "loss": 0.8516, + "step": 30411 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886708449468845e-05, + "loss": 1.048, + "step": 30412 + }, + { + "epoch": 0.03, + "learning_rate": 4.988670097996712e-05, + "loss": 0.6563, + "step": 30413 + }, + { + "epoch": 0.03, + "learning_rate": 4.988669351021973e-05, + "loss": 0.9653, + "step": 30414 + }, + { + "epoch": 0.03, + "learning_rate": 4.988668604022667e-05, + "loss": 0.9193, + "step": 30415 + }, + { + "epoch": 0.03, + "learning_rate": 4.988667856998793e-05, + "loss": 0.9939, + "step": 30416 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886671099503534e-05, + "loss": 1.0528, + "step": 30417 + }, + { + "epoch": 0.03, + "learning_rate": 4.988666362877346e-05, + "loss": 1.3531, + "step": 30418 + }, + { + "epoch": 0.03, + "learning_rate": 4.988665615779771e-05, + "loss": 0.8761, + "step": 30419 + }, + { + "epoch": 0.03, + "learning_rate": 4.98866486865763e-05, + "loss": 0.9057, + "step": 30420 + }, + { + "epoch": 0.03, + "learning_rate": 4.988664121510921e-05, + "loss": 0.9434, + "step": 30421 + }, + { + "epoch": 0.03, + "learning_rate": 4.988663374339645e-05, + "loss": 0.7422, + "step": 30422 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886626271438026e-05, + "loss": 1.0685, + "step": 30423 + }, + { + "epoch": 0.03, + "learning_rate": 4.988661879923393e-05, + "loss": 1.315, + "step": 30424 + }, + { + "epoch": 0.03, + "learning_rate": 4.988661132678416e-05, + "loss": 0.9088, + "step": 30425 + }, + { + "epoch": 0.03, + "learning_rate": 4.988660385408872e-05, + "loss": 1.104, + "step": 30426 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886596381147615e-05, + "loss": 1.1951, + "step": 30427 + }, + { + "epoch": 0.03, + "learning_rate": 4.988658890796084e-05, + "loss": 0.816, + "step": 30428 + }, + { + "epoch": 0.03, + "learning_rate": 4.988658143452839e-05, + "loss": 0.9876, + "step": 30429 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886573960850266e-05, + "loss": 0.9168, + "step": 30430 + }, + { + "epoch": 0.03, + "learning_rate": 4.988656648692648e-05, + "loss": 0.9905, + "step": 30431 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886559012757027e-05, + "loss": 1.182, + "step": 30432 + }, + { + "epoch": 0.03, + "learning_rate": 4.98865515383419e-05, + "loss": 0.9935, + "step": 30433 + }, + { + "epoch": 0.03, + "learning_rate": 4.988654406368111e-05, + "loss": 0.8657, + "step": 30434 + }, + { + "epoch": 0.03, + "learning_rate": 4.988653658877464e-05, + "loss": 0.7408, + "step": 30435 + }, + { + "epoch": 0.03, + "learning_rate": 4.988652911362251e-05, + "loss": 0.9621, + "step": 30436 + }, + { + "epoch": 0.03, + "learning_rate": 4.98865216382247e-05, + "loss": 0.9885, + "step": 30437 + }, + { + "epoch": 0.03, + "learning_rate": 4.988651416258122e-05, + "loss": 1.0256, + "step": 30438 + }, + { + "epoch": 0.03, + "learning_rate": 4.988650668669208e-05, + "loss": 0.9647, + "step": 30439 + }, + { + "epoch": 0.03, + "learning_rate": 4.988649921055727e-05, + "loss": 0.8234, + "step": 30440 + }, + { + "epoch": 0.03, + "learning_rate": 4.988649173417679e-05, + "loss": 0.5206, + "step": 30441 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886484257550634e-05, + "loss": 0.766, + "step": 30442 + }, + { + "epoch": 0.03, + "learning_rate": 4.988647678067882e-05, + "loss": 0.4214, + "step": 30443 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886469303561334e-05, + "loss": 0.8505, + "step": 30444 + }, + { + "epoch": 0.03, + "learning_rate": 4.988646182619817e-05, + "loss": 1.0268, + "step": 30445 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886454348589354e-05, + "loss": 0.8361, + "step": 30446 + }, + { + "epoch": 0.03, + "learning_rate": 4.988644687073486e-05, + "loss": 1.1138, + "step": 30447 + }, + { + "epoch": 0.03, + "learning_rate": 4.98864393926347e-05, + "loss": 0.8205, + "step": 30448 + }, + { + "epoch": 0.03, + "learning_rate": 4.988643191428887e-05, + "loss": 0.9563, + "step": 30449 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886424435697366e-05, + "loss": 0.574, + "step": 30450 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886416956860196e-05, + "loss": 0.9863, + "step": 30451 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886409477777366e-05, + "loss": 0.7047, + "step": 30452 + }, + { + "epoch": 0.03, + "learning_rate": 4.988640199844886e-05, + "loss": 0.7565, + "step": 30453 + }, + { + "epoch": 0.03, + "learning_rate": 4.988639451887469e-05, + "loss": 0.9902, + "step": 30454 + }, + { + "epoch": 0.03, + "learning_rate": 4.988638703905485e-05, + "loss": 1.0765, + "step": 30455 + }, + { + "epoch": 0.03, + "learning_rate": 4.988637955898934e-05, + "loss": 1.0259, + "step": 30456 + }, + { + "epoch": 0.03, + "learning_rate": 4.988637207867817e-05, + "loss": 0.8864, + "step": 30457 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886364598121323e-05, + "loss": 1.0341, + "step": 30458 + }, + { + "epoch": 0.03, + "learning_rate": 4.988635711731881e-05, + "loss": 0.9685, + "step": 30459 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886349636270636e-05, + "loss": 0.9665, + "step": 30460 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886342154976785e-05, + "loss": 1.1273, + "step": 30461 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886334673437275e-05, + "loss": 1.0754, + "step": 30462 + }, + { + "epoch": 0.03, + "learning_rate": 4.988632719165209e-05, + "loss": 0.7055, + "step": 30463 + }, + { + "epoch": 0.03, + "learning_rate": 4.988631970962124e-05, + "loss": 0.9298, + "step": 30464 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886312227344724e-05, + "loss": 1.1543, + "step": 30465 + }, + { + "epoch": 0.03, + "learning_rate": 4.988630474482254e-05, + "loss": 0.8627, + "step": 30466 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886297262054696e-05, + "loss": 0.8864, + "step": 30467 + }, + { + "epoch": 0.03, + "learning_rate": 4.988628977904117e-05, + "loss": 0.961, + "step": 30468 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886282295781995e-05, + "loss": 1.0342, + "step": 30469 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886274812277145e-05, + "loss": 1.2027, + "step": 30470 + }, + { + "epoch": 0.03, + "learning_rate": 4.988626732852663e-05, + "loss": 0.8824, + "step": 30471 + }, + { + "epoch": 0.03, + "learning_rate": 4.988625984453044e-05, + "loss": 0.9542, + "step": 30472 + }, + { + "epoch": 0.03, + "learning_rate": 4.988625236028859e-05, + "loss": 1.0521, + "step": 30473 + }, + { + "epoch": 0.03, + "learning_rate": 4.988624487580107e-05, + "loss": 0.9454, + "step": 30474 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886237391067886e-05, + "loss": 0.8952, + "step": 30475 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886229906089036e-05, + "loss": 0.8995, + "step": 30476 + }, + { + "epoch": 0.03, + "learning_rate": 4.988622242086451e-05, + "loss": 0.4351, + "step": 30477 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886214935394335e-05, + "loss": 0.2577, + "step": 30478 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886207449678485e-05, + "loss": 0.6427, + "step": 30479 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886199963716975e-05, + "loss": 1.2303, + "step": 30480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886192477509784e-05, + "loss": 0.9121, + "step": 30481 + }, + { + "epoch": 0.03, + "learning_rate": 4.988618499105694e-05, + "loss": 1.165, + "step": 30482 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886177504358424e-05, + "loss": 1.0825, + "step": 30483 + }, + { + "epoch": 0.03, + "learning_rate": 4.988617001741425e-05, + "loss": 0.9241, + "step": 30484 + }, + { + "epoch": 0.03, + "learning_rate": 4.98861625302244e-05, + "loss": 1.1902, + "step": 30485 + }, + { + "epoch": 0.03, + "learning_rate": 4.988615504278889e-05, + "loss": 0.9957, + "step": 30486 + }, + { + "epoch": 0.03, + "learning_rate": 4.988614755510771e-05, + "loss": 0.9244, + "step": 30487 + }, + { + "epoch": 0.03, + "learning_rate": 4.988614006718087e-05, + "loss": 1.1216, + "step": 30488 + }, + { + "epoch": 0.03, + "learning_rate": 4.988613257900836e-05, + "loss": 1.0553, + "step": 30489 + }, + { + "epoch": 0.03, + "learning_rate": 4.988612509059019e-05, + "loss": 1.2569, + "step": 30490 + }, + { + "epoch": 0.03, + "learning_rate": 4.988611760192635e-05, + "loss": 1.0344, + "step": 30491 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886110113016846e-05, + "loss": 0.7129, + "step": 30492 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886102623861675e-05, + "loss": 1.0227, + "step": 30493 + }, + { + "epoch": 0.03, + "learning_rate": 4.988609513446084e-05, + "loss": 1.1031, + "step": 30494 + }, + { + "epoch": 0.03, + "learning_rate": 4.988608764481434e-05, + "loss": 0.9299, + "step": 30495 + }, + { + "epoch": 0.03, + "learning_rate": 4.988608015492218e-05, + "loss": 0.8762, + "step": 30496 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886072664784356e-05, + "loss": 1.1619, + "step": 30497 + }, + { + "epoch": 0.03, + "learning_rate": 4.988606517440086e-05, + "loss": 0.8972, + "step": 30498 + }, + { + "epoch": 0.03, + "learning_rate": 4.98860576837717e-05, + "loss": 0.9351, + "step": 30499 + }, + { + "epoch": 0.03, + "learning_rate": 4.988605019289688e-05, + "loss": 0.9589, + "step": 30500 + }, + { + "epoch": 0.03, + "eval_loss": 1.062410831451416, + "eval_runtime": 127.2932, + "eval_samples_per_second": 10.88, + "eval_steps_per_second": 5.444, + "step": 30500 + }, + { + "epoch": 0.03, + "learning_rate": 4.988604270177639e-05, + "loss": 0.6742, + "step": 30501 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886035210410234e-05, + "loss": 0.9181, + "step": 30502 + }, + { + "epoch": 0.03, + "learning_rate": 4.9886027718798424e-05, + "loss": 0.9277, + "step": 30503 + }, + { + "epoch": 0.03, + "learning_rate": 4.988602022694094e-05, + "loss": 1.1456, + "step": 30504 + }, + { + "epoch": 0.03, + "learning_rate": 4.98860127348378e-05, + "loss": 0.303, + "step": 30505 + }, + { + "epoch": 0.03, + "learning_rate": 4.988600524248898e-05, + "loss": 1.084, + "step": 30506 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885997749894506e-05, + "loss": 1.4629, + "step": 30507 + }, + { + "epoch": 0.03, + "learning_rate": 4.988599025705437e-05, + "loss": 0.589, + "step": 30508 + }, + { + "epoch": 0.03, + "learning_rate": 4.988598276396857e-05, + "loss": 0.7545, + "step": 30509 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885975270637104e-05, + "loss": 0.9913, + "step": 30510 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885967777059975e-05, + "loss": 0.9444, + "step": 30511 + }, + { + "epoch": 0.03, + "learning_rate": 4.988596028323718e-05, + "loss": 0.8748, + "step": 30512 + }, + { + "epoch": 0.03, + "learning_rate": 4.988595278916872e-05, + "loss": 0.9296, + "step": 30513 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885945294854594e-05, + "loss": 1.1932, + "step": 30514 + }, + { + "epoch": 0.03, + "learning_rate": 4.988593780029481e-05, + "loss": 0.9905, + "step": 30515 + }, + { + "epoch": 0.03, + "learning_rate": 4.988593030548937e-05, + "loss": 0.4419, + "step": 30516 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885922810438254e-05, + "loss": 0.7889, + "step": 30517 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885915315141485e-05, + "loss": 1.2019, + "step": 30518 + }, + { + "epoch": 0.03, + "learning_rate": 4.988590781959904e-05, + "loss": 1.063, + "step": 30519 + }, + { + "epoch": 0.03, + "learning_rate": 4.988590032381094e-05, + "loss": 0.874, + "step": 30520 + }, + { + "epoch": 0.03, + "learning_rate": 4.988589282777718e-05, + "loss": 1.0266, + "step": 30521 + }, + { + "epoch": 0.03, + "learning_rate": 4.988588533149775e-05, + "loss": 1.2642, + "step": 30522 + }, + { + "epoch": 0.03, + "learning_rate": 4.988587783497266e-05, + "loss": 1.3523, + "step": 30523 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885870338201907e-05, + "loss": 1.1599, + "step": 30524 + }, + { + "epoch": 0.03, + "learning_rate": 4.988586284118549e-05, + "loss": 0.9617, + "step": 30525 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885855343923417e-05, + "loss": 1.3953, + "step": 30526 + }, + { + "epoch": 0.03, + "learning_rate": 4.988584784641567e-05, + "loss": 1.348, + "step": 30527 + }, + { + "epoch": 0.03, + "learning_rate": 4.988584034866227e-05, + "loss": 1.0633, + "step": 30528 + }, + { + "epoch": 0.03, + "learning_rate": 4.98858328506632e-05, + "loss": 0.7868, + "step": 30529 + }, + { + "epoch": 0.03, + "learning_rate": 4.988582535241847e-05, + "loss": 1.1971, + "step": 30530 + }, + { + "epoch": 0.03, + "learning_rate": 4.988581785392808e-05, + "loss": 0.6082, + "step": 30531 + }, + { + "epoch": 0.03, + "learning_rate": 4.988581035519203e-05, + "loss": 0.9953, + "step": 30532 + }, + { + "epoch": 0.03, + "learning_rate": 4.988580285621031e-05, + "loss": 1.0583, + "step": 30533 + }, + { + "epoch": 0.03, + "learning_rate": 4.988579535698293e-05, + "loss": 1.1714, + "step": 30534 + }, + { + "epoch": 0.03, + "learning_rate": 4.988578785750989e-05, + "loss": 1.1749, + "step": 30535 + }, + { + "epoch": 0.03, + "learning_rate": 4.988578035779119e-05, + "loss": 0.9516, + "step": 30536 + }, + { + "epoch": 0.03, + "learning_rate": 4.988577285782682e-05, + "loss": 1.0658, + "step": 30537 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885765357616796e-05, + "loss": 1.2269, + "step": 30538 + }, + { + "epoch": 0.03, + "learning_rate": 4.988575785716111e-05, + "loss": 1.1412, + "step": 30539 + }, + { + "epoch": 0.03, + "learning_rate": 4.988575035645976e-05, + "loss": 1.0141, + "step": 30540 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885742855512754e-05, + "loss": 1.0834, + "step": 30541 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885735354320074e-05, + "loss": 0.6472, + "step": 30542 + }, + { + "epoch": 0.03, + "learning_rate": 4.988572785288174e-05, + "loss": 1.139, + "step": 30543 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885720351197746e-05, + "loss": 1.0964, + "step": 30544 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885712849268086e-05, + "loss": 1.0609, + "step": 30545 + }, + { + "epoch": 0.03, + "learning_rate": 4.988570534709277e-05, + "loss": 1.0681, + "step": 30546 + }, + { + "epoch": 0.03, + "learning_rate": 4.988569784467179e-05, + "loss": 1.0075, + "step": 30547 + }, + { + "epoch": 0.03, + "learning_rate": 4.988569034200515e-05, + "loss": 0.8369, + "step": 30548 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885682839092854e-05, + "loss": 0.9221, + "step": 30549 + }, + { + "epoch": 0.03, + "learning_rate": 4.988567533593489e-05, + "loss": 1.0704, + "step": 30550 + }, + { + "epoch": 0.03, + "learning_rate": 4.988566783253126e-05, + "loss": 0.7737, + "step": 30551 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885660328881975e-05, + "loss": 1.1384, + "step": 30552 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885652824987036e-05, + "loss": 1.0507, + "step": 30553 + }, + { + "epoch": 0.03, + "learning_rate": 4.988564532084643e-05, + "loss": 0.9285, + "step": 30554 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885637816460164e-05, + "loss": 0.833, + "step": 30555 + }, + { + "epoch": 0.03, + "learning_rate": 4.988563031182824e-05, + "loss": 0.8649, + "step": 30556 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885622806950646e-05, + "loss": 1.0348, + "step": 30557 + }, + { + "epoch": 0.03, + "learning_rate": 4.98856153018274e-05, + "loss": 1.1943, + "step": 30558 + }, + { + "epoch": 0.03, + "learning_rate": 4.988560779645849e-05, + "loss": 0.8494, + "step": 30559 + }, + { + "epoch": 0.03, + "learning_rate": 4.988560029084392e-05, + "loss": 1.0628, + "step": 30560 + }, + { + "epoch": 0.03, + "learning_rate": 4.988559278498369e-05, + "loss": 0.7865, + "step": 30561 + }, + { + "epoch": 0.03, + "learning_rate": 4.98855852788778e-05, + "loss": 1.2546, + "step": 30562 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885577772526255e-05, + "loss": 1.2787, + "step": 30563 + }, + { + "epoch": 0.03, + "learning_rate": 4.988557026592905e-05, + "loss": 1.111, + "step": 30564 + }, + { + "epoch": 0.03, + "learning_rate": 4.988556275908618e-05, + "loss": 1.0025, + "step": 30565 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885555251997654e-05, + "loss": 1.1355, + "step": 30566 + }, + { + "epoch": 0.03, + "learning_rate": 4.988554774466346e-05, + "loss": 1.0609, + "step": 30567 + }, + { + "epoch": 0.03, + "learning_rate": 4.988554023708362e-05, + "loss": 0.928, + "step": 30568 + }, + { + "epoch": 0.03, + "learning_rate": 4.988553272925811e-05, + "loss": 0.7891, + "step": 30569 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885525221186944e-05, + "loss": 0.6626, + "step": 30570 + }, + { + "epoch": 0.03, + "learning_rate": 4.988551771287011e-05, + "loss": 0.5382, + "step": 30571 + }, + { + "epoch": 0.03, + "learning_rate": 4.988551020430763e-05, + "loss": 0.7892, + "step": 30572 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885502695499486e-05, + "loss": 0.7419, + "step": 30573 + }, + { + "epoch": 0.03, + "learning_rate": 4.988549518644568e-05, + "loss": 0.8191, + "step": 30574 + }, + { + "epoch": 0.03, + "learning_rate": 4.988548767714621e-05, + "loss": 0.7508, + "step": 30575 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885480167601095e-05, + "loss": 0.8749, + "step": 30576 + }, + { + "epoch": 0.03, + "learning_rate": 4.988547265781032e-05, + "loss": 1.1206, + "step": 30577 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885465147773876e-05, + "loss": 0.9604, + "step": 30578 + }, + { + "epoch": 0.03, + "learning_rate": 4.988545763749177e-05, + "loss": 1.2183, + "step": 30579 + }, + { + "epoch": 0.03, + "learning_rate": 4.988545012696402e-05, + "loss": 1.2267, + "step": 30580 + }, + { + "epoch": 0.03, + "learning_rate": 4.98854426161906e-05, + "loss": 0.9436, + "step": 30581 + }, + { + "epoch": 0.03, + "learning_rate": 4.988543510517153e-05, + "loss": 0.7906, + "step": 30582 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885427593906796e-05, + "loss": 1.1821, + "step": 30583 + }, + { + "epoch": 0.03, + "learning_rate": 4.988542008239641e-05, + "loss": 1.0951, + "step": 30584 + }, + { + "epoch": 0.03, + "learning_rate": 4.988541257064036e-05, + "loss": 1.1501, + "step": 30585 + }, + { + "epoch": 0.03, + "learning_rate": 4.988540505863864e-05, + "loss": 1.2809, + "step": 30586 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885397546391275e-05, + "loss": 1.3834, + "step": 30587 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885390033898246e-05, + "loss": 1.0349, + "step": 30588 + }, + { + "epoch": 0.03, + "learning_rate": 4.988538252115957e-05, + "loss": 0.9283, + "step": 30589 + }, + { + "epoch": 0.03, + "learning_rate": 4.988537500817523e-05, + "loss": 0.8912, + "step": 30590 + }, + { + "epoch": 0.03, + "learning_rate": 4.988536749494523e-05, + "loss": 0.5636, + "step": 30591 + }, + { + "epoch": 0.03, + "learning_rate": 4.988535998146957e-05, + "loss": 1.1915, + "step": 30592 + }, + { + "epoch": 0.03, + "learning_rate": 4.988535246774826e-05, + "loss": 1.1891, + "step": 30593 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885344953781286e-05, + "loss": 0.884, + "step": 30594 + }, + { + "epoch": 0.03, + "learning_rate": 4.988533743956866e-05, + "loss": 1.0431, + "step": 30595 + }, + { + "epoch": 0.03, + "learning_rate": 4.988532992511037e-05, + "loss": 1.1533, + "step": 30596 + }, + { + "epoch": 0.03, + "learning_rate": 4.988532241040642e-05, + "loss": 1.4342, + "step": 30597 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885314895456825e-05, + "loss": 1.4455, + "step": 30598 + }, + { + "epoch": 0.03, + "learning_rate": 4.988530738026156e-05, + "loss": 1.1039, + "step": 30599 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885299864820645e-05, + "loss": 0.6407, + "step": 30600 + }, + { + "epoch": 0.03, + "learning_rate": 4.988529234913407e-05, + "loss": 0.6461, + "step": 30601 + }, + { + "epoch": 0.03, + "learning_rate": 4.988528483320184e-05, + "loss": 0.874, + "step": 30602 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885277317023946e-05, + "loss": 1.035, + "step": 30603 + }, + { + "epoch": 0.03, + "learning_rate": 4.988526980060041e-05, + "loss": 1.0288, + "step": 30604 + }, + { + "epoch": 0.03, + "learning_rate": 4.98852622839312e-05, + "loss": 0.8685, + "step": 30605 + }, + { + "epoch": 0.03, + "learning_rate": 4.988525476701634e-05, + "loss": 1.1442, + "step": 30606 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885247249855824e-05, + "loss": 0.8064, + "step": 30607 + }, + { + "epoch": 0.03, + "learning_rate": 4.988523973244966e-05, + "loss": 1.2525, + "step": 30608 + }, + { + "epoch": 0.03, + "learning_rate": 4.988523221479783e-05, + "loss": 1.1239, + "step": 30609 + }, + { + "epoch": 0.03, + "learning_rate": 4.988522469690035e-05, + "loss": 0.8973, + "step": 30610 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885217178757204e-05, + "loss": 1.0113, + "step": 30611 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885209660368406e-05, + "loss": 1.0434, + "step": 30612 + }, + { + "epoch": 0.03, + "learning_rate": 4.988520214173394e-05, + "loss": 0.8621, + "step": 30613 + }, + { + "epoch": 0.03, + "learning_rate": 4.988519462285384e-05, + "loss": 1.0373, + "step": 30614 + }, + { + "epoch": 0.03, + "learning_rate": 4.988518710372807e-05, + "loss": 1.1877, + "step": 30615 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885179584356643e-05, + "loss": 1.2804, + "step": 30616 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885172064739566e-05, + "loss": 1.2374, + "step": 30617 + }, + { + "epoch": 0.03, + "learning_rate": 4.988516454487683e-05, + "loss": 1.5123, + "step": 30618 + }, + { + "epoch": 0.03, + "learning_rate": 4.988515702476844e-05, + "loss": 1.0186, + "step": 30619 + }, + { + "epoch": 0.03, + "learning_rate": 4.98851495044144e-05, + "loss": 1.0073, + "step": 30620 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885141983814694e-05, + "loss": 1.1017, + "step": 30621 + }, + { + "epoch": 0.03, + "learning_rate": 4.988513446296933e-05, + "loss": 0.8871, + "step": 30622 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885126941878315e-05, + "loss": 0.9008, + "step": 30623 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885119420541646e-05, + "loss": 0.9932, + "step": 30624 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885111898959324e-05, + "loss": 1.1745, + "step": 30625 + }, + { + "epoch": 0.03, + "learning_rate": 4.988510437713135e-05, + "loss": 0.605, + "step": 30626 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885096855057714e-05, + "loss": 0.8405, + "step": 30627 + }, + { + "epoch": 0.03, + "learning_rate": 4.988508933273842e-05, + "loss": 0.9884, + "step": 30628 + }, + { + "epoch": 0.03, + "learning_rate": 4.988508181017347e-05, + "loss": 0.6843, + "step": 30629 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885074287362876e-05, + "loss": 0.8055, + "step": 30630 + }, + { + "epoch": 0.03, + "learning_rate": 4.988506676430662e-05, + "loss": 1.1032, + "step": 30631 + }, + { + "epoch": 0.03, + "learning_rate": 4.988505924100471e-05, + "loss": 0.8871, + "step": 30632 + }, + { + "epoch": 0.03, + "learning_rate": 4.988505171745714e-05, + "loss": 1.0735, + "step": 30633 + }, + { + "epoch": 0.03, + "learning_rate": 4.988504419366392e-05, + "loss": 0.9792, + "step": 30634 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885036669625054e-05, + "loss": 0.8541, + "step": 30635 + }, + { + "epoch": 0.03, + "learning_rate": 4.988502914534052e-05, + "loss": 0.8583, + "step": 30636 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885021620810334e-05, + "loss": 1.2462, + "step": 30637 + }, + { + "epoch": 0.03, + "learning_rate": 4.98850140960345e-05, + "loss": 1.3682, + "step": 30638 + }, + { + "epoch": 0.03, + "learning_rate": 4.9885006571013e-05, + "loss": 1.0604, + "step": 30639 + }, + { + "epoch": 0.03, + "learning_rate": 4.988499904574586e-05, + "loss": 0.7721, + "step": 30640 + }, + { + "epoch": 0.03, + "learning_rate": 4.988499152023306e-05, + "loss": 1.0694, + "step": 30641 + }, + { + "epoch": 0.03, + "learning_rate": 4.98849839944746e-05, + "loss": 0.9346, + "step": 30642 + }, + { + "epoch": 0.03, + "learning_rate": 4.988497646847049e-05, + "loss": 0.7156, + "step": 30643 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884968942220735e-05, + "loss": 0.7742, + "step": 30644 + }, + { + "epoch": 0.03, + "learning_rate": 4.988496141572531e-05, + "loss": 0.8534, + "step": 30645 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884953888984245e-05, + "loss": 1.0803, + "step": 30646 + }, + { + "epoch": 0.03, + "learning_rate": 4.988494636199752e-05, + "loss": 0.8803, + "step": 30647 + }, + { + "epoch": 0.03, + "learning_rate": 4.988493883476514e-05, + "loss": 0.7285, + "step": 30648 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884931307287106e-05, + "loss": 0.9309, + "step": 30649 + }, + { + "epoch": 0.03, + "learning_rate": 4.988492377956342e-05, + "loss": 1.2221, + "step": 30650 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884916251594086e-05, + "loss": 1.3587, + "step": 30651 + }, + { + "epoch": 0.03, + "learning_rate": 4.988490872337909e-05, + "loss": 1.4991, + "step": 30652 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884901194918446e-05, + "loss": 1.0185, + "step": 30653 + }, + { + "epoch": 0.03, + "learning_rate": 4.988489366621214e-05, + "loss": 0.8844, + "step": 30654 + }, + { + "epoch": 0.03, + "learning_rate": 4.988488613726019e-05, + "loss": 0.8778, + "step": 30655 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884878608062585e-05, + "loss": 1.3571, + "step": 30656 + }, + { + "epoch": 0.03, + "learning_rate": 4.988487107861933e-05, + "loss": 0.9396, + "step": 30657 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884863548930414e-05, + "loss": 1.2973, + "step": 30658 + }, + { + "epoch": 0.03, + "learning_rate": 4.988485601899585e-05, + "loss": 1.6291, + "step": 30659 + }, + { + "epoch": 0.03, + "learning_rate": 4.988484848881563e-05, + "loss": 1.8122, + "step": 30660 + }, + { + "epoch": 0.03, + "learning_rate": 4.988484095838977e-05, + "loss": 1.6608, + "step": 30661 + }, + { + "epoch": 0.03, + "learning_rate": 4.988483342771824e-05, + "loss": 1.3499, + "step": 30662 + }, + { + "epoch": 0.03, + "learning_rate": 4.988482589680107e-05, + "loss": 1.5528, + "step": 30663 + }, + { + "epoch": 0.03, + "learning_rate": 4.988481836563824e-05, + "loss": 1.2257, + "step": 30664 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884810834229754e-05, + "loss": 1.1688, + "step": 30665 + }, + { + "epoch": 0.03, + "learning_rate": 4.988480330257562e-05, + "loss": 0.9462, + "step": 30666 + }, + { + "epoch": 0.03, + "learning_rate": 4.988479577067584e-05, + "loss": 1.0802, + "step": 30667 + }, + { + "epoch": 0.03, + "learning_rate": 4.988478823853041e-05, + "loss": 1.1848, + "step": 30668 + }, + { + "epoch": 0.03, + "learning_rate": 4.988478070613931e-05, + "loss": 1.2646, + "step": 30669 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884773173502576e-05, + "loss": 0.8181, + "step": 30670 + }, + { + "epoch": 0.03, + "learning_rate": 4.988476564062018e-05, + "loss": 1.0545, + "step": 30671 + }, + { + "epoch": 0.03, + "learning_rate": 4.988475810749213e-05, + "loss": 1.0717, + "step": 30672 + }, + { + "epoch": 0.03, + "learning_rate": 4.988475057411844e-05, + "loss": 0.9111, + "step": 30673 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884743040499096e-05, + "loss": 1.5837, + "step": 30674 + }, + { + "epoch": 0.03, + "learning_rate": 4.988473550663409e-05, + "loss": 1.2105, + "step": 30675 + }, + { + "epoch": 0.03, + "learning_rate": 4.988472797252344e-05, + "loss": 0.9293, + "step": 30676 + }, + { + "epoch": 0.03, + "learning_rate": 4.988472043816713e-05, + "loss": 1.0433, + "step": 30677 + }, + { + "epoch": 0.03, + "learning_rate": 4.988471290356518e-05, + "loss": 0.9289, + "step": 30678 + }, + { + "epoch": 0.03, + "learning_rate": 4.988470536871757e-05, + "loss": 0.9399, + "step": 30679 + }, + { + "epoch": 0.03, + "learning_rate": 4.988469783362432e-05, + "loss": 0.8584, + "step": 30680 + }, + { + "epoch": 0.03, + "learning_rate": 4.988469029828541e-05, + "loss": 1.0891, + "step": 30681 + }, + { + "epoch": 0.03, + "learning_rate": 4.988468276270085e-05, + "loss": 0.9515, + "step": 30682 + }, + { + "epoch": 0.03, + "learning_rate": 4.988467522687064e-05, + "loss": 1.1136, + "step": 30683 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884667690794776e-05, + "loss": 0.8503, + "step": 30684 + }, + { + "epoch": 0.03, + "learning_rate": 4.988466015447326e-05, + "loss": 0.1559, + "step": 30685 + }, + { + "epoch": 0.03, + "learning_rate": 4.988465261790609e-05, + "loss": 1.1242, + "step": 30686 + }, + { + "epoch": 0.03, + "learning_rate": 4.988464508109328e-05, + "loss": 1.0728, + "step": 30687 + }, + { + "epoch": 0.03, + "learning_rate": 4.988463754403481e-05, + "loss": 0.8833, + "step": 30688 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884630006730695e-05, + "loss": 0.4325, + "step": 30689 + }, + { + "epoch": 0.03, + "learning_rate": 4.988462246918093e-05, + "loss": 0.6094, + "step": 30690 + }, + { + "epoch": 0.03, + "learning_rate": 4.988461493138551e-05, + "loss": 0.627, + "step": 30691 + }, + { + "epoch": 0.03, + "learning_rate": 4.988460739334444e-05, + "loss": 0.6011, + "step": 30692 + }, + { + "epoch": 0.03, + "learning_rate": 4.988459985505772e-05, + "loss": 0.531, + "step": 30693 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884592316525355e-05, + "loss": 0.4345, + "step": 30694 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884584777747336e-05, + "loss": 0.8148, + "step": 30695 + }, + { + "epoch": 0.03, + "learning_rate": 4.988457723872367e-05, + "loss": 1.0006, + "step": 30696 + }, + { + "epoch": 0.03, + "learning_rate": 4.988456969945435e-05, + "loss": 1.0489, + "step": 30697 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884562159939386e-05, + "loss": 1.4584, + "step": 30698 + }, + { + "epoch": 0.03, + "learning_rate": 4.988455462017876e-05, + "loss": 1.4354, + "step": 30699 + }, + { + "epoch": 0.03, + "learning_rate": 4.98845470801725e-05, + "loss": 0.8983, + "step": 30700 + }, + { + "epoch": 0.03, + "learning_rate": 4.988453953992057e-05, + "loss": 1.4133, + "step": 30701 + }, + { + "epoch": 0.03, + "learning_rate": 4.988453199942301e-05, + "loss": 1.4515, + "step": 30702 + }, + { + "epoch": 0.03, + "learning_rate": 4.988452445867979e-05, + "loss": 1.0514, + "step": 30703 + }, + { + "epoch": 0.03, + "learning_rate": 4.988451691769092e-05, + "loss": 0.9635, + "step": 30704 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884509376456405e-05, + "loss": 1.3388, + "step": 30705 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884501834976235e-05, + "loss": 1.0488, + "step": 30706 + }, + { + "epoch": 0.03, + "learning_rate": 4.988449429325042e-05, + "loss": 0.9885, + "step": 30707 + }, + { + "epoch": 0.03, + "learning_rate": 4.988448675127896e-05, + "loss": 1.0295, + "step": 30708 + }, + { + "epoch": 0.03, + "learning_rate": 4.988447920906185e-05, + "loss": 1.2971, + "step": 30709 + }, + { + "epoch": 0.03, + "learning_rate": 4.988447166659908e-05, + "loss": 1.0716, + "step": 30710 + }, + { + "epoch": 0.03, + "learning_rate": 4.988446412389067e-05, + "loss": 0.8338, + "step": 30711 + }, + { + "epoch": 0.03, + "learning_rate": 4.988445658093661e-05, + "loss": 0.8478, + "step": 30712 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884449037736904e-05, + "loss": 0.7238, + "step": 30713 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884441494291536e-05, + "loss": 0.9706, + "step": 30714 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884433950600536e-05, + "loss": 1.1998, + "step": 30715 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884426406663876e-05, + "loss": 0.8999, + "step": 30716 + }, + { + "epoch": 0.03, + "learning_rate": 4.988441886248158e-05, + "loss": 1.2387, + "step": 30717 + }, + { + "epoch": 0.03, + "learning_rate": 4.988441131805362e-05, + "loss": 1.0482, + "step": 30718 + }, + { + "epoch": 0.03, + "learning_rate": 4.988440377338002e-05, + "loss": 0.9968, + "step": 30719 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884396228460766e-05, + "loss": 1.0392, + "step": 30720 + }, + { + "epoch": 0.03, + "learning_rate": 4.988438868329587e-05, + "loss": 1.1175, + "step": 30721 + }, + { + "epoch": 0.03, + "learning_rate": 4.988438113788533e-05, + "loss": 1.0123, + "step": 30722 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884373592229125e-05, + "loss": 1.0363, + "step": 30723 + }, + { + "epoch": 0.03, + "learning_rate": 4.988436604632729e-05, + "loss": 1.1278, + "step": 30724 + }, + { + "epoch": 0.03, + "learning_rate": 4.98843585001798e-05, + "loss": 0.542, + "step": 30725 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884350953786654e-05, + "loss": 0.576, + "step": 30726 + }, + { + "epoch": 0.03, + "learning_rate": 4.988434340714787e-05, + "loss": 0.3973, + "step": 30727 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884335860263434e-05, + "loss": 0.3287, + "step": 30728 + }, + { + "epoch": 0.03, + "learning_rate": 4.988432831313335e-05, + "loss": 1.2414, + "step": 30729 + }, + { + "epoch": 0.03, + "learning_rate": 4.988432076575763e-05, + "loss": 1.4549, + "step": 30730 + }, + { + "epoch": 0.03, + "learning_rate": 4.988431321813625e-05, + "loss": 1.2731, + "step": 30731 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884305670269226e-05, + "loss": 1.0209, + "step": 30732 + }, + { + "epoch": 0.03, + "learning_rate": 4.988429812215656e-05, + "loss": 0.8647, + "step": 30733 + }, + { + "epoch": 0.03, + "learning_rate": 4.988429057379824e-05, + "loss": 0.6275, + "step": 30734 + }, + { + "epoch": 0.03, + "learning_rate": 4.988428302519427e-05, + "loss": 0.8931, + "step": 30735 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884275476344656e-05, + "loss": 0.8454, + "step": 30736 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884267927249396e-05, + "loss": 1.054, + "step": 30737 + }, + { + "epoch": 0.03, + "learning_rate": 4.988426037790849e-05, + "loss": 0.8033, + "step": 30738 + }, + { + "epoch": 0.03, + "learning_rate": 4.988425282832193e-05, + "loss": 0.7815, + "step": 30739 + }, + { + "epoch": 0.03, + "learning_rate": 4.988424527848973e-05, + "loss": 1.2287, + "step": 30740 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884237728411886e-05, + "loss": 0.895, + "step": 30741 + }, + { + "epoch": 0.03, + "learning_rate": 4.988423017808839e-05, + "loss": 1.2197, + "step": 30742 + }, + { + "epoch": 0.03, + "learning_rate": 4.988422262751925e-05, + "loss": 1.1645, + "step": 30743 + }, + { + "epoch": 0.03, + "learning_rate": 4.988421507670446e-05, + "loss": 0.9192, + "step": 30744 + }, + { + "epoch": 0.03, + "learning_rate": 4.988420752564403e-05, + "loss": 1.1278, + "step": 30745 + }, + { + "epoch": 0.03, + "learning_rate": 4.988419997433795e-05, + "loss": 0.894, + "step": 30746 + }, + { + "epoch": 0.03, + "learning_rate": 4.988419242278622e-05, + "loss": 0.8949, + "step": 30747 + }, + { + "epoch": 0.03, + "learning_rate": 4.988418487098885e-05, + "loss": 1.2168, + "step": 30748 + }, + { + "epoch": 0.03, + "learning_rate": 4.988417731894582e-05, + "loss": 1.4005, + "step": 30749 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884169766657165e-05, + "loss": 0.9872, + "step": 30750 + }, + { + "epoch": 0.03, + "learning_rate": 4.988416221412285e-05, + "loss": 1.2191, + "step": 30751 + }, + { + "epoch": 0.03, + "learning_rate": 4.988415466134289e-05, + "loss": 1.0413, + "step": 30752 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884147108317293e-05, + "loss": 0.9987, + "step": 30753 + }, + { + "epoch": 0.03, + "learning_rate": 4.988413955504604e-05, + "loss": 1.1717, + "step": 30754 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884132001529145e-05, + "loss": 1.0951, + "step": 30755 + }, + { + "epoch": 0.03, + "learning_rate": 4.98841244477666e-05, + "loss": 1.2327, + "step": 30756 + }, + { + "epoch": 0.03, + "learning_rate": 4.988411689375841e-05, + "loss": 0.7399, + "step": 30757 + }, + { + "epoch": 0.03, + "learning_rate": 4.988410933950458e-05, + "loss": 0.8827, + "step": 30758 + }, + { + "epoch": 0.03, + "learning_rate": 4.988410178500511e-05, + "loss": 1.0662, + "step": 30759 + }, + { + "epoch": 0.03, + "learning_rate": 4.988409423025998e-05, + "loss": 1.015, + "step": 30760 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884086675269215e-05, + "loss": 0.9964, + "step": 30761 + }, + { + "epoch": 0.03, + "learning_rate": 4.98840791200328e-05, + "loss": 1.0911, + "step": 30762 + }, + { + "epoch": 0.03, + "learning_rate": 4.988407156455075e-05, + "loss": 0.9063, + "step": 30763 + }, + { + "epoch": 0.03, + "learning_rate": 4.988406400882304e-05, + "loss": 1.0577, + "step": 30764 + }, + { + "epoch": 0.03, + "learning_rate": 4.988405645284969e-05, + "loss": 0.5652, + "step": 30765 + }, + { + "epoch": 0.03, + "learning_rate": 4.98840488966307e-05, + "loss": 0.2647, + "step": 30766 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884041340166064e-05, + "loss": 0.1963, + "step": 30767 + }, + { + "epoch": 0.03, + "learning_rate": 4.988403378345578e-05, + "loss": 0.1829, + "step": 30768 + }, + { + "epoch": 0.03, + "learning_rate": 4.988402622649985e-05, + "loss": 0.2239, + "step": 30769 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884018669298284e-05, + "loss": 0.178, + "step": 30770 + }, + { + "epoch": 0.03, + "learning_rate": 4.988401111185106e-05, + "loss": 0.147, + "step": 30771 + }, + { + "epoch": 0.03, + "learning_rate": 4.98840035541582e-05, + "loss": 0.1481, + "step": 30772 + }, + { + "epoch": 0.03, + "learning_rate": 4.988399599621969e-05, + "loss": 0.1164, + "step": 30773 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883988438035546e-05, + "loss": 0.1026, + "step": 30774 + }, + { + "epoch": 0.03, + "learning_rate": 4.988398087960575e-05, + "loss": 0.1238, + "step": 30775 + }, + { + "epoch": 0.03, + "learning_rate": 4.988397332093031e-05, + "loss": 0.4293, + "step": 30776 + }, + { + "epoch": 0.03, + "learning_rate": 4.988396576200923e-05, + "loss": 0.7289, + "step": 30777 + }, + { + "epoch": 0.03, + "learning_rate": 4.988395820284251e-05, + "loss": 0.5855, + "step": 30778 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883950643430135e-05, + "loss": 0.5322, + "step": 30779 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883943083772124e-05, + "loss": 0.462, + "step": 30780 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883935523868466e-05, + "loss": 0.3822, + "step": 30781 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883927963719155e-05, + "loss": 0.3764, + "step": 30782 + }, + { + "epoch": 0.03, + "learning_rate": 4.988392040332421e-05, + "loss": 0.2117, + "step": 30783 + }, + { + "epoch": 0.03, + "learning_rate": 4.988391284268362e-05, + "loss": 0.152, + "step": 30784 + }, + { + "epoch": 0.03, + "learning_rate": 4.988390528179739e-05, + "loss": 0.124, + "step": 30785 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883897720665515e-05, + "loss": 0.0983, + "step": 30786 + }, + { + "epoch": 0.03, + "learning_rate": 4.988389015928799e-05, + "loss": 0.071, + "step": 30787 + }, + { + "epoch": 0.03, + "learning_rate": 4.988388259766483e-05, + "loss": 0.2442, + "step": 30788 + }, + { + "epoch": 0.03, + "learning_rate": 4.988387503579603e-05, + "loss": 0.8912, + "step": 30789 + }, + { + "epoch": 0.03, + "learning_rate": 4.988386747368158e-05, + "loss": 0.9765, + "step": 30790 + }, + { + "epoch": 0.03, + "learning_rate": 4.988385991132149e-05, + "loss": 0.9077, + "step": 30791 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883852348715754e-05, + "loss": 0.939, + "step": 30792 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883844785864374e-05, + "loss": 1.0323, + "step": 30793 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883837222767354e-05, + "loss": 0.9328, + "step": 30794 + }, + { + "epoch": 0.03, + "learning_rate": 4.988382965942469e-05, + "loss": 1.1524, + "step": 30795 + }, + { + "epoch": 0.03, + "learning_rate": 4.988382209583638e-05, + "loss": 0.9159, + "step": 30796 + }, + { + "epoch": 0.03, + "learning_rate": 4.988381453200244e-05, + "loss": 0.9552, + "step": 30797 + }, + { + "epoch": 0.03, + "learning_rate": 4.988380696792284e-05, + "loss": 1.0919, + "step": 30798 + }, + { + "epoch": 0.03, + "learning_rate": 4.988379940359761e-05, + "loss": 1.0775, + "step": 30799 + }, + { + "epoch": 0.03, + "learning_rate": 4.988379183902673e-05, + "loss": 0.9526, + "step": 30800 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883784274210214e-05, + "loss": 1.1698, + "step": 30801 + }, + { + "epoch": 0.03, + "learning_rate": 4.988377670914806e-05, + "loss": 0.9151, + "step": 30802 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883769143840255e-05, + "loss": 0.777, + "step": 30803 + }, + { + "epoch": 0.03, + "learning_rate": 4.98837615782868e-05, + "loss": 0.6588, + "step": 30804 + }, + { + "epoch": 0.03, + "learning_rate": 4.988375401248772e-05, + "loss": 1.2157, + "step": 30805 + }, + { + "epoch": 0.03, + "learning_rate": 4.988374644644299e-05, + "loss": 1.0752, + "step": 30806 + }, + { + "epoch": 0.03, + "learning_rate": 4.988373888015262e-05, + "loss": 1.0536, + "step": 30807 + }, + { + "epoch": 0.03, + "learning_rate": 4.988373131361661e-05, + "loss": 1.1489, + "step": 30808 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883723746834954e-05, + "loss": 1.0571, + "step": 30809 + }, + { + "epoch": 0.03, + "learning_rate": 4.988371617980766e-05, + "loss": 0.8992, + "step": 30810 + }, + { + "epoch": 0.03, + "learning_rate": 4.988370861253472e-05, + "loss": 1.0226, + "step": 30811 + }, + { + "epoch": 0.03, + "learning_rate": 4.988370104501614e-05, + "loss": 0.7734, + "step": 30812 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883693477251924e-05, + "loss": 0.9067, + "step": 30813 + }, + { + "epoch": 0.03, + "learning_rate": 4.988368590924206e-05, + "loss": 0.8182, + "step": 30814 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883678340986555e-05, + "loss": 0.3483, + "step": 30815 + }, + { + "epoch": 0.03, + "learning_rate": 4.988367077248541e-05, + "loss": 0.594, + "step": 30816 + }, + { + "epoch": 0.03, + "learning_rate": 4.988366320373863e-05, + "loss": 1.1313, + "step": 30817 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883655634746204e-05, + "loss": 1.1897, + "step": 30818 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883648065508135e-05, + "loss": 0.977, + "step": 30819 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883640496024426e-05, + "loss": 1.0866, + "step": 30820 + }, + { + "epoch": 0.03, + "learning_rate": 4.988363292629508e-05, + "loss": 1.1767, + "step": 30821 + }, + { + "epoch": 0.03, + "learning_rate": 4.988362535632008e-05, + "loss": 1.0421, + "step": 30822 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883617786099455e-05, + "loss": 0.9359, + "step": 30823 + }, + { + "epoch": 0.03, + "learning_rate": 4.988361021563319e-05, + "loss": 0.7084, + "step": 30824 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883602644921274e-05, + "loss": 1.358, + "step": 30825 + }, + { + "epoch": 0.03, + "learning_rate": 4.988359507396372e-05, + "loss": 1.149, + "step": 30826 + }, + { + "epoch": 0.03, + "learning_rate": 4.988358750276053e-05, + "loss": 1.0756, + "step": 30827 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883579931311696e-05, + "loss": 1.1956, + "step": 30828 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883572359617224e-05, + "loss": 1.5688, + "step": 30829 + }, + { + "epoch": 0.03, + "learning_rate": 4.988356478767711e-05, + "loss": 0.8604, + "step": 30830 + }, + { + "epoch": 0.03, + "learning_rate": 4.988355721549136e-05, + "loss": 0.8707, + "step": 30831 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883549643059965e-05, + "loss": 0.9937, + "step": 30832 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883542070382935e-05, + "loss": 0.8325, + "step": 30833 + }, + { + "epoch": 0.03, + "learning_rate": 4.988353449746026e-05, + "loss": 1.004, + "step": 30834 + }, + { + "epoch": 0.03, + "learning_rate": 4.988352692429194e-05, + "loss": 1.0539, + "step": 30835 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883519350877994e-05, + "loss": 0.98, + "step": 30836 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883511777218406e-05, + "loss": 1.102, + "step": 30837 + }, + { + "epoch": 0.03, + "learning_rate": 4.988350420331317e-05, + "loss": 0.8767, + "step": 30838 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883496629162304e-05, + "loss": 0.9658, + "step": 30839 + }, + { + "epoch": 0.03, + "learning_rate": 4.988348905476579e-05, + "loss": 0.989, + "step": 30840 + }, + { + "epoch": 0.03, + "learning_rate": 4.988348148012364e-05, + "loss": 0.9126, + "step": 30841 + }, + { + "epoch": 0.03, + "learning_rate": 4.988347390523586e-05, + "loss": 0.8621, + "step": 30842 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883466330102426e-05, + "loss": 0.9279, + "step": 30843 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883458754723354e-05, + "loss": 0.8736, + "step": 30844 + }, + { + "epoch": 0.03, + "learning_rate": 4.988345117909865e-05, + "loss": 0.9437, + "step": 30845 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883443603228305e-05, + "loss": 1.0656, + "step": 30846 + }, + { + "epoch": 0.03, + "learning_rate": 4.988343602711232e-05, + "loss": 0.9205, + "step": 30847 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883428450750705e-05, + "loss": 1.1868, + "step": 30848 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883420874143435e-05, + "loss": 1.2871, + "step": 30849 + }, + { + "epoch": 0.03, + "learning_rate": 4.988341329729054e-05, + "loss": 1.2622, + "step": 30850 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883405720192e-05, + "loss": 1.1967, + "step": 30851 + }, + { + "epoch": 0.03, + "learning_rate": 4.988339814284782e-05, + "loss": 1.1167, + "step": 30852 + }, + { + "epoch": 0.03, + "learning_rate": 4.988339056525801e-05, + "loss": 0.9046, + "step": 30853 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883382987422554e-05, + "loss": 0.6961, + "step": 30854 + }, + { + "epoch": 0.03, + "learning_rate": 4.988337540934146e-05, + "loss": 1.0732, + "step": 30855 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883367831014735e-05, + "loss": 1.0166, + "step": 30856 + }, + { + "epoch": 0.03, + "learning_rate": 4.988336025244236e-05, + "loss": 0.8903, + "step": 30857 + }, + { + "epoch": 0.03, + "learning_rate": 4.988335267362436e-05, + "loss": 0.9497, + "step": 30858 + }, + { + "epoch": 0.03, + "learning_rate": 4.988334509456071e-05, + "loss": 0.7152, + "step": 30859 + }, + { + "epoch": 0.03, + "learning_rate": 4.988333751525143e-05, + "loss": 1.1104, + "step": 30860 + }, + { + "epoch": 0.03, + "learning_rate": 4.988332993569651e-05, + "loss": 0.983, + "step": 30861 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883322355895954e-05, + "loss": 0.9386, + "step": 30862 + }, + { + "epoch": 0.03, + "learning_rate": 4.988331477584976e-05, + "loss": 1.1622, + "step": 30863 + }, + { + "epoch": 0.03, + "learning_rate": 4.988330719555793e-05, + "loss": 0.8267, + "step": 30864 + }, + { + "epoch": 0.03, + "learning_rate": 4.988329961502045e-05, + "loss": 0.4029, + "step": 30865 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883292034237345e-05, + "loss": 0.7313, + "step": 30866 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883284453208604e-05, + "loss": 1.3283, + "step": 30867 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883276871934217e-05, + "loss": 0.8308, + "step": 30868 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883269290414197e-05, + "loss": 0.6896, + "step": 30869 + }, + { + "epoch": 0.03, + "learning_rate": 4.988326170864854e-05, + "loss": 0.9304, + "step": 30870 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883254126637245e-05, + "loss": 0.9377, + "step": 30871 + }, + { + "epoch": 0.03, + "learning_rate": 4.988324654438031e-05, + "loss": 0.8895, + "step": 30872 + }, + { + "epoch": 0.03, + "learning_rate": 4.988323896187774e-05, + "loss": 0.9685, + "step": 30873 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883231379129544e-05, + "loss": 1.1858, + "step": 30874 + }, + { + "epoch": 0.03, + "learning_rate": 4.98832237961357e-05, + "loss": 0.8138, + "step": 30875 + }, + { + "epoch": 0.03, + "learning_rate": 4.988321621289622e-05, + "loss": 0.8471, + "step": 30876 + }, + { + "epoch": 0.03, + "learning_rate": 4.988320862941111e-05, + "loss": 1.1149, + "step": 30877 + }, + { + "epoch": 0.03, + "learning_rate": 4.988320104568035e-05, + "loss": 1.0742, + "step": 30878 + }, + { + "epoch": 0.03, + "learning_rate": 4.988319346170396e-05, + "loss": 1.1216, + "step": 30879 + }, + { + "epoch": 0.03, + "learning_rate": 4.988318587748194e-05, + "loss": 0.7568, + "step": 30880 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883178293014274e-05, + "loss": 1.0571, + "step": 30881 + }, + { + "epoch": 0.03, + "learning_rate": 4.988317070830099e-05, + "loss": 0.9979, + "step": 30882 + }, + { + "epoch": 0.03, + "learning_rate": 4.988316312334205e-05, + "loss": 1.0705, + "step": 30883 + }, + { + "epoch": 0.03, + "learning_rate": 4.988315553813748e-05, + "loss": 1.4355, + "step": 30884 + }, + { + "epoch": 0.03, + "learning_rate": 4.988314795268727e-05, + "loss": 1.0151, + "step": 30885 + }, + { + "epoch": 0.03, + "learning_rate": 4.988314036699143e-05, + "loss": 1.1235, + "step": 30886 + }, + { + "epoch": 0.03, + "learning_rate": 4.988313278104996e-05, + "loss": 1.0384, + "step": 30887 + }, + { + "epoch": 0.03, + "learning_rate": 4.988312519486285e-05, + "loss": 1.0041, + "step": 30888 + }, + { + "epoch": 0.03, + "learning_rate": 4.98831176084301e-05, + "loss": 1.1394, + "step": 30889 + }, + { + "epoch": 0.03, + "learning_rate": 4.988311002175171e-05, + "loss": 1.0498, + "step": 30890 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883102434827686e-05, + "loss": 0.9868, + "step": 30891 + }, + { + "epoch": 0.03, + "learning_rate": 4.988309484765803e-05, + "loss": 1.248, + "step": 30892 + }, + { + "epoch": 0.03, + "learning_rate": 4.988308726024274e-05, + "loss": 1.1788, + "step": 30893 + }, + { + "epoch": 0.03, + "learning_rate": 4.988307967258181e-05, + "loss": 1.0357, + "step": 30894 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883072084675255e-05, + "loss": 0.8957, + "step": 30895 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883064496523055e-05, + "loss": 1.2091, + "step": 30896 + }, + { + "epoch": 0.03, + "learning_rate": 4.988305690812522e-05, + "loss": 1.1129, + "step": 30897 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883049319481757e-05, + "loss": 0.8051, + "step": 30898 + }, + { + "epoch": 0.03, + "learning_rate": 4.988304173059266e-05, + "loss": 1.0976, + "step": 30899 + }, + { + "epoch": 0.03, + "learning_rate": 4.988303414145792e-05, + "loss": 0.9567, + "step": 30900 + }, + { + "epoch": 0.03, + "learning_rate": 4.988302655207755e-05, + "loss": 1.2036, + "step": 30901 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883018962451546e-05, + "loss": 1.0303, + "step": 30902 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883011372579903e-05, + "loss": 0.7258, + "step": 30903 + }, + { + "epoch": 0.03, + "learning_rate": 4.988300378246262e-05, + "loss": 0.6816, + "step": 30904 + }, + { + "epoch": 0.03, + "learning_rate": 4.988299619209972e-05, + "loss": 0.7713, + "step": 30905 + }, + { + "epoch": 0.03, + "learning_rate": 4.988298860149117e-05, + "loss": 1.0189, + "step": 30906 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882981010636983e-05, + "loss": 0.8823, + "step": 30907 + }, + { + "epoch": 0.03, + "learning_rate": 4.988297341953718e-05, + "loss": 1.0655, + "step": 30908 + }, + { + "epoch": 0.03, + "learning_rate": 4.988296582819173e-05, + "loss": 1.0257, + "step": 30909 + }, + { + "epoch": 0.03, + "learning_rate": 4.988295823660065e-05, + "loss": 0.8649, + "step": 30910 + }, + { + "epoch": 0.03, + "learning_rate": 4.988295064476393e-05, + "loss": 0.9377, + "step": 30911 + }, + { + "epoch": 0.03, + "learning_rate": 4.988294305268159e-05, + "loss": 0.4873, + "step": 30912 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882935460353596e-05, + "loss": 2.4373, + "step": 30913 + }, + { + "epoch": 0.03, + "learning_rate": 4.988292786777998e-05, + "loss": 2.1283, + "step": 30914 + }, + { + "epoch": 0.03, + "learning_rate": 4.988292027496073e-05, + "loss": 1.1733, + "step": 30915 + }, + { + "epoch": 0.03, + "learning_rate": 4.988291268189585e-05, + "loss": 1.0675, + "step": 30916 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882905088585325e-05, + "loss": 1.4456, + "step": 30917 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882897495029177e-05, + "loss": 1.0112, + "step": 30918 + }, + { + "epoch": 0.03, + "learning_rate": 4.988288990122739e-05, + "loss": 1.0282, + "step": 30919 + }, + { + "epoch": 0.03, + "learning_rate": 4.988288230717997e-05, + "loss": 1.4935, + "step": 30920 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882874712886915e-05, + "loss": 1.2815, + "step": 30921 + }, + { + "epoch": 0.03, + "learning_rate": 4.988286711834823e-05, + "loss": 1.1714, + "step": 30922 + }, + { + "epoch": 0.03, + "learning_rate": 4.988285952356391e-05, + "loss": 1.1801, + "step": 30923 + }, + { + "epoch": 0.03, + "learning_rate": 4.988285192853396e-05, + "loss": 1.3389, + "step": 30924 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882844333258374e-05, + "loss": 1.2132, + "step": 30925 + }, + { + "epoch": 0.03, + "learning_rate": 4.988283673773716e-05, + "loss": 1.2709, + "step": 30926 + }, + { + "epoch": 0.03, + "learning_rate": 4.988282914197031e-05, + "loss": 1.027, + "step": 30927 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882821545957824e-05, + "loss": 1.1039, + "step": 30928 + }, + { + "epoch": 0.03, + "learning_rate": 4.98828139496997e-05, + "loss": 1.0166, + "step": 30929 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882806353195953e-05, + "loss": 0.895, + "step": 30930 + }, + { + "epoch": 0.03, + "learning_rate": 4.988279875644657e-05, + "loss": 0.4044, + "step": 30931 + }, + { + "epoch": 0.03, + "learning_rate": 4.988279115945156e-05, + "loss": 0.5636, + "step": 30932 + }, + { + "epoch": 0.03, + "learning_rate": 4.988278356221091e-05, + "loss": 0.9179, + "step": 30933 + }, + { + "epoch": 0.03, + "learning_rate": 4.988277596472464e-05, + "loss": 1.3104, + "step": 30934 + }, + { + "epoch": 0.03, + "learning_rate": 4.988276836699273e-05, + "loss": 1.2106, + "step": 30935 + }, + { + "epoch": 0.03, + "learning_rate": 4.988276076901518e-05, + "loss": 1.2803, + "step": 30936 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882753170792004e-05, + "loss": 0.6509, + "step": 30937 + }, + { + "epoch": 0.03, + "learning_rate": 4.98827455723232e-05, + "loss": 0.9695, + "step": 30938 + }, + { + "epoch": 0.03, + "learning_rate": 4.988273797360876e-05, + "loss": 1.0386, + "step": 30939 + }, + { + "epoch": 0.03, + "learning_rate": 4.988273037464868e-05, + "loss": 1.0993, + "step": 30940 + }, + { + "epoch": 0.03, + "learning_rate": 4.988272277544298e-05, + "loss": 1.1072, + "step": 30941 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882715175991644e-05, + "loss": 1.0058, + "step": 30942 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882707576294677e-05, + "loss": 1.0063, + "step": 30943 + }, + { + "epoch": 0.03, + "learning_rate": 4.988269997635208e-05, + "loss": 0.5996, + "step": 30944 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882692376163856e-05, + "loss": 1.251, + "step": 30945 + }, + { + "epoch": 0.03, + "learning_rate": 4.988268477572999e-05, + "loss": 1.4792, + "step": 30946 + }, + { + "epoch": 0.03, + "learning_rate": 4.98826771750505e-05, + "loss": 0.9442, + "step": 30947 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882669574125374e-05, + "loss": 0.9332, + "step": 30948 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882661972954616e-05, + "loss": 0.6913, + "step": 30949 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882654371538226e-05, + "loss": 0.9789, + "step": 30950 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882646769876216e-05, + "loss": 0.8085, + "step": 30951 + }, + { + "epoch": 0.03, + "learning_rate": 4.988263916796857e-05, + "loss": 1.0667, + "step": 30952 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882631565815285e-05, + "loss": 1.8214, + "step": 30953 + }, + { + "epoch": 0.03, + "learning_rate": 4.988262396341638e-05, + "loss": 1.1644, + "step": 30954 + }, + { + "epoch": 0.03, + "learning_rate": 4.988261636077184e-05, + "loss": 1.1427, + "step": 30955 + }, + { + "epoch": 0.03, + "learning_rate": 4.988260875788166e-05, + "loss": 1.3241, + "step": 30956 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882601154745864e-05, + "loss": 0.9922, + "step": 30957 + }, + { + "epoch": 0.03, + "learning_rate": 4.988259355136443e-05, + "loss": 0.9675, + "step": 30958 + }, + { + "epoch": 0.03, + "learning_rate": 4.988258594773737e-05, + "loss": 0.9938, + "step": 30959 + }, + { + "epoch": 0.03, + "learning_rate": 4.988257834386467e-05, + "loss": 1.2241, + "step": 30960 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882570739746345e-05, + "loss": 1.0492, + "step": 30961 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882563135382395e-05, + "loss": 0.6426, + "step": 30962 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882555530772806e-05, + "loss": 0.855, + "step": 30963 + }, + { + "epoch": 0.03, + "learning_rate": 4.988254792591759e-05, + "loss": 0.9148, + "step": 30964 + }, + { + "epoch": 0.03, + "learning_rate": 4.988254032081675e-05, + "loss": 0.972, + "step": 30965 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882532715470275e-05, + "loss": 0.9347, + "step": 30966 + }, + { + "epoch": 0.03, + "learning_rate": 4.988252510987817e-05, + "loss": 1.3056, + "step": 30967 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882517504040435e-05, + "loss": 1.072, + "step": 30968 + }, + { + "epoch": 0.03, + "learning_rate": 4.988250989795707e-05, + "loss": 1.0219, + "step": 30969 + }, + { + "epoch": 0.03, + "learning_rate": 4.988250229162808e-05, + "loss": 0.9766, + "step": 30970 + }, + { + "epoch": 0.03, + "learning_rate": 4.988249468505345e-05, + "loss": 1.0861, + "step": 30971 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882487078233196e-05, + "loss": 1.0784, + "step": 30972 + }, + { + "epoch": 0.03, + "learning_rate": 4.988247947116732e-05, + "loss": 1.4828, + "step": 30973 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882471863855804e-05, + "loss": 1.1959, + "step": 30974 + }, + { + "epoch": 0.03, + "learning_rate": 4.988246425629867e-05, + "loss": 1.0805, + "step": 30975 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882456648495894e-05, + "loss": 0.6388, + "step": 30976 + }, + { + "epoch": 0.03, + "learning_rate": 4.98824490404475e-05, + "loss": 0.9993, + "step": 30977 + }, + { + "epoch": 0.03, + "learning_rate": 4.988244143215347e-05, + "loss": 0.8818, + "step": 30978 + }, + { + "epoch": 0.03, + "learning_rate": 4.988243382361381e-05, + "loss": 1.5963, + "step": 30979 + }, + { + "epoch": 0.03, + "learning_rate": 4.988242621482853e-05, + "loss": 1.1106, + "step": 30980 + }, + { + "epoch": 0.03, + "learning_rate": 4.988241860579761e-05, + "loss": 1.1211, + "step": 30981 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882410996521065e-05, + "loss": 0.7224, + "step": 30982 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882403386998895e-05, + "loss": 0.2853, + "step": 30983 + }, + { + "epoch": 0.03, + "learning_rate": 4.988239577723109e-05, + "loss": 0.1742, + "step": 30984 + }, + { + "epoch": 0.03, + "learning_rate": 4.988238816721766e-05, + "loss": 0.1263, + "step": 30985 + }, + { + "epoch": 0.03, + "learning_rate": 4.988238055695861e-05, + "loss": 0.036, + "step": 30986 + }, + { + "epoch": 0.03, + "learning_rate": 4.988237294645391e-05, + "loss": 0.0554, + "step": 30987 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882365335703605e-05, + "loss": 0.2686, + "step": 30988 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882357724707665e-05, + "loss": 1.0851, + "step": 30989 + }, + { + "epoch": 0.03, + "learning_rate": 4.988235011346609e-05, + "loss": 0.6612, + "step": 30990 + }, + { + "epoch": 0.03, + "learning_rate": 4.988234250197889e-05, + "loss": 0.8276, + "step": 30991 + }, + { + "epoch": 0.03, + "learning_rate": 4.988233489024606e-05, + "loss": 0.8289, + "step": 30992 + }, + { + "epoch": 0.03, + "learning_rate": 4.988232727826761e-05, + "loss": 1.2702, + "step": 30993 + }, + { + "epoch": 0.03, + "learning_rate": 4.988231966604353e-05, + "loss": 0.7934, + "step": 30994 + }, + { + "epoch": 0.03, + "learning_rate": 4.988231205357381e-05, + "loss": 0.7554, + "step": 30995 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882304440858476e-05, + "loss": 1.058, + "step": 30996 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882296827897515e-05, + "loss": 1.2017, + "step": 30997 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882289214690914e-05, + "loss": 0.8772, + "step": 30998 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882281601238694e-05, + "loss": 1.1035, + "step": 30999 + }, + { + "epoch": 0.03, + "learning_rate": 4.988227398754085e-05, + "loss": 0.9526, + "step": 31000 + }, + { + "epoch": 0.03, + "eval_loss": 1.0707390308380127, + "eval_runtime": 126.0963, + "eval_samples_per_second": 10.984, + "eval_steps_per_second": 5.496, + "step": 31000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882266373597375e-05, + "loss": 0.7795, + "step": 31001 + }, + { + "epoch": 0.03, + "learning_rate": 4.988225875940827e-05, + "loss": 1.2093, + "step": 31002 + }, + { + "epoch": 0.03, + "learning_rate": 4.988225114497353e-05, + "loss": 0.979, + "step": 31003 + }, + { + "epoch": 0.03, + "learning_rate": 4.988224353029318e-05, + "loss": 0.9168, + "step": 31004 + }, + { + "epoch": 0.03, + "learning_rate": 4.98822359153672e-05, + "loss": 1.0257, + "step": 31005 + }, + { + "epoch": 0.03, + "learning_rate": 4.988222830019559e-05, + "loss": 1.0683, + "step": 31006 + }, + { + "epoch": 0.03, + "learning_rate": 4.988222068477835e-05, + "loss": 1.2736, + "step": 31007 + }, + { + "epoch": 0.03, + "learning_rate": 4.988221306911548e-05, + "loss": 0.876, + "step": 31008 + }, + { + "epoch": 0.03, + "learning_rate": 4.988220545320699e-05, + "loss": 1.008, + "step": 31009 + }, + { + "epoch": 0.03, + "learning_rate": 4.988219783705287e-05, + "loss": 0.9431, + "step": 31010 + }, + { + "epoch": 0.03, + "learning_rate": 4.988219022065313e-05, + "loss": 0.8674, + "step": 31011 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882182604007755e-05, + "loss": 0.6015, + "step": 31012 + }, + { + "epoch": 0.03, + "learning_rate": 4.988217498711676e-05, + "loss": 1.0582, + "step": 31013 + }, + { + "epoch": 0.03, + "learning_rate": 4.988216736998014e-05, + "loss": 1.0658, + "step": 31014 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882159752597884e-05, + "loss": 0.994, + "step": 31015 + }, + { + "epoch": 0.03, + "learning_rate": 4.988215213497001e-05, + "loss": 1.0014, + "step": 31016 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882144517096506e-05, + "loss": 0.8718, + "step": 31017 + }, + { + "epoch": 0.03, + "learning_rate": 4.988213689897738e-05, + "loss": 1.1802, + "step": 31018 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882129280612624e-05, + "loss": 0.9197, + "step": 31019 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882121662002244e-05, + "loss": 0.8151, + "step": 31020 + }, + { + "epoch": 0.03, + "learning_rate": 4.988211404314623e-05, + "loss": 1.0026, + "step": 31021 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882106424044607e-05, + "loss": 1.0432, + "step": 31022 + }, + { + "epoch": 0.03, + "learning_rate": 4.988209880469734e-05, + "loss": 0.9839, + "step": 31023 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882091185104465e-05, + "loss": 0.9435, + "step": 31024 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882083565265955e-05, + "loss": 1.1737, + "step": 31025 + }, + { + "epoch": 0.03, + "learning_rate": 4.988207594518182e-05, + "loss": 1.0965, + "step": 31026 + }, + { + "epoch": 0.03, + "learning_rate": 4.988206832485206e-05, + "loss": 1.1231, + "step": 31027 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882060704276676e-05, + "loss": 0.9103, + "step": 31028 + }, + { + "epoch": 0.03, + "learning_rate": 4.988205308345566e-05, + "loss": 0.9904, + "step": 31029 + }, + { + "epoch": 0.03, + "learning_rate": 4.988204546238903e-05, + "loss": 1.0698, + "step": 31030 + }, + { + "epoch": 0.03, + "learning_rate": 4.988203784107677e-05, + "loss": 0.6189, + "step": 31031 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882030219518885e-05, + "loss": 0.8418, + "step": 31032 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882022597715374e-05, + "loss": 0.3539, + "step": 31033 + }, + { + "epoch": 0.03, + "learning_rate": 4.9882014975666236e-05, + "loss": 0.9451, + "step": 31034 + }, + { + "epoch": 0.03, + "learning_rate": 4.988200735337147e-05, + "loss": 1.2266, + "step": 31035 + }, + { + "epoch": 0.03, + "learning_rate": 4.988199973083109e-05, + "loss": 1.4084, + "step": 31036 + }, + { + "epoch": 0.03, + "learning_rate": 4.988199210804508e-05, + "loss": 1.0336, + "step": 31037 + }, + { + "epoch": 0.03, + "learning_rate": 4.988198448501345e-05, + "loss": 1.37, + "step": 31038 + }, + { + "epoch": 0.03, + "learning_rate": 4.988197686173619e-05, + "loss": 1.0472, + "step": 31039 + }, + { + "epoch": 0.03, + "learning_rate": 4.988196923821331e-05, + "loss": 1.3074, + "step": 31040 + }, + { + "epoch": 0.03, + "learning_rate": 4.98819616144448e-05, + "loss": 1.2019, + "step": 31041 + }, + { + "epoch": 0.03, + "learning_rate": 4.988195399043067e-05, + "loss": 0.778, + "step": 31042 + }, + { + "epoch": 0.03, + "learning_rate": 4.988194636617092e-05, + "loss": 1.1895, + "step": 31043 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881938741665537e-05, + "loss": 1.0543, + "step": 31044 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881931116914534e-05, + "loss": 1.1415, + "step": 31045 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881923491917905e-05, + "loss": 1.052, + "step": 31046 + }, + { + "epoch": 0.03, + "learning_rate": 4.988191586667566e-05, + "loss": 0.9192, + "step": 31047 + }, + { + "epoch": 0.03, + "learning_rate": 4.988190824118778e-05, + "loss": 1.1586, + "step": 31048 + }, + { + "epoch": 0.03, + "learning_rate": 4.988190061545428e-05, + "loss": 1.052, + "step": 31049 + }, + { + "epoch": 0.03, + "learning_rate": 4.988189298947516e-05, + "loss": 0.8031, + "step": 31050 + }, + { + "epoch": 0.03, + "learning_rate": 4.988188536325041e-05, + "loss": 0.9432, + "step": 31051 + }, + { + "epoch": 0.03, + "learning_rate": 4.988187773678003e-05, + "loss": 1.044, + "step": 31052 + }, + { + "epoch": 0.03, + "learning_rate": 4.988187011006404e-05, + "loss": 1.0176, + "step": 31053 + }, + { + "epoch": 0.03, + "learning_rate": 4.988186248310243e-05, + "loss": 0.9243, + "step": 31054 + }, + { + "epoch": 0.03, + "learning_rate": 4.988185485589519e-05, + "loss": 1.023, + "step": 31055 + }, + { + "epoch": 0.03, + "learning_rate": 4.988184722844233e-05, + "loss": 1.1306, + "step": 31056 + }, + { + "epoch": 0.03, + "learning_rate": 4.988183960074384e-05, + "loss": 0.8476, + "step": 31057 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881831972799734e-05, + "loss": 0.9711, + "step": 31058 + }, + { + "epoch": 0.03, + "learning_rate": 4.988182434461e-05, + "loss": 0.8711, + "step": 31059 + }, + { + "epoch": 0.03, + "learning_rate": 4.988181671617464e-05, + "loss": 1.1508, + "step": 31060 + }, + { + "epoch": 0.03, + "learning_rate": 4.988180908749367e-05, + "loss": 1.3703, + "step": 31061 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881801458567066e-05, + "loss": 1.0606, + "step": 31062 + }, + { + "epoch": 0.03, + "learning_rate": 4.988179382939484e-05, + "loss": 0.8126, + "step": 31063 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881786199977e-05, + "loss": 1.1637, + "step": 31064 + }, + { + "epoch": 0.03, + "learning_rate": 4.988177857031353e-05, + "loss": 0.9856, + "step": 31065 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881770940404444e-05, + "loss": 1.1652, + "step": 31066 + }, + { + "epoch": 0.03, + "learning_rate": 4.988176331024973e-05, + "loss": 0.8629, + "step": 31067 + }, + { + "epoch": 0.03, + "learning_rate": 4.98817556798494e-05, + "loss": 1.0797, + "step": 31068 + }, + { + "epoch": 0.03, + "learning_rate": 4.988174804920344e-05, + "loss": 1.0835, + "step": 31069 + }, + { + "epoch": 0.03, + "learning_rate": 4.988174041831186e-05, + "loss": 0.9097, + "step": 31070 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881732787174655e-05, + "loss": 1.2518, + "step": 31071 + }, + { + "epoch": 0.03, + "learning_rate": 4.988172515579184e-05, + "loss": 0.9548, + "step": 31072 + }, + { + "epoch": 0.03, + "learning_rate": 4.988171752416338e-05, + "loss": 0.7177, + "step": 31073 + }, + { + "epoch": 0.03, + "learning_rate": 4.988170989228932e-05, + "loss": 0.8398, + "step": 31074 + }, + { + "epoch": 0.03, + "learning_rate": 4.988170226016963e-05, + "loss": 1.6581, + "step": 31075 + }, + { + "epoch": 0.03, + "learning_rate": 4.988169462780432e-05, + "loss": 1.3204, + "step": 31076 + }, + { + "epoch": 0.03, + "learning_rate": 4.988168699519339e-05, + "loss": 0.9408, + "step": 31077 + }, + { + "epoch": 0.03, + "learning_rate": 4.988167936233684e-05, + "loss": 0.8958, + "step": 31078 + }, + { + "epoch": 0.03, + "learning_rate": 4.988167172923466e-05, + "loss": 1.0068, + "step": 31079 + }, + { + "epoch": 0.03, + "learning_rate": 4.988166409588686e-05, + "loss": 0.9276, + "step": 31080 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881656462293446e-05, + "loss": 1.1198, + "step": 31081 + }, + { + "epoch": 0.03, + "learning_rate": 4.988164882845441e-05, + "loss": 1.3312, + "step": 31082 + }, + { + "epoch": 0.03, + "learning_rate": 4.988164119436974e-05, + "loss": 1.5236, + "step": 31083 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881633560039466e-05, + "loss": 1.5965, + "step": 31084 + }, + { + "epoch": 0.03, + "learning_rate": 4.988162592546356e-05, + "loss": 2.0023, + "step": 31085 + }, + { + "epoch": 0.03, + "learning_rate": 4.988161829064204e-05, + "loss": 1.9275, + "step": 31086 + }, + { + "epoch": 0.03, + "learning_rate": 4.988161065557489e-05, + "loss": 1.0676, + "step": 31087 + }, + { + "epoch": 0.03, + "learning_rate": 4.988160302026213e-05, + "loss": 0.9395, + "step": 31088 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881595384703743e-05, + "loss": 1.0419, + "step": 31089 + }, + { + "epoch": 0.03, + "learning_rate": 4.988158774889974e-05, + "loss": 0.9633, + "step": 31090 + }, + { + "epoch": 0.03, + "learning_rate": 4.988158011285011e-05, + "loss": 1.0848, + "step": 31091 + }, + { + "epoch": 0.03, + "learning_rate": 4.988157247655486e-05, + "loss": 0.7604, + "step": 31092 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881564840013994e-05, + "loss": 0.893, + "step": 31093 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881557203227506e-05, + "loss": 0.5856, + "step": 31094 + }, + { + "epoch": 0.03, + "learning_rate": 4.988154956619539e-05, + "loss": 0.797, + "step": 31095 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881541928917674e-05, + "loss": 1.2096, + "step": 31096 + }, + { + "epoch": 0.03, + "learning_rate": 4.988153429139432e-05, + "loss": 0.8775, + "step": 31097 + }, + { + "epoch": 0.03, + "learning_rate": 4.988152665362535e-05, + "loss": 1.0341, + "step": 31098 + }, + { + "epoch": 0.03, + "learning_rate": 4.988151901561077e-05, + "loss": 0.4907, + "step": 31099 + }, + { + "epoch": 0.03, + "learning_rate": 4.988151137735055e-05, + "loss": 0.9136, + "step": 31100 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881503738844724e-05, + "loss": 0.9816, + "step": 31101 + }, + { + "epoch": 0.03, + "learning_rate": 4.988149610009328e-05, + "loss": 0.6617, + "step": 31102 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881488461096216e-05, + "loss": 0.6599, + "step": 31103 + }, + { + "epoch": 0.03, + "learning_rate": 4.988148082185353e-05, + "loss": 0.9855, + "step": 31104 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881473182365224e-05, + "loss": 0.4911, + "step": 31105 + }, + { + "epoch": 0.03, + "learning_rate": 4.988146554263129e-05, + "loss": 0.9109, + "step": 31106 + }, + { + "epoch": 0.03, + "learning_rate": 4.988145790265175e-05, + "loss": 1.0325, + "step": 31107 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881450262426586e-05, + "loss": 1.1878, + "step": 31108 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881442621955796e-05, + "loss": 0.8843, + "step": 31109 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881434981239395e-05, + "loss": 1.089, + "step": 31110 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881427340277374e-05, + "loss": 1.1829, + "step": 31111 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881419699069734e-05, + "loss": 0.8589, + "step": 31112 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881412057616475e-05, + "loss": 1.1502, + "step": 31113 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881404415917596e-05, + "loss": 0.9139, + "step": 31114 + }, + { + "epoch": 0.03, + "learning_rate": 4.98813967739731e-05, + "loss": 0.8628, + "step": 31115 + }, + { + "epoch": 0.03, + "learning_rate": 4.988138913178299e-05, + "loss": 1.1171, + "step": 31116 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881381489347245e-05, + "loss": 0.8848, + "step": 31117 + }, + { + "epoch": 0.03, + "learning_rate": 4.98813738466659e-05, + "loss": 0.4539, + "step": 31118 + }, + { + "epoch": 0.03, + "learning_rate": 4.988136620373892e-05, + "loss": 1.135, + "step": 31119 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881358560566335e-05, + "loss": 1.098, + "step": 31120 + }, + { + "epoch": 0.03, + "learning_rate": 4.988135091714813e-05, + "loss": 0.9795, + "step": 31121 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881343273484296e-05, + "loss": 1.0612, + "step": 31122 + }, + { + "epoch": 0.03, + "learning_rate": 4.988133562957485e-05, + "loss": 0.6399, + "step": 31123 + }, + { + "epoch": 0.03, + "learning_rate": 4.988132798541979e-05, + "loss": 0.6805, + "step": 31124 + }, + { + "epoch": 0.03, + "learning_rate": 4.988132034101911e-05, + "loss": 0.2489, + "step": 31125 + }, + { + "epoch": 0.03, + "learning_rate": 4.988131269637281e-05, + "loss": 0.3996, + "step": 31126 + }, + { + "epoch": 0.03, + "learning_rate": 4.98813050514809e-05, + "loss": 0.518, + "step": 31127 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881297406343367e-05, + "loss": 0.5071, + "step": 31128 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881289760960206e-05, + "loss": 0.9123, + "step": 31129 + }, + { + "epoch": 0.03, + "learning_rate": 4.988128211533144e-05, + "loss": 0.8301, + "step": 31130 + }, + { + "epoch": 0.03, + "learning_rate": 4.988127446945705e-05, + "loss": 0.9191, + "step": 31131 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881266823337045e-05, + "loss": 1.0048, + "step": 31132 + }, + { + "epoch": 0.03, + "learning_rate": 4.988125917697143e-05, + "loss": 0.9482, + "step": 31133 + }, + { + "epoch": 0.03, + "learning_rate": 4.988125153036019e-05, + "loss": 1.145, + "step": 31134 + }, + { + "epoch": 0.03, + "learning_rate": 4.988124388350333e-05, + "loss": 1.0047, + "step": 31135 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881236236400856e-05, + "loss": 0.9471, + "step": 31136 + }, + { + "epoch": 0.03, + "learning_rate": 4.988122858905277e-05, + "loss": 1.1327, + "step": 31137 + }, + { + "epoch": 0.03, + "learning_rate": 4.988122094145906e-05, + "loss": 0.8633, + "step": 31138 + }, + { + "epoch": 0.03, + "learning_rate": 4.988121329361973e-05, + "loss": 0.9268, + "step": 31139 + }, + { + "epoch": 0.03, + "learning_rate": 4.988120564553479e-05, + "loss": 1.0376, + "step": 31140 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881197997204236e-05, + "loss": 1.1929, + "step": 31141 + }, + { + "epoch": 0.03, + "learning_rate": 4.988119034862806e-05, + "loss": 1.0171, + "step": 31142 + }, + { + "epoch": 0.03, + "learning_rate": 4.988118269980627e-05, + "loss": 1.0079, + "step": 31143 + }, + { + "epoch": 0.03, + "learning_rate": 4.988117505073886e-05, + "loss": 0.9677, + "step": 31144 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881167401425837e-05, + "loss": 1.2103, + "step": 31145 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881159751867204e-05, + "loss": 1.1452, + "step": 31146 + }, + { + "epoch": 0.03, + "learning_rate": 4.988115210206294e-05, + "loss": 1.0002, + "step": 31147 + }, + { + "epoch": 0.03, + "learning_rate": 4.988114445201307e-05, + "loss": 0.9408, + "step": 31148 + }, + { + "epoch": 0.03, + "learning_rate": 4.988113680171758e-05, + "loss": 1.1108, + "step": 31149 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881129151176474e-05, + "loss": 1.169, + "step": 31150 + }, + { + "epoch": 0.03, + "learning_rate": 4.988112150038975e-05, + "loss": 0.6471, + "step": 31151 + }, + { + "epoch": 0.03, + "learning_rate": 4.988111384935741e-05, + "loss": 0.9441, + "step": 31152 + }, + { + "epoch": 0.03, + "learning_rate": 4.988110619807946e-05, + "loss": 0.7557, + "step": 31153 + }, + { + "epoch": 0.03, + "learning_rate": 4.988109854655589e-05, + "loss": 1.2642, + "step": 31154 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881090894786705e-05, + "loss": 1.0488, + "step": 31155 + }, + { + "epoch": 0.03, + "learning_rate": 4.98810832427719e-05, + "loss": 0.9651, + "step": 31156 + }, + { + "epoch": 0.03, + "learning_rate": 4.988107559051148e-05, + "loss": 0.8601, + "step": 31157 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881067938005454e-05, + "loss": 1.0281, + "step": 31158 + }, + { + "epoch": 0.03, + "learning_rate": 4.988106028525381e-05, + "loss": 1.2271, + "step": 31159 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881052632256544e-05, + "loss": 1.0188, + "step": 31160 + }, + { + "epoch": 0.03, + "learning_rate": 4.988104497901367e-05, + "loss": 0.6781, + "step": 31161 + }, + { + "epoch": 0.03, + "learning_rate": 4.988103732552518e-05, + "loss": 1.0964, + "step": 31162 + }, + { + "epoch": 0.03, + "learning_rate": 4.988102967179107e-05, + "loss": 0.5624, + "step": 31163 + }, + { + "epoch": 0.03, + "learning_rate": 4.988102201781134e-05, + "loss": 0.2672, + "step": 31164 + }, + { + "epoch": 0.03, + "learning_rate": 4.988101436358601e-05, + "loss": 0.7303, + "step": 31165 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881006709115054e-05, + "loss": 0.976, + "step": 31166 + }, + { + "epoch": 0.03, + "learning_rate": 4.988099905439848e-05, + "loss": 0.9867, + "step": 31167 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880991399436304e-05, + "loss": 1.0032, + "step": 31168 + }, + { + "epoch": 0.03, + "learning_rate": 4.98809837442285e-05, + "loss": 1.0153, + "step": 31169 + }, + { + "epoch": 0.03, + "learning_rate": 4.98809760887751e-05, + "loss": 0.9615, + "step": 31170 + }, + { + "epoch": 0.03, + "learning_rate": 4.988096843307607e-05, + "loss": 1.0578, + "step": 31171 + }, + { + "epoch": 0.03, + "learning_rate": 4.988096077713143e-05, + "loss": 1.0015, + "step": 31172 + }, + { + "epoch": 0.03, + "learning_rate": 4.988095312094117e-05, + "loss": 0.6541, + "step": 31173 + }, + { + "epoch": 0.03, + "learning_rate": 4.98809454645053e-05, + "loss": 1.0696, + "step": 31174 + }, + { + "epoch": 0.03, + "learning_rate": 4.988093780782382e-05, + "loss": 0.8195, + "step": 31175 + }, + { + "epoch": 0.03, + "learning_rate": 4.988093015089672e-05, + "loss": 0.9658, + "step": 31176 + }, + { + "epoch": 0.03, + "learning_rate": 4.988092249372401e-05, + "loss": 0.9435, + "step": 31177 + }, + { + "epoch": 0.03, + "learning_rate": 4.988091483630568e-05, + "loss": 1.1178, + "step": 31178 + }, + { + "epoch": 0.03, + "learning_rate": 4.988090717864175e-05, + "loss": 0.8645, + "step": 31179 + }, + { + "epoch": 0.03, + "learning_rate": 4.988089952073219e-05, + "loss": 1.0417, + "step": 31180 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880891862577026e-05, + "loss": 1.0651, + "step": 31181 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880884204176237e-05, + "loss": 1.1419, + "step": 31182 + }, + { + "epoch": 0.03, + "learning_rate": 4.988087654552985e-05, + "loss": 0.9721, + "step": 31183 + }, + { + "epoch": 0.03, + "learning_rate": 4.988086888663784e-05, + "loss": 0.9936, + "step": 31184 + }, + { + "epoch": 0.03, + "learning_rate": 4.988086122750022e-05, + "loss": 1.0207, + "step": 31185 + }, + { + "epoch": 0.03, + "learning_rate": 4.988085356811698e-05, + "loss": 0.9599, + "step": 31186 + }, + { + "epoch": 0.03, + "learning_rate": 4.988084590848813e-05, + "loss": 1.019, + "step": 31187 + }, + { + "epoch": 0.03, + "learning_rate": 4.988083824861367e-05, + "loss": 0.8259, + "step": 31188 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880830588493596e-05, + "loss": 0.6954, + "step": 31189 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880822928127914e-05, + "loss": 1.0704, + "step": 31190 + }, + { + "epoch": 0.03, + "learning_rate": 4.988081526751661e-05, + "loss": 0.2421, + "step": 31191 + }, + { + "epoch": 0.03, + "learning_rate": 4.98808076066597e-05, + "loss": 0.1939, + "step": 31192 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880799945557166e-05, + "loss": 0.1959, + "step": 31193 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880792284209034e-05, + "loss": 0.2206, + "step": 31194 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880784622615276e-05, + "loss": 0.2204, + "step": 31195 + }, + { + "epoch": 0.03, + "learning_rate": 4.988077696077591e-05, + "loss": 0.118, + "step": 31196 + }, + { + "epoch": 0.03, + "learning_rate": 4.988076929869093e-05, + "loss": 0.1738, + "step": 31197 + }, + { + "epoch": 0.03, + "learning_rate": 4.988076163636035e-05, + "loss": 0.2748, + "step": 31198 + }, + { + "epoch": 0.03, + "learning_rate": 4.988075397378415e-05, + "loss": 0.2908, + "step": 31199 + }, + { + "epoch": 0.03, + "learning_rate": 4.988074631096233e-05, + "loss": 1.133, + "step": 31200 + }, + { + "epoch": 0.03, + "learning_rate": 4.98807386478949e-05, + "loss": 1.1462, + "step": 31201 + }, + { + "epoch": 0.03, + "learning_rate": 4.988073098458187e-05, + "loss": 1.2061, + "step": 31202 + }, + { + "epoch": 0.03, + "learning_rate": 4.988072332102322e-05, + "loss": 0.4324, + "step": 31203 + }, + { + "epoch": 0.03, + "learning_rate": 4.988071565721895e-05, + "loss": 0.4541, + "step": 31204 + }, + { + "epoch": 0.03, + "learning_rate": 4.988070799316907e-05, + "loss": 0.6359, + "step": 31205 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880700328873584e-05, + "loss": 1.0166, + "step": 31206 + }, + { + "epoch": 0.03, + "learning_rate": 4.98806926643325e-05, + "loss": 1.4289, + "step": 31207 + }, + { + "epoch": 0.03, + "learning_rate": 4.988068499954578e-05, + "loss": 0.9895, + "step": 31208 + }, + { + "epoch": 0.03, + "learning_rate": 4.988067733451346e-05, + "loss": 1.0679, + "step": 31209 + }, + { + "epoch": 0.03, + "learning_rate": 4.988066966923553e-05, + "loss": 1.2157, + "step": 31210 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880662003711984e-05, + "loss": 1.1808, + "step": 31211 + }, + { + "epoch": 0.03, + "learning_rate": 4.988065433794282e-05, + "loss": 0.9076, + "step": 31212 + }, + { + "epoch": 0.03, + "learning_rate": 4.988064667192805e-05, + "loss": 0.9739, + "step": 31213 + }, + { + "epoch": 0.03, + "learning_rate": 4.988063900566768e-05, + "loss": 1.1349, + "step": 31214 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880631339161685e-05, + "loss": 0.9991, + "step": 31215 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880623672410086e-05, + "loss": 1.1511, + "step": 31216 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880616005412874e-05, + "loss": 0.5119, + "step": 31217 + }, + { + "epoch": 0.03, + "learning_rate": 4.988060833817006e-05, + "loss": 0.792, + "step": 31218 + }, + { + "epoch": 0.03, + "learning_rate": 4.988060067068162e-05, + "loss": 0.8691, + "step": 31219 + }, + { + "epoch": 0.03, + "learning_rate": 4.988059300294757e-05, + "loss": 1.0, + "step": 31220 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880585334967924e-05, + "loss": 1.0261, + "step": 31221 + }, + { + "epoch": 0.03, + "learning_rate": 4.988057766674266e-05, + "loss": 1.1777, + "step": 31222 + }, + { + "epoch": 0.03, + "learning_rate": 4.988056999827178e-05, + "loss": 0.5877, + "step": 31223 + }, + { + "epoch": 0.03, + "learning_rate": 4.988056232955529e-05, + "loss": 1.0576, + "step": 31224 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880554660593195e-05, + "loss": 1.0662, + "step": 31225 + }, + { + "epoch": 0.03, + "learning_rate": 4.988054699138548e-05, + "loss": 0.9929, + "step": 31226 + }, + { + "epoch": 0.03, + "learning_rate": 4.988053932193216e-05, + "loss": 0.7149, + "step": 31227 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880531652233234e-05, + "loss": 1.0287, + "step": 31228 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880523982288694e-05, + "loss": 1.0477, + "step": 31229 + }, + { + "epoch": 0.03, + "learning_rate": 4.988051631209855e-05, + "loss": 1.1147, + "step": 31230 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880508641662796e-05, + "loss": 0.8166, + "step": 31231 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880500970981424e-05, + "loss": 1.028, + "step": 31232 + }, + { + "epoch": 0.03, + "learning_rate": 4.988049330005444e-05, + "loss": 0.6914, + "step": 31233 + }, + { + "epoch": 0.03, + "learning_rate": 4.988048562888186e-05, + "loss": 0.9805, + "step": 31234 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880477957463656e-05, + "loss": 0.906, + "step": 31235 + }, + { + "epoch": 0.03, + "learning_rate": 4.988047028579985e-05, + "loss": 0.8714, + "step": 31236 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880462613890436e-05, + "loss": 0.9737, + "step": 31237 + }, + { + "epoch": 0.03, + "learning_rate": 4.988045494173541e-05, + "loss": 0.9573, + "step": 31238 + }, + { + "epoch": 0.03, + "learning_rate": 4.988044726933477e-05, + "loss": 1.0233, + "step": 31239 + }, + { + "epoch": 0.03, + "learning_rate": 4.988043959668853e-05, + "loss": 0.7674, + "step": 31240 + }, + { + "epoch": 0.03, + "learning_rate": 4.988043192379667e-05, + "loss": 0.1684, + "step": 31241 + }, + { + "epoch": 0.03, + "learning_rate": 4.988042425065921e-05, + "loss": 1.0099, + "step": 31242 + }, + { + "epoch": 0.03, + "learning_rate": 4.988041657727614e-05, + "loss": 1.1556, + "step": 31243 + }, + { + "epoch": 0.03, + "learning_rate": 4.988040890364746e-05, + "loss": 1.0267, + "step": 31244 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880401229773164e-05, + "loss": 0.6688, + "step": 31245 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880393555653266e-05, + "loss": 0.5381, + "step": 31246 + }, + { + "epoch": 0.03, + "learning_rate": 4.988038588128776e-05, + "loss": 0.5451, + "step": 31247 + }, + { + "epoch": 0.03, + "learning_rate": 4.988037820667664e-05, + "loss": 0.987, + "step": 31248 + }, + { + "epoch": 0.03, + "learning_rate": 4.988037053181992e-05, + "loss": 1.3244, + "step": 31249 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880362856717584e-05, + "loss": 1.0293, + "step": 31250 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880355181369644e-05, + "loss": 0.9325, + "step": 31251 + }, + { + "epoch": 0.03, + "learning_rate": 4.988034750577609e-05, + "loss": 0.9609, + "step": 31252 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880339829936926e-05, + "loss": 0.9002, + "step": 31253 + }, + { + "epoch": 0.03, + "learning_rate": 4.988033215385216e-05, + "loss": 0.9267, + "step": 31254 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880324477521786e-05, + "loss": 0.8006, + "step": 31255 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880316800945805e-05, + "loss": 0.7568, + "step": 31256 + }, + { + "epoch": 0.03, + "learning_rate": 4.988030912412422e-05, + "loss": 0.7206, + "step": 31257 + }, + { + "epoch": 0.03, + "learning_rate": 4.988030144705702e-05, + "loss": 0.67, + "step": 31258 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880293769744205e-05, + "loss": 0.6345, + "step": 31259 + }, + { + "epoch": 0.03, + "learning_rate": 4.988028609218579e-05, + "loss": 0.6068, + "step": 31260 + }, + { + "epoch": 0.03, + "learning_rate": 4.988027841438177e-05, + "loss": 0.588, + "step": 31261 + }, + { + "epoch": 0.03, + "learning_rate": 4.988027073633214e-05, + "loss": 0.5887, + "step": 31262 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880263058036905e-05, + "loss": 0.5562, + "step": 31263 + }, + { + "epoch": 0.03, + "learning_rate": 4.988025537949606e-05, + "loss": 0.5501, + "step": 31264 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880247700709604e-05, + "loss": 0.5397, + "step": 31265 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880240021677545e-05, + "loss": 0.6111, + "step": 31266 + }, + { + "epoch": 0.03, + "learning_rate": 4.988023234239988e-05, + "loss": 0.5958, + "step": 31267 + }, + { + "epoch": 0.03, + "learning_rate": 4.98802246628766e-05, + "loss": 0.5405, + "step": 31268 + }, + { + "epoch": 0.03, + "learning_rate": 4.988021698310772e-05, + "loss": 0.5337, + "step": 31269 + }, + { + "epoch": 0.03, + "learning_rate": 4.988020930309323e-05, + "loss": 0.5649, + "step": 31270 + }, + { + "epoch": 0.03, + "learning_rate": 4.988020162283313e-05, + "loss": 0.5318, + "step": 31271 + }, + { + "epoch": 0.03, + "learning_rate": 4.988019394232744e-05, + "loss": 0.5023, + "step": 31272 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880186261576126e-05, + "loss": 0.5171, + "step": 31273 + }, + { + "epoch": 0.03, + "learning_rate": 4.988017858057921e-05, + "loss": 0.5374, + "step": 31274 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880170899336685e-05, + "loss": 0.5112, + "step": 31275 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880163217848556e-05, + "loss": 0.4862, + "step": 31276 + }, + { + "epoch": 0.03, + "learning_rate": 4.988015553611483e-05, + "loss": 0.4903, + "step": 31277 + }, + { + "epoch": 0.03, + "learning_rate": 4.988014785413549e-05, + "loss": 0.4757, + "step": 31278 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880140171910534e-05, + "loss": 0.4871, + "step": 31279 + }, + { + "epoch": 0.03, + "learning_rate": 4.988013248943998e-05, + "loss": 0.5023, + "step": 31280 + }, + { + "epoch": 0.03, + "learning_rate": 4.988012480672382e-05, + "loss": 0.4761, + "step": 31281 + }, + { + "epoch": 0.03, + "learning_rate": 4.988011712376205e-05, + "loss": 0.4745, + "step": 31282 + }, + { + "epoch": 0.03, + "learning_rate": 4.988010944055468e-05, + "loss": 0.4805, + "step": 31283 + }, + { + "epoch": 0.03, + "learning_rate": 4.98801017571017e-05, + "loss": 0.4549, + "step": 31284 + }, + { + "epoch": 0.03, + "learning_rate": 4.988009407340312e-05, + "loss": 0.4824, + "step": 31285 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880086389458924e-05, + "loss": 0.4908, + "step": 31286 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880078705269126e-05, + "loss": 0.4913, + "step": 31287 + }, + { + "epoch": 0.03, + "learning_rate": 4.988007102083373e-05, + "loss": 0.4724, + "step": 31288 + }, + { + "epoch": 0.03, + "learning_rate": 4.988006333615272e-05, + "loss": 0.4766, + "step": 31289 + }, + { + "epoch": 0.03, + "learning_rate": 4.988005565122611e-05, + "loss": 0.4756, + "step": 31290 + }, + { + "epoch": 0.03, + "learning_rate": 4.988004796605389e-05, + "loss": 0.4859, + "step": 31291 + }, + { + "epoch": 0.03, + "learning_rate": 4.988004028063606e-05, + "loss": 0.4854, + "step": 31292 + }, + { + "epoch": 0.03, + "learning_rate": 4.9880032594972637e-05, + "loss": 0.4822, + "step": 31293 + }, + { + "epoch": 0.03, + "learning_rate": 4.98800249090636e-05, + "loss": 0.4801, + "step": 31294 + }, + { + "epoch": 0.03, + "learning_rate": 4.988001722290896e-05, + "loss": 0.4689, + "step": 31295 + }, + { + "epoch": 0.03, + "learning_rate": 4.988000953650872e-05, + "loss": 0.4696, + "step": 31296 + }, + { + "epoch": 0.03, + "learning_rate": 4.988000184986287e-05, + "loss": 0.4898, + "step": 31297 + }, + { + "epoch": 0.03, + "learning_rate": 4.987999416297141e-05, + "loss": 0.4761, + "step": 31298 + }, + { + "epoch": 0.03, + "learning_rate": 4.987998647583435e-05, + "loss": 0.4554, + "step": 31299 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879978788451684e-05, + "loss": 0.4613, + "step": 31300 + }, + { + "epoch": 0.03, + "learning_rate": 4.987997110082342e-05, + "loss": 0.4496, + "step": 31301 + }, + { + "epoch": 0.03, + "learning_rate": 4.987996341294955e-05, + "loss": 0.4686, + "step": 31302 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879955724830075e-05, + "loss": 0.4624, + "step": 31303 + }, + { + "epoch": 0.03, + "learning_rate": 4.987994803646499e-05, + "loss": 0.4656, + "step": 31304 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879940347854306e-05, + "loss": 0.4751, + "step": 31305 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879932658998014e-05, + "loss": 0.4573, + "step": 31306 + }, + { + "epoch": 0.03, + "learning_rate": 4.987992496989612e-05, + "loss": 0.4659, + "step": 31307 + }, + { + "epoch": 0.03, + "learning_rate": 4.987991728054862e-05, + "loss": 0.4605, + "step": 31308 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879909590955515e-05, + "loss": 0.4565, + "step": 31309 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879901901116813e-05, + "loss": 0.4605, + "step": 31310 + }, + { + "epoch": 0.03, + "learning_rate": 4.98798942110325e-05, + "loss": 0.448, + "step": 31311 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879886520702586e-05, + "loss": 0.4585, + "step": 31312 + }, + { + "epoch": 0.03, + "learning_rate": 4.987987883012706e-05, + "loss": 0.456, + "step": 31313 + }, + { + "epoch": 0.03, + "learning_rate": 4.987987113930594e-05, + "loss": 0.458, + "step": 31314 + }, + { + "epoch": 0.03, + "learning_rate": 4.987986344823922e-05, + "loss": 0.4532, + "step": 31315 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879855756926884e-05, + "loss": 0.4632, + "step": 31316 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879848065368956e-05, + "loss": 0.4532, + "step": 31317 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879840373565415e-05, + "loss": 0.453, + "step": 31318 + }, + { + "epoch": 0.03, + "learning_rate": 4.987983268151628e-05, + "loss": 0.4619, + "step": 31319 + }, + { + "epoch": 0.03, + "learning_rate": 4.987982498922154e-05, + "loss": 0.4454, + "step": 31320 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879817296681194e-05, + "loss": 0.4677, + "step": 31321 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879809603895244e-05, + "loss": 0.4514, + "step": 31322 + }, + { + "epoch": 0.03, + "learning_rate": 4.987980191086369e-05, + "loss": 0.4483, + "step": 31323 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879794217586535e-05, + "loss": 0.4437, + "step": 31324 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879786524063775e-05, + "loss": 0.4434, + "step": 31325 + }, + { + "epoch": 0.03, + "learning_rate": 4.987977883029542e-05, + "loss": 0.4409, + "step": 31326 + }, + { + "epoch": 0.03, + "learning_rate": 4.987977113628145e-05, + "loss": 0.4442, + "step": 31327 + }, + { + "epoch": 0.03, + "learning_rate": 4.987976344202189e-05, + "loss": 0.4423, + "step": 31328 + }, + { + "epoch": 0.03, + "learning_rate": 4.987975574751672e-05, + "loss": 0.4465, + "step": 31329 + }, + { + "epoch": 0.03, + "learning_rate": 4.987974805276595e-05, + "loss": 0.437, + "step": 31330 + }, + { + "epoch": 0.03, + "learning_rate": 4.987974035776958e-05, + "loss": 0.4347, + "step": 31331 + }, + { + "epoch": 0.03, + "learning_rate": 4.98797326625276e-05, + "loss": 0.4386, + "step": 31332 + }, + { + "epoch": 0.03, + "learning_rate": 4.987972496704002e-05, + "loss": 0.4444, + "step": 31333 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879717271306844e-05, + "loss": 0.4403, + "step": 31334 + }, + { + "epoch": 0.03, + "learning_rate": 4.987970957532806e-05, + "loss": 0.4377, + "step": 31335 + }, + { + "epoch": 0.03, + "learning_rate": 4.987970187910368e-05, + "loss": 0.4471, + "step": 31336 + }, + { + "epoch": 0.03, + "learning_rate": 4.987969418263369e-05, + "loss": 0.4293, + "step": 31337 + }, + { + "epoch": 0.03, + "learning_rate": 4.98796864859181e-05, + "loss": 0.4498, + "step": 31338 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879678788956915e-05, + "loss": 0.4412, + "step": 31339 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879671091750125e-05, + "loss": 0.4269, + "step": 31340 + }, + { + "epoch": 0.03, + "learning_rate": 4.987966339429773e-05, + "loss": 0.4225, + "step": 31341 + }, + { + "epoch": 0.03, + "learning_rate": 4.987965569659974e-05, + "loss": 0.4468, + "step": 31342 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879647998656146e-05, + "loss": 0.4182, + "step": 31343 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879640300466946e-05, + "loss": 0.432, + "step": 31344 + }, + { + "epoch": 0.03, + "learning_rate": 4.987963260203215e-05, + "loss": 0.4348, + "step": 31345 + }, + { + "epoch": 0.03, + "learning_rate": 4.987962490335174e-05, + "loss": 0.4464, + "step": 31346 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879617204425746e-05, + "loss": 0.4143, + "step": 31347 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879609505254144e-05, + "loss": 0.4289, + "step": 31348 + }, + { + "epoch": 0.03, + "learning_rate": 4.987960180583694e-05, + "loss": 0.4219, + "step": 31349 + }, + { + "epoch": 0.03, + "learning_rate": 4.987959410617413e-05, + "loss": 0.4398, + "step": 31350 + }, + { + "epoch": 0.03, + "learning_rate": 4.987958640626573e-05, + "loss": 0.4067, + "step": 31351 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879578706111726e-05, + "loss": 0.4154, + "step": 31352 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879571005712116e-05, + "loss": 0.4181, + "step": 31353 + }, + { + "epoch": 0.03, + "learning_rate": 4.987956330506691e-05, + "loss": 0.4394, + "step": 31354 + }, + { + "epoch": 0.03, + "learning_rate": 4.98795556041761e-05, + "loss": 0.4179, + "step": 31355 + }, + { + "epoch": 0.03, + "learning_rate": 4.987954790303969e-05, + "loss": 0.4269, + "step": 31356 + }, + { + "epoch": 0.03, + "learning_rate": 4.987954020165768e-05, + "loss": 0.4202, + "step": 31357 + }, + { + "epoch": 0.03, + "learning_rate": 4.987953250003008e-05, + "loss": 0.4203, + "step": 31358 + }, + { + "epoch": 0.03, + "learning_rate": 4.987952479815687e-05, + "loss": 0.4249, + "step": 31359 + }, + { + "epoch": 0.03, + "learning_rate": 4.987951709603806e-05, + "loss": 0.4199, + "step": 31360 + }, + { + "epoch": 0.03, + "learning_rate": 4.987950939367365e-05, + "loss": 0.4352, + "step": 31361 + }, + { + "epoch": 0.03, + "learning_rate": 4.987950169106363e-05, + "loss": 0.4265, + "step": 31362 + }, + { + "epoch": 0.03, + "learning_rate": 4.987949398820803e-05, + "loss": 0.4129, + "step": 31363 + }, + { + "epoch": 0.03, + "learning_rate": 4.987948628510682e-05, + "loss": 0.4278, + "step": 31364 + }, + { + "epoch": 0.03, + "learning_rate": 4.987947858176001e-05, + "loss": 0.6878, + "step": 31365 + }, + { + "epoch": 0.03, + "learning_rate": 4.98794708781676e-05, + "loss": 1.3427, + "step": 31366 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879463174329594e-05, + "loss": 1.7052, + "step": 31367 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879455470245986e-05, + "loss": 1.1452, + "step": 31368 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879447765916773e-05, + "loss": 1.1242, + "step": 31369 + }, + { + "epoch": 0.03, + "learning_rate": 4.987944006134197e-05, + "loss": 1.1614, + "step": 31370 + }, + { + "epoch": 0.03, + "learning_rate": 4.987943235652156e-05, + "loss": 0.7276, + "step": 31371 + }, + { + "epoch": 0.03, + "learning_rate": 4.987942465145556e-05, + "loss": 0.6911, + "step": 31372 + }, + { + "epoch": 0.03, + "learning_rate": 4.987941694614395e-05, + "loss": 1.0849, + "step": 31373 + }, + { + "epoch": 0.03, + "learning_rate": 4.987940924058675e-05, + "loss": 1.21, + "step": 31374 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879401534783946e-05, + "loss": 1.1928, + "step": 31375 + }, + { + "epoch": 0.03, + "learning_rate": 4.987939382873554e-05, + "loss": 0.7919, + "step": 31376 + }, + { + "epoch": 0.03, + "learning_rate": 4.987938612244154e-05, + "loss": 0.9625, + "step": 31377 + }, + { + "epoch": 0.03, + "learning_rate": 4.987937841590195e-05, + "loss": 0.8772, + "step": 31378 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879370709116746e-05, + "loss": 0.8403, + "step": 31379 + }, + { + "epoch": 0.03, + "learning_rate": 4.987936300208595e-05, + "loss": 0.8169, + "step": 31380 + }, + { + "epoch": 0.03, + "learning_rate": 4.987935529480955e-05, + "loss": 1.2746, + "step": 31381 + }, + { + "epoch": 0.03, + "learning_rate": 4.987934758728756e-05, + "loss": 1.1218, + "step": 31382 + }, + { + "epoch": 0.03, + "learning_rate": 4.987933987951996e-05, + "loss": 1.0376, + "step": 31383 + }, + { + "epoch": 0.03, + "learning_rate": 4.987933217150677e-05, + "loss": 0.9946, + "step": 31384 + }, + { + "epoch": 0.03, + "learning_rate": 4.987932446324799e-05, + "loss": 0.7909, + "step": 31385 + }, + { + "epoch": 0.03, + "learning_rate": 4.98793167547436e-05, + "loss": 1.196, + "step": 31386 + }, + { + "epoch": 0.03, + "learning_rate": 4.987930904599362e-05, + "loss": 0.2917, + "step": 31387 + }, + { + "epoch": 0.03, + "learning_rate": 4.987930133699803e-05, + "loss": 1.0851, + "step": 31388 + }, + { + "epoch": 0.03, + "learning_rate": 4.987929362775685e-05, + "loss": 0.9662, + "step": 31389 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879285918270066e-05, + "loss": 1.0555, + "step": 31390 + }, + { + "epoch": 0.03, + "learning_rate": 4.98792782085377e-05, + "loss": 0.989, + "step": 31391 + }, + { + "epoch": 0.03, + "learning_rate": 4.987927049855972e-05, + "loss": 1.2602, + "step": 31392 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879262788336144e-05, + "loss": 0.993, + "step": 31393 + }, + { + "epoch": 0.03, + "learning_rate": 4.987925507786697e-05, + "loss": 0.692, + "step": 31394 + }, + { + "epoch": 0.03, + "learning_rate": 4.98792473671522e-05, + "loss": 1.1419, + "step": 31395 + }, + { + "epoch": 0.03, + "learning_rate": 4.987923965619184e-05, + "loss": 1.0456, + "step": 31396 + }, + { + "epoch": 0.03, + "learning_rate": 4.987923194498588e-05, + "loss": 0.8825, + "step": 31397 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879224233534316e-05, + "loss": 0.8381, + "step": 31398 + }, + { + "epoch": 0.03, + "learning_rate": 4.987921652183716e-05, + "loss": 1.0332, + "step": 31399 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879208809894405e-05, + "loss": 1.0685, + "step": 31400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879201097706055e-05, + "loss": 0.8966, + "step": 31401 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879193385272105e-05, + "loss": 0.553, + "step": 31402 + }, + { + "epoch": 0.03, + "learning_rate": 4.987918567259256e-05, + "loss": 1.2182, + "step": 31403 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879177959667417e-05, + "loss": 1.6622, + "step": 31404 + }, + { + "epoch": 0.03, + "learning_rate": 4.987917024649668e-05, + "loss": 0.9646, + "step": 31405 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879162533080346e-05, + "loss": 1.0479, + "step": 31406 + }, + { + "epoch": 0.03, + "learning_rate": 4.987915481941841e-05, + "loss": 1.0013, + "step": 31407 + }, + { + "epoch": 0.03, + "learning_rate": 4.987914710551088e-05, + "loss": 1.0383, + "step": 31408 + }, + { + "epoch": 0.03, + "learning_rate": 4.987913939135775e-05, + "loss": 0.9929, + "step": 31409 + }, + { + "epoch": 0.03, + "learning_rate": 4.987913167695904e-05, + "loss": 1.7615, + "step": 31410 + }, + { + "epoch": 0.03, + "learning_rate": 4.987912396231472e-05, + "loss": 1.2479, + "step": 31411 + }, + { + "epoch": 0.03, + "learning_rate": 4.987911624742481e-05, + "loss": 0.9222, + "step": 31412 + }, + { + "epoch": 0.03, + "learning_rate": 4.987910853228929e-05, + "loss": 1.0115, + "step": 31413 + }, + { + "epoch": 0.03, + "learning_rate": 4.987910081690819e-05, + "loss": 1.0255, + "step": 31414 + }, + { + "epoch": 0.03, + "learning_rate": 4.987909310128148e-05, + "loss": 1.0233, + "step": 31415 + }, + { + "epoch": 0.03, + "learning_rate": 4.987908538540919e-05, + "loss": 0.7692, + "step": 31416 + }, + { + "epoch": 0.03, + "learning_rate": 4.987907766929129e-05, + "loss": 0.9952, + "step": 31417 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879069952927804e-05, + "loss": 0.9864, + "step": 31418 + }, + { + "epoch": 0.03, + "learning_rate": 4.987906223631872e-05, + "loss": 1.1943, + "step": 31419 + }, + { + "epoch": 0.03, + "learning_rate": 4.987905451946404e-05, + "loss": 0.9697, + "step": 31420 + }, + { + "epoch": 0.03, + "learning_rate": 4.987904680236376e-05, + "loss": 1.065, + "step": 31421 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879039085017884e-05, + "loss": 1.1168, + "step": 31422 + }, + { + "epoch": 0.03, + "learning_rate": 4.987903136742642e-05, + "loss": 0.9716, + "step": 31423 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879023649589355e-05, + "loss": 1.0178, + "step": 31424 + }, + { + "epoch": 0.03, + "learning_rate": 4.98790159315067e-05, + "loss": 1.0812, + "step": 31425 + }, + { + "epoch": 0.03, + "learning_rate": 4.987900821317844e-05, + "loss": 1.1416, + "step": 31426 + }, + { + "epoch": 0.03, + "learning_rate": 4.9879000494604596e-05, + "loss": 0.9744, + "step": 31427 + }, + { + "epoch": 0.03, + "learning_rate": 4.987899277578515e-05, + "loss": 1.0919, + "step": 31428 + }, + { + "epoch": 0.03, + "learning_rate": 4.987898505672011e-05, + "loss": 0.8761, + "step": 31429 + }, + { + "epoch": 0.03, + "learning_rate": 4.987897733740948e-05, + "loss": 1.2288, + "step": 31430 + }, + { + "epoch": 0.03, + "learning_rate": 4.987896961785325e-05, + "loss": 0.6179, + "step": 31431 + }, + { + "epoch": 0.03, + "learning_rate": 4.987896189805143e-05, + "loss": 1.1744, + "step": 31432 + }, + { + "epoch": 0.03, + "learning_rate": 4.987895417800401e-05, + "loss": 0.9555, + "step": 31433 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878946457711004e-05, + "loss": 1.2816, + "step": 31434 + }, + { + "epoch": 0.03, + "learning_rate": 4.987893873717239e-05, + "loss": 1.0224, + "step": 31435 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878931016388196e-05, + "loss": 0.7763, + "step": 31436 + }, + { + "epoch": 0.03, + "learning_rate": 4.98789232953584e-05, + "loss": 0.625, + "step": 31437 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878915574083006e-05, + "loss": 1.1646, + "step": 31438 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878907852562026e-05, + "loss": 0.7406, + "step": 31439 + }, + { + "epoch": 0.03, + "learning_rate": 4.987890013079545e-05, + "loss": 1.1652, + "step": 31440 + }, + { + "epoch": 0.03, + "learning_rate": 4.987889240878327e-05, + "loss": 1.2209, + "step": 31441 + }, + { + "epoch": 0.03, + "learning_rate": 4.987888468652551e-05, + "loss": 1.4779, + "step": 31442 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878876964022145e-05, + "loss": 0.99, + "step": 31443 + }, + { + "epoch": 0.03, + "learning_rate": 4.98788692412732e-05, + "loss": 0.9853, + "step": 31444 + }, + { + "epoch": 0.03, + "learning_rate": 4.987886151827865e-05, + "loss": 2.0161, + "step": 31445 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878853795038513e-05, + "loss": 1.0006, + "step": 31446 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878846071552776e-05, + "loss": 0.882, + "step": 31447 + }, + { + "epoch": 0.03, + "learning_rate": 4.987883834782145e-05, + "loss": 1.256, + "step": 31448 + }, + { + "epoch": 0.03, + "learning_rate": 4.987883062384453e-05, + "loss": 0.8581, + "step": 31449 + }, + { + "epoch": 0.03, + "learning_rate": 4.987882289962201e-05, + "loss": 1.1494, + "step": 31450 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878815175153906e-05, + "loss": 0.8532, + "step": 31451 + }, + { + "epoch": 0.03, + "learning_rate": 4.98788074504402e-05, + "loss": 0.9982, + "step": 31452 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878799725480905e-05, + "loss": 0.8695, + "step": 31453 + }, + { + "epoch": 0.03, + "learning_rate": 4.987879200027602e-05, + "loss": 0.8604, + "step": 31454 + }, + { + "epoch": 0.03, + "learning_rate": 4.987878427482554e-05, + "loss": 0.8363, + "step": 31455 + }, + { + "epoch": 0.03, + "learning_rate": 4.987877654912947e-05, + "loss": 0.8055, + "step": 31456 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878768823187804e-05, + "loss": 0.9307, + "step": 31457 + }, + { + "epoch": 0.03, + "learning_rate": 4.987876109700055e-05, + "loss": 0.9555, + "step": 31458 + }, + { + "epoch": 0.03, + "learning_rate": 4.98787533705677e-05, + "loss": 1.486, + "step": 31459 + }, + { + "epoch": 0.03, + "learning_rate": 4.987874564388925e-05, + "loss": 1.9775, + "step": 31460 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878737916965216e-05, + "loss": 1.8424, + "step": 31461 + }, + { + "epoch": 0.03, + "learning_rate": 4.987873018979558e-05, + "loss": 1.2312, + "step": 31462 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878722462380365e-05, + "loss": 1.0083, + "step": 31463 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878714734719555e-05, + "loss": 0.8371, + "step": 31464 + }, + { + "epoch": 0.03, + "learning_rate": 4.987870700681315e-05, + "loss": 0.8216, + "step": 31465 + }, + { + "epoch": 0.03, + "learning_rate": 4.987869927866116e-05, + "loss": 0.8231, + "step": 31466 + }, + { + "epoch": 0.03, + "learning_rate": 4.987869155026356e-05, + "loss": 1.2153, + "step": 31467 + }, + { + "epoch": 0.03, + "learning_rate": 4.987868382162038e-05, + "loss": 1.1343, + "step": 31468 + }, + { + "epoch": 0.03, + "learning_rate": 4.987867609273161e-05, + "loss": 1.2053, + "step": 31469 + }, + { + "epoch": 0.03, + "learning_rate": 4.987866836359725e-05, + "loss": 1.0998, + "step": 31470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878660634217286e-05, + "loss": 1.0523, + "step": 31471 + }, + { + "epoch": 0.03, + "learning_rate": 4.987865290459174e-05, + "loss": 1.1063, + "step": 31472 + }, + { + "epoch": 0.03, + "learning_rate": 4.987864517472059e-05, + "loss": 1.0122, + "step": 31473 + }, + { + "epoch": 0.03, + "learning_rate": 4.987863744460387e-05, + "loss": 1.0835, + "step": 31474 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878629714241545e-05, + "loss": 1.0043, + "step": 31475 + }, + { + "epoch": 0.03, + "learning_rate": 4.987862198363363e-05, + "loss": 0.9682, + "step": 31476 + }, + { + "epoch": 0.03, + "learning_rate": 4.987861425278012e-05, + "loss": 1.1624, + "step": 31477 + }, + { + "epoch": 0.03, + "learning_rate": 4.987860652168102e-05, + "loss": 1.1127, + "step": 31478 + }, + { + "epoch": 0.03, + "learning_rate": 4.987859879033634e-05, + "loss": 1.5615, + "step": 31479 + }, + { + "epoch": 0.03, + "learning_rate": 4.987859105874606e-05, + "loss": 1.3652, + "step": 31480 + }, + { + "epoch": 0.03, + "learning_rate": 4.987858332691019e-05, + "loss": 1.4139, + "step": 31481 + }, + { + "epoch": 0.03, + "learning_rate": 4.987857559482873e-05, + "loss": 1.3209, + "step": 31482 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878567862501677e-05, + "loss": 1.0365, + "step": 31483 + }, + { + "epoch": 0.03, + "learning_rate": 4.987856012992904e-05, + "loss": 0.9814, + "step": 31484 + }, + { + "epoch": 0.03, + "learning_rate": 4.98785523971108e-05, + "loss": 1.0244, + "step": 31485 + }, + { + "epoch": 0.03, + "learning_rate": 4.987854466404698e-05, + "loss": 0.8571, + "step": 31486 + }, + { + "epoch": 0.03, + "learning_rate": 4.987853693073756e-05, + "loss": 1.2056, + "step": 31487 + }, + { + "epoch": 0.03, + "learning_rate": 4.987852919718256e-05, + "loss": 0.946, + "step": 31488 + }, + { + "epoch": 0.03, + "learning_rate": 4.987852146338197e-05, + "loss": 0.8998, + "step": 31489 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878513729335784e-05, + "loss": 1.0761, + "step": 31490 + }, + { + "epoch": 0.03, + "learning_rate": 4.987850599504401e-05, + "loss": 0.4236, + "step": 31491 + }, + { + "epoch": 0.03, + "learning_rate": 4.987849826050664e-05, + "loss": 1.1291, + "step": 31492 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878490525723683e-05, + "loss": 0.8313, + "step": 31493 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878482790695144e-05, + "loss": 0.9125, + "step": 31494 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878475055421005e-05, + "loss": 1.2935, + "step": 31495 + }, + { + "epoch": 0.03, + "learning_rate": 4.987846731990128e-05, + "loss": 1.1113, + "step": 31496 + }, + { + "epoch": 0.03, + "learning_rate": 4.987845958413596e-05, + "loss": 1.0944, + "step": 31497 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878451848125064e-05, + "loss": 1.0799, + "step": 31498 + }, + { + "epoch": 0.03, + "learning_rate": 4.987844411186856e-05, + "loss": 0.7677, + "step": 31499 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878436375366484e-05, + "loss": 0.7831, + "step": 31500 + }, + { + "epoch": 0.03, + "eval_loss": 1.06670081615448, + "eval_runtime": 127.8487, + "eval_samples_per_second": 10.833, + "eval_steps_per_second": 5.42, + "step": 31500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878428638618813e-05, + "loss": 0.9292, + "step": 31501 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878420901625544e-05, + "loss": 1.1426, + "step": 31502 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878413164386695e-05, + "loss": 0.9489, + "step": 31503 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878405426902254e-05, + "loss": 0.8089, + "step": 31504 + }, + { + "epoch": 0.03, + "learning_rate": 4.987839768917222e-05, + "loss": 1.0579, + "step": 31505 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878389951196604e-05, + "loss": 0.9912, + "step": 31506 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878382212975393e-05, + "loss": 0.999, + "step": 31507 + }, + { + "epoch": 0.03, + "learning_rate": 4.98783744745086e-05, + "loss": 1.3683, + "step": 31508 + }, + { + "epoch": 0.03, + "learning_rate": 4.987836673579621e-05, + "loss": 1.0218, + "step": 31509 + }, + { + "epoch": 0.03, + "learning_rate": 4.987835899683824e-05, + "loss": 0.7452, + "step": 31510 + }, + { + "epoch": 0.03, + "learning_rate": 4.987835125763468e-05, + "loss": 0.9736, + "step": 31511 + }, + { + "epoch": 0.03, + "learning_rate": 4.987834351818552e-05, + "loss": 0.952, + "step": 31512 + }, + { + "epoch": 0.03, + "learning_rate": 4.987833577849078e-05, + "loss": 1.2411, + "step": 31513 + }, + { + "epoch": 0.03, + "learning_rate": 4.987832803855045e-05, + "loss": 1.0413, + "step": 31514 + }, + { + "epoch": 0.03, + "learning_rate": 4.987832029836453e-05, + "loss": 0.9205, + "step": 31515 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878312557933026e-05, + "loss": 0.8793, + "step": 31516 + }, + { + "epoch": 0.03, + "learning_rate": 4.987830481725594e-05, + "loss": 0.8732, + "step": 31517 + }, + { + "epoch": 0.03, + "learning_rate": 4.987829707633326e-05, + "loss": 0.8929, + "step": 31518 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878289335164984e-05, + "loss": 0.8626, + "step": 31519 + }, + { + "epoch": 0.03, + "learning_rate": 4.987828159375113e-05, + "loss": 1.1934, + "step": 31520 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878273852091685e-05, + "loss": 0.9111, + "step": 31521 + }, + { + "epoch": 0.03, + "learning_rate": 4.987826611018665e-05, + "loss": 0.7997, + "step": 31522 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878258368036024e-05, + "loss": 1.0889, + "step": 31523 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878250625639815e-05, + "loss": 1.0304, + "step": 31524 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878242882998015e-05, + "loss": 1.1372, + "step": 31525 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878235140110636e-05, + "loss": 1.0453, + "step": 31526 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878227396977664e-05, + "loss": 1.1428, + "step": 31527 + }, + { + "epoch": 0.03, + "learning_rate": 4.98782196535991e-05, + "loss": 0.6646, + "step": 31528 + }, + { + "epoch": 0.03, + "learning_rate": 4.987821190997496e-05, + "loss": 0.9391, + "step": 31529 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878204166105225e-05, + "loss": 0.95, + "step": 31530 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878196421989905e-05, + "loss": 1.034, + "step": 31531 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878188677628994e-05, + "loss": 1.0388, + "step": 31532 + }, + { + "epoch": 0.03, + "learning_rate": 4.98781809330225e-05, + "loss": 1.0364, + "step": 31533 + }, + { + "epoch": 0.03, + "learning_rate": 4.987817318817042e-05, + "loss": 1.1672, + "step": 31534 + }, + { + "epoch": 0.03, + "learning_rate": 4.987816544307275e-05, + "loss": 0.891, + "step": 31535 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878157697729494e-05, + "loss": 1.0793, + "step": 31536 + }, + { + "epoch": 0.03, + "learning_rate": 4.987814995214065e-05, + "loss": 0.8345, + "step": 31537 + }, + { + "epoch": 0.03, + "learning_rate": 4.987814220630622e-05, + "loss": 0.7572, + "step": 31538 + }, + { + "epoch": 0.03, + "learning_rate": 4.987813446022621e-05, + "loss": 0.8625, + "step": 31539 + }, + { + "epoch": 0.03, + "learning_rate": 4.987812671390061e-05, + "loss": 1.0572, + "step": 31540 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878118967329415e-05, + "loss": 1.2636, + "step": 31541 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878111220512644e-05, + "loss": 1.0138, + "step": 31542 + }, + { + "epoch": 0.03, + "learning_rate": 4.987810347345028e-05, + "loss": 0.7418, + "step": 31543 + }, + { + "epoch": 0.03, + "learning_rate": 4.987809572614234e-05, + "loss": 0.7776, + "step": 31544 + }, + { + "epoch": 0.03, + "learning_rate": 4.98780879785888e-05, + "loss": 1.0206, + "step": 31545 + }, + { + "epoch": 0.03, + "learning_rate": 4.987808023078968e-05, + "loss": 1.1416, + "step": 31546 + }, + { + "epoch": 0.03, + "learning_rate": 4.987807248274498e-05, + "loss": 1.0173, + "step": 31547 + }, + { + "epoch": 0.03, + "learning_rate": 4.987806473445469e-05, + "loss": 0.9082, + "step": 31548 + }, + { + "epoch": 0.03, + "learning_rate": 4.987805698591881e-05, + "loss": 0.9434, + "step": 31549 + }, + { + "epoch": 0.03, + "learning_rate": 4.987804923713735e-05, + "loss": 1.1064, + "step": 31550 + }, + { + "epoch": 0.03, + "learning_rate": 4.98780414881103e-05, + "loss": 0.909, + "step": 31551 + }, + { + "epoch": 0.03, + "learning_rate": 4.987803373883767e-05, + "loss": 0.9012, + "step": 31552 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878025989319454e-05, + "loss": 1.1068, + "step": 31553 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878018239555644e-05, + "loss": 0.9233, + "step": 31554 + }, + { + "epoch": 0.03, + "learning_rate": 4.9878010489546256e-05, + "loss": 0.9125, + "step": 31555 + }, + { + "epoch": 0.03, + "learning_rate": 4.987800273929128e-05, + "loss": 1.7135, + "step": 31556 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877994988790724e-05, + "loss": 0.9676, + "step": 31557 + }, + { + "epoch": 0.03, + "learning_rate": 4.987798723804458e-05, + "loss": 0.9832, + "step": 31558 + }, + { + "epoch": 0.03, + "learning_rate": 4.987797948705285e-05, + "loss": 0.5851, + "step": 31559 + }, + { + "epoch": 0.03, + "learning_rate": 4.987797173581553e-05, + "loss": 0.4362, + "step": 31560 + }, + { + "epoch": 0.03, + "learning_rate": 4.987796398433263e-05, + "loss": 0.9118, + "step": 31561 + }, + { + "epoch": 0.03, + "learning_rate": 4.987795623260415e-05, + "loss": 1.1061, + "step": 31562 + }, + { + "epoch": 0.03, + "learning_rate": 4.987794848063008e-05, + "loss": 1.1071, + "step": 31563 + }, + { + "epoch": 0.03, + "learning_rate": 4.987794072841043e-05, + "loss": 0.8589, + "step": 31564 + }, + { + "epoch": 0.03, + "learning_rate": 4.987793297594519e-05, + "loss": 0.8587, + "step": 31565 + }, + { + "epoch": 0.03, + "learning_rate": 4.987792522323437e-05, + "loss": 0.8679, + "step": 31566 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877917470277956e-05, + "loss": 1.131, + "step": 31567 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877909717075966e-05, + "loss": 0.6241, + "step": 31568 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877901963628396e-05, + "loss": 0.8259, + "step": 31569 + }, + { + "epoch": 0.03, + "learning_rate": 4.987789420993523e-05, + "loss": 0.9802, + "step": 31570 + }, + { + "epoch": 0.03, + "learning_rate": 4.987788645599649e-05, + "loss": 1.5742, + "step": 31571 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877878701812156e-05, + "loss": 1.6131, + "step": 31572 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877870947382245e-05, + "loss": 1.584, + "step": 31573 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877863192706756e-05, + "loss": 1.5155, + "step": 31574 + }, + { + "epoch": 0.03, + "learning_rate": 4.987785543778567e-05, + "loss": 1.4825, + "step": 31575 + }, + { + "epoch": 0.03, + "learning_rate": 4.987784768261901e-05, + "loss": 1.5186, + "step": 31576 + }, + { + "epoch": 0.03, + "learning_rate": 4.987783992720676e-05, + "loss": 1.4775, + "step": 31577 + }, + { + "epoch": 0.03, + "learning_rate": 4.987783217154893e-05, + "loss": 1.4539, + "step": 31578 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877824415645516e-05, + "loss": 1.4228, + "step": 31579 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877816659496515e-05, + "loss": 1.4172, + "step": 31580 + }, + { + "epoch": 0.03, + "learning_rate": 4.987780890310194e-05, + "loss": 1.3869, + "step": 31581 + }, + { + "epoch": 0.03, + "learning_rate": 4.987780114646177e-05, + "loss": 1.3251, + "step": 31582 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877793389576026e-05, + "loss": 1.3677, + "step": 31583 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877785632444696e-05, + "loss": 1.3353, + "step": 31584 + }, + { + "epoch": 0.03, + "learning_rate": 4.987777787506778e-05, + "loss": 1.2822, + "step": 31585 + }, + { + "epoch": 0.03, + "learning_rate": 4.987777011744529e-05, + "loss": 1.2558, + "step": 31586 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877762359577204e-05, + "loss": 1.1929, + "step": 31587 + }, + { + "epoch": 0.03, + "learning_rate": 4.987775460146355e-05, + "loss": 1.2535, + "step": 31588 + }, + { + "epoch": 0.03, + "learning_rate": 4.98777468431043e-05, + "loss": 1.2171, + "step": 31589 + }, + { + "epoch": 0.03, + "learning_rate": 4.987773908449948e-05, + "loss": 1.1873, + "step": 31590 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877731325649064e-05, + "loss": 1.1546, + "step": 31591 + }, + { + "epoch": 0.03, + "learning_rate": 4.987772356655307e-05, + "loss": 1.1065, + "step": 31592 + }, + { + "epoch": 0.03, + "learning_rate": 4.98777158072115e-05, + "loss": 1.1368, + "step": 31593 + }, + { + "epoch": 0.03, + "learning_rate": 4.987770804762434e-05, + "loss": 1.1545, + "step": 31594 + }, + { + "epoch": 0.03, + "learning_rate": 4.98777002877916e-05, + "loss": 1.0863, + "step": 31595 + }, + { + "epoch": 0.03, + "learning_rate": 4.987769252771328e-05, + "loss": 1.0981, + "step": 31596 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877684767389375e-05, + "loss": 1.0569, + "step": 31597 + }, + { + "epoch": 0.03, + "learning_rate": 4.987767700681989e-05, + "loss": 1.0647, + "step": 31598 + }, + { + "epoch": 0.03, + "learning_rate": 4.987766924600482e-05, + "loss": 1.1359, + "step": 31599 + }, + { + "epoch": 0.03, + "learning_rate": 4.987766148494417e-05, + "loss": 1.1418, + "step": 31600 + }, + { + "epoch": 0.03, + "learning_rate": 4.987765372363794e-05, + "loss": 1.0434, + "step": 31601 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877645962086125e-05, + "loss": 1.2659, + "step": 31602 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877638200288735e-05, + "loss": 1.1641, + "step": 31603 + }, + { + "epoch": 0.03, + "learning_rate": 4.987763043824576e-05, + "loss": 0.8731, + "step": 31604 + }, + { + "epoch": 0.03, + "learning_rate": 4.98776226759572e-05, + "loss": 1.0412, + "step": 31605 + }, + { + "epoch": 0.03, + "learning_rate": 4.987761491342307e-05, + "loss": 1.1791, + "step": 31606 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877607150643343e-05, + "loss": 1.1265, + "step": 31607 + }, + { + "epoch": 0.03, + "learning_rate": 4.987759938761804e-05, + "loss": 0.8375, + "step": 31608 + }, + { + "epoch": 0.03, + "learning_rate": 4.987759162434716e-05, + "loss": 1.0053, + "step": 31609 + }, + { + "epoch": 0.03, + "learning_rate": 4.987758386083069e-05, + "loss": 0.7843, + "step": 31610 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877576097068654e-05, + "loss": 1.1921, + "step": 31611 + }, + { + "epoch": 0.03, + "learning_rate": 4.987756833306102e-05, + "loss": 0.929, + "step": 31612 + }, + { + "epoch": 0.03, + "learning_rate": 4.987756056880782e-05, + "loss": 0.9057, + "step": 31613 + }, + { + "epoch": 0.03, + "learning_rate": 4.987755280430903e-05, + "loss": 1.087, + "step": 31614 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877545039564666e-05, + "loss": 0.7259, + "step": 31615 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877537274574714e-05, + "loss": 1.0172, + "step": 31616 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877529509339184e-05, + "loss": 1.1857, + "step": 31617 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877521743858075e-05, + "loss": 0.8986, + "step": 31618 + }, + { + "epoch": 0.03, + "learning_rate": 4.987751397813139e-05, + "loss": 1.5541, + "step": 31619 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877506212159115e-05, + "loss": 0.7627, + "step": 31620 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877498445941264e-05, + "loss": 0.8395, + "step": 31621 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877490679477834e-05, + "loss": 1.1617, + "step": 31622 + }, + { + "epoch": 0.03, + "learning_rate": 4.987748291276882e-05, + "loss": 1.142, + "step": 31623 + }, + { + "epoch": 0.03, + "learning_rate": 4.987747514581423e-05, + "loss": 1.1582, + "step": 31624 + }, + { + "epoch": 0.03, + "learning_rate": 4.987746737861406e-05, + "loss": 1.1773, + "step": 31625 + }, + { + "epoch": 0.03, + "learning_rate": 4.987745961116831e-05, + "loss": 0.9595, + "step": 31626 + }, + { + "epoch": 0.03, + "learning_rate": 4.987745184347697e-05, + "loss": 0.7063, + "step": 31627 + }, + { + "epoch": 0.03, + "learning_rate": 4.987744407554007e-05, + "loss": 0.2207, + "step": 31628 + }, + { + "epoch": 0.03, + "learning_rate": 4.987743630735757e-05, + "loss": 0.3346, + "step": 31629 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877428538929505e-05, + "loss": 1.0273, + "step": 31630 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877420770255854e-05, + "loss": 1.1559, + "step": 31631 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877413001336625e-05, + "loss": 1.4585, + "step": 31632 + }, + { + "epoch": 0.03, + "learning_rate": 4.987740523217182e-05, + "loss": 0.9491, + "step": 31633 + }, + { + "epoch": 0.03, + "learning_rate": 4.987739746276143e-05, + "loss": 0.9616, + "step": 31634 + }, + { + "epoch": 0.03, + "learning_rate": 4.987738969310547e-05, + "loss": 1.0613, + "step": 31635 + }, + { + "epoch": 0.03, + "learning_rate": 4.987738192320392e-05, + "loss": 1.8023, + "step": 31636 + }, + { + "epoch": 0.03, + "learning_rate": 4.98773741530568e-05, + "loss": 1.8394, + "step": 31637 + }, + { + "epoch": 0.03, + "learning_rate": 4.987736638266409e-05, + "loss": 0.8982, + "step": 31638 + }, + { + "epoch": 0.03, + "learning_rate": 4.987735861202581e-05, + "loss": 0.889, + "step": 31639 + }, + { + "epoch": 0.03, + "learning_rate": 4.987735084114194e-05, + "loss": 0.8388, + "step": 31640 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877343070012506e-05, + "loss": 1.3606, + "step": 31641 + }, + { + "epoch": 0.03, + "learning_rate": 4.987733529863749e-05, + "loss": 1.4235, + "step": 31642 + }, + { + "epoch": 0.03, + "learning_rate": 4.987732752701689e-05, + "loss": 1.2907, + "step": 31643 + }, + { + "epoch": 0.03, + "learning_rate": 4.987731975515072e-05, + "loss": 1.2511, + "step": 31644 + }, + { + "epoch": 0.03, + "learning_rate": 4.987731198303897e-05, + "loss": 0.6254, + "step": 31645 + }, + { + "epoch": 0.03, + "learning_rate": 4.987730421068163e-05, + "loss": 0.7027, + "step": 31646 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877296438078725e-05, + "loss": 0.3254, + "step": 31647 + }, + { + "epoch": 0.03, + "learning_rate": 4.987728866523023e-05, + "loss": 0.6849, + "step": 31648 + }, + { + "epoch": 0.03, + "learning_rate": 4.987728089213617e-05, + "loss": 1.3699, + "step": 31649 + }, + { + "epoch": 0.03, + "learning_rate": 4.987727311879652e-05, + "loss": 1.284, + "step": 31650 + }, + { + "epoch": 0.03, + "learning_rate": 4.98772653452113e-05, + "loss": 1.3115, + "step": 31651 + }, + { + "epoch": 0.03, + "learning_rate": 4.98772575713805e-05, + "loss": 1.1954, + "step": 31652 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877249797304115e-05, + "loss": 1.2254, + "step": 31653 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877242022982164e-05, + "loss": 1.3375, + "step": 31654 + }, + { + "epoch": 0.03, + "learning_rate": 4.987723424841463e-05, + "loss": 1.0722, + "step": 31655 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877226473601513e-05, + "loss": 0.7539, + "step": 31656 + }, + { + "epoch": 0.03, + "learning_rate": 4.987721869854283e-05, + "loss": 0.9471, + "step": 31657 + }, + { + "epoch": 0.03, + "learning_rate": 4.987721092323856e-05, + "loss": 1.0756, + "step": 31658 + }, + { + "epoch": 0.03, + "learning_rate": 4.987720314768872e-05, + "loss": 0.7231, + "step": 31659 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877195371893296e-05, + "loss": 0.9221, + "step": 31660 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877187595852295e-05, + "loss": 0.6837, + "step": 31661 + }, + { + "epoch": 0.03, + "learning_rate": 4.987717981956572e-05, + "loss": 0.9976, + "step": 31662 + }, + { + "epoch": 0.03, + "learning_rate": 4.987717204303357e-05, + "loss": 1.1344, + "step": 31663 + }, + { + "epoch": 0.03, + "learning_rate": 4.987716426625584e-05, + "loss": 1.1505, + "step": 31664 + }, + { + "epoch": 0.03, + "learning_rate": 4.987715648923253e-05, + "loss": 0.9606, + "step": 31665 + }, + { + "epoch": 0.03, + "learning_rate": 4.987714871196365e-05, + "loss": 0.9186, + "step": 31666 + }, + { + "epoch": 0.03, + "learning_rate": 4.987714093444919e-05, + "loss": 0.9501, + "step": 31667 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877133156689156e-05, + "loss": 1.0612, + "step": 31668 + }, + { + "epoch": 0.03, + "learning_rate": 4.987712537868354e-05, + "loss": 1.0047, + "step": 31669 + }, + { + "epoch": 0.03, + "learning_rate": 4.987711760043235e-05, + "loss": 1.2236, + "step": 31670 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877109821935587e-05, + "loss": 0.9869, + "step": 31671 + }, + { + "epoch": 0.03, + "learning_rate": 4.987710204319325e-05, + "loss": 1.1288, + "step": 31672 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877094264205325e-05, + "loss": 1.0172, + "step": 31673 + }, + { + "epoch": 0.03, + "learning_rate": 4.987708648497183e-05, + "loss": 1.0775, + "step": 31674 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877078705492756e-05, + "loss": 0.9275, + "step": 31675 + }, + { + "epoch": 0.03, + "learning_rate": 4.987707092576811e-05, + "loss": 1.2585, + "step": 31676 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877063145797887e-05, + "loss": 0.6953, + "step": 31677 + }, + { + "epoch": 0.03, + "learning_rate": 4.987705536558209e-05, + "loss": 0.9898, + "step": 31678 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877047585120716e-05, + "loss": 0.783, + "step": 31679 + }, + { + "epoch": 0.03, + "learning_rate": 4.987703980441376e-05, + "loss": 0.8523, + "step": 31680 + }, + { + "epoch": 0.03, + "learning_rate": 4.987703202346124e-05, + "loss": 1.1208, + "step": 31681 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877024242263135e-05, + "loss": 1.3804, + "step": 31682 + }, + { + "epoch": 0.03, + "learning_rate": 4.987701646081946e-05, + "loss": 1.1707, + "step": 31683 + }, + { + "epoch": 0.03, + "learning_rate": 4.987700867913021e-05, + "loss": 1.0394, + "step": 31684 + }, + { + "epoch": 0.03, + "learning_rate": 4.987700089719538e-05, + "loss": 1.1449, + "step": 31685 + }, + { + "epoch": 0.03, + "learning_rate": 4.987699311501498e-05, + "loss": 0.4049, + "step": 31686 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876985332589e-05, + "loss": 0.3557, + "step": 31687 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876977549917446e-05, + "loss": 0.4335, + "step": 31688 + }, + { + "epoch": 0.03, + "learning_rate": 4.987696976700031e-05, + "loss": 0.3495, + "step": 31689 + }, + { + "epoch": 0.03, + "learning_rate": 4.987696198383761e-05, + "loss": 0.2413, + "step": 31690 + }, + { + "epoch": 0.03, + "learning_rate": 4.987695420042934e-05, + "loss": 0.4409, + "step": 31691 + }, + { + "epoch": 0.03, + "learning_rate": 4.987694641677548e-05, + "loss": 0.5558, + "step": 31692 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876938632876056e-05, + "loss": 1.0945, + "step": 31693 + }, + { + "epoch": 0.03, + "learning_rate": 4.987693084873105e-05, + "loss": 0.9483, + "step": 31694 + }, + { + "epoch": 0.03, + "learning_rate": 4.987692306434047e-05, + "loss": 1.0021, + "step": 31695 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876915279704324e-05, + "loss": 1.0685, + "step": 31696 + }, + { + "epoch": 0.03, + "learning_rate": 4.98769074948226e-05, + "loss": 1.1254, + "step": 31697 + }, + { + "epoch": 0.03, + "learning_rate": 4.98768997096953e-05, + "loss": 0.8902, + "step": 31698 + }, + { + "epoch": 0.03, + "learning_rate": 4.987689192432242e-05, + "loss": 0.9064, + "step": 31699 + }, + { + "epoch": 0.03, + "learning_rate": 4.987688413870397e-05, + "loss": 0.8305, + "step": 31700 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876876352839954e-05, + "loss": 0.8932, + "step": 31701 + }, + { + "epoch": 0.03, + "learning_rate": 4.987686856673035e-05, + "loss": 0.9019, + "step": 31702 + }, + { + "epoch": 0.03, + "learning_rate": 4.987686078037518e-05, + "loss": 0.3333, + "step": 31703 + }, + { + "epoch": 0.03, + "learning_rate": 4.987685299377444e-05, + "loss": 0.8579, + "step": 31704 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876845206928116e-05, + "loss": 0.3433, + "step": 31705 + }, + { + "epoch": 0.03, + "learning_rate": 4.987683741983623e-05, + "loss": 0.957, + "step": 31706 + }, + { + "epoch": 0.03, + "learning_rate": 4.987682963249876e-05, + "loss": 0.6483, + "step": 31707 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876821844915725e-05, + "loss": 1.2886, + "step": 31708 + }, + { + "epoch": 0.03, + "learning_rate": 4.987681405708711e-05, + "loss": 0.847, + "step": 31709 + }, + { + "epoch": 0.03, + "learning_rate": 4.987680626901292e-05, + "loss": 0.7357, + "step": 31710 + }, + { + "epoch": 0.03, + "learning_rate": 4.987679848069317e-05, + "loss": 0.816, + "step": 31711 + }, + { + "epoch": 0.03, + "learning_rate": 4.987679069212783e-05, + "loss": 0.8368, + "step": 31712 + }, + { + "epoch": 0.03, + "learning_rate": 4.987678290331692e-05, + "loss": 0.7033, + "step": 31713 + }, + { + "epoch": 0.03, + "learning_rate": 4.987677511426044e-05, + "loss": 1.0977, + "step": 31714 + }, + { + "epoch": 0.03, + "learning_rate": 4.987676732495839e-05, + "loss": 1.2245, + "step": 31715 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876759535410764e-05, + "loss": 1.4229, + "step": 31716 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876751745617566e-05, + "loss": 1.1627, + "step": 31717 + }, + { + "epoch": 0.03, + "learning_rate": 4.98767439555788e-05, + "loss": 1.2098, + "step": 31718 + }, + { + "epoch": 0.03, + "learning_rate": 4.987673616529445e-05, + "loss": 1.2843, + "step": 31719 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876728374764537e-05, + "loss": 1.4804, + "step": 31720 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876720583989045e-05, + "loss": 0.7682, + "step": 31721 + }, + { + "epoch": 0.03, + "learning_rate": 4.987671279296798e-05, + "loss": 0.641, + "step": 31722 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876705001701354e-05, + "loss": 0.9784, + "step": 31723 + }, + { + "epoch": 0.03, + "learning_rate": 4.987669721018915e-05, + "loss": 0.9284, + "step": 31724 + }, + { + "epoch": 0.03, + "learning_rate": 4.987668941843137e-05, + "loss": 0.6791, + "step": 31725 + }, + { + "epoch": 0.03, + "learning_rate": 4.987668162642801e-05, + "loss": 1.0474, + "step": 31726 + }, + { + "epoch": 0.03, + "learning_rate": 4.987667383417909e-05, + "loss": 1.1024, + "step": 31727 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876666041684595e-05, + "loss": 0.7601, + "step": 31728 + }, + { + "epoch": 0.03, + "learning_rate": 4.987665824894453e-05, + "loss": 0.3253, + "step": 31729 + }, + { + "epoch": 0.03, + "learning_rate": 4.987665045595889e-05, + "loss": 1.1772, + "step": 31730 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876642662727676e-05, + "loss": 0.858, + "step": 31731 + }, + { + "epoch": 0.03, + "learning_rate": 4.98766348692509e-05, + "loss": 0.9403, + "step": 31732 + }, + { + "epoch": 0.03, + "learning_rate": 4.987662707552854e-05, + "loss": 0.9707, + "step": 31733 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876619281560624e-05, + "loss": 0.9898, + "step": 31734 + }, + { + "epoch": 0.03, + "learning_rate": 4.987661148734713e-05, + "loss": 0.3916, + "step": 31735 + }, + { + "epoch": 0.03, + "learning_rate": 4.987660369288805e-05, + "loss": 0.2784, + "step": 31736 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876595898183406e-05, + "loss": 0.2993, + "step": 31737 + }, + { + "epoch": 0.03, + "learning_rate": 4.98765881032332e-05, + "loss": 0.2481, + "step": 31738 + }, + { + "epoch": 0.03, + "learning_rate": 4.987658030803742e-05, + "loss": 0.1896, + "step": 31739 + }, + { + "epoch": 0.03, + "learning_rate": 4.987657251259606e-05, + "loss": 0.2071, + "step": 31740 + }, + { + "epoch": 0.03, + "learning_rate": 4.987656471690914e-05, + "loss": 0.2091, + "step": 31741 + }, + { + "epoch": 0.03, + "learning_rate": 4.987655692097665e-05, + "loss": 0.4274, + "step": 31742 + }, + { + "epoch": 0.03, + "learning_rate": 4.987654912479858e-05, + "loss": 1.7362, + "step": 31743 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876541328374936e-05, + "loss": 1.0943, + "step": 31744 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876533531705735e-05, + "loss": 0.8546, + "step": 31745 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876525734790956e-05, + "loss": 0.8424, + "step": 31746 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876517937630604e-05, + "loss": 0.4817, + "step": 31747 + }, + { + "epoch": 0.03, + "learning_rate": 4.987651014022469e-05, + "loss": 1.0888, + "step": 31748 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876502342573193e-05, + "loss": 1.0747, + "step": 31749 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876494544676134e-05, + "loss": 1.1098, + "step": 31750 + }, + { + "epoch": 0.03, + "learning_rate": 4.98764867465335e-05, + "loss": 1.1667, + "step": 31751 + }, + { + "epoch": 0.03, + "learning_rate": 4.98764789481453e-05, + "loss": 0.5702, + "step": 31752 + }, + { + "epoch": 0.03, + "learning_rate": 4.987647114951153e-05, + "loss": 0.4331, + "step": 31753 + }, + { + "epoch": 0.03, + "learning_rate": 4.987646335063219e-05, + "loss": 0.8206, + "step": 31754 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876455551507285e-05, + "loss": 0.9976, + "step": 31755 + }, + { + "epoch": 0.03, + "learning_rate": 4.98764477521368e-05, + "loss": 1.0751, + "step": 31756 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876439952520746e-05, + "loss": 0.8512, + "step": 31757 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876432152659125e-05, + "loss": 0.9071, + "step": 31758 + }, + { + "epoch": 0.03, + "learning_rate": 4.987642435255194e-05, + "loss": 0.8068, + "step": 31759 + }, + { + "epoch": 0.03, + "learning_rate": 4.987641655219918e-05, + "loss": 1.1453, + "step": 31760 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876408751600846e-05, + "loss": 1.1479, + "step": 31761 + }, + { + "epoch": 0.03, + "learning_rate": 4.987640095075695e-05, + "loss": 0.5931, + "step": 31762 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876393149667486e-05, + "loss": 0.4754, + "step": 31763 + }, + { + "epoch": 0.03, + "learning_rate": 4.987638534833244e-05, + "loss": 0.4786, + "step": 31764 + }, + { + "epoch": 0.03, + "learning_rate": 4.987637754675184e-05, + "loss": 0.4964, + "step": 31765 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876369744925664e-05, + "loss": 0.42, + "step": 31766 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876361942853925e-05, + "loss": 0.3279, + "step": 31767 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876354140536606e-05, + "loss": 0.3479, + "step": 31768 + }, + { + "epoch": 0.03, + "learning_rate": 4.987634633797372e-05, + "loss": 0.3595, + "step": 31769 + }, + { + "epoch": 0.03, + "learning_rate": 4.987633853516527e-05, + "loss": 0.4791, + "step": 31770 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876330732111255e-05, + "loss": 0.5427, + "step": 31771 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876322928811656e-05, + "loss": 0.351, + "step": 31772 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876315125266506e-05, + "loss": 0.3722, + "step": 31773 + }, + { + "epoch": 0.03, + "learning_rate": 4.987630732147578e-05, + "loss": 0.4441, + "step": 31774 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876299517439484e-05, + "loss": 0.3322, + "step": 31775 + }, + { + "epoch": 0.03, + "learning_rate": 4.987629171315762e-05, + "loss": 0.3475, + "step": 31776 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876283908630195e-05, + "loss": 0.3592, + "step": 31777 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876276103857186e-05, + "loss": 0.3817, + "step": 31778 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876268298838626e-05, + "loss": 0.2885, + "step": 31779 + }, + { + "epoch": 0.03, + "learning_rate": 4.987626049357449e-05, + "loss": 0.3136, + "step": 31780 + }, + { + "epoch": 0.03, + "learning_rate": 4.987625268806478e-05, + "loss": 0.3134, + "step": 31781 + }, + { + "epoch": 0.03, + "learning_rate": 4.987624488230952e-05, + "loss": 0.6942, + "step": 31782 + }, + { + "epoch": 0.03, + "learning_rate": 4.987623707630867e-05, + "loss": 0.7933, + "step": 31783 + }, + { + "epoch": 0.03, + "learning_rate": 4.987622927006227e-05, + "loss": 1.1611, + "step": 31784 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876221463570296e-05, + "loss": 0.7482, + "step": 31785 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876213656832754e-05, + "loss": 0.8452, + "step": 31786 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876205849849646e-05, + "loss": 1.1772, + "step": 31787 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876198042620966e-05, + "loss": 1.0983, + "step": 31788 + }, + { + "epoch": 0.03, + "learning_rate": 4.987619023514672e-05, + "loss": 1.3966, + "step": 31789 + }, + { + "epoch": 0.03, + "learning_rate": 4.987618242742691e-05, + "loss": 1.4792, + "step": 31790 + }, + { + "epoch": 0.03, + "learning_rate": 4.987617461946154e-05, + "loss": 1.2043, + "step": 31791 + }, + { + "epoch": 0.03, + "learning_rate": 4.987616681125059e-05, + "loss": 3.2489, + "step": 31792 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876159002794074e-05, + "loss": 1.5124, + "step": 31793 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876151194092e-05, + "loss": 1.1884, + "step": 31794 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876143385144356e-05, + "loss": 0.7855, + "step": 31795 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876135575951136e-05, + "loss": 0.3306, + "step": 31796 + }, + { + "epoch": 0.03, + "learning_rate": 4.987612776651236e-05, + "loss": 0.8425, + "step": 31797 + }, + { + "epoch": 0.03, + "learning_rate": 4.987611995682801e-05, + "loss": 1.0788, + "step": 31798 + }, + { + "epoch": 0.03, + "learning_rate": 4.98761121468981e-05, + "loss": 1.1452, + "step": 31799 + }, + { + "epoch": 0.03, + "learning_rate": 4.987610433672262e-05, + "loss": 0.8731, + "step": 31800 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876096526301575e-05, + "loss": 0.3927, + "step": 31801 + }, + { + "epoch": 0.03, + "learning_rate": 4.987608871563496e-05, + "loss": 0.1377, + "step": 31802 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876080904722776e-05, + "loss": 0.2839, + "step": 31803 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876073093565036e-05, + "loss": 0.2665, + "step": 31804 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876065282161724e-05, + "loss": 0.7329, + "step": 31805 + }, + { + "epoch": 0.03, + "learning_rate": 4.987605747051285e-05, + "loss": 0.75, + "step": 31806 + }, + { + "epoch": 0.03, + "learning_rate": 4.9876049658618405e-05, + "loss": 1.3929, + "step": 31807 + }, + { + "epoch": 0.03, + "learning_rate": 4.987604184647839e-05, + "loss": 1.36, + "step": 31808 + }, + { + "epoch": 0.03, + "learning_rate": 4.987603403409282e-05, + "loss": 0.9154, + "step": 31809 + }, + { + "epoch": 0.03, + "learning_rate": 4.987602622146168e-05, + "loss": 0.7824, + "step": 31810 + }, + { + "epoch": 0.03, + "learning_rate": 4.987601840858498e-05, + "loss": 0.8329, + "step": 31811 + }, + { + "epoch": 0.03, + "learning_rate": 4.98760105954627e-05, + "loss": 0.9107, + "step": 31812 + }, + { + "epoch": 0.03, + "learning_rate": 4.987600278209487e-05, + "loss": 0.7013, + "step": 31813 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875994968481465e-05, + "loss": 1.2348, + "step": 31814 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875987154622496e-05, + "loss": 1.1806, + "step": 31815 + }, + { + "epoch": 0.03, + "learning_rate": 4.987597934051796e-05, + "loss": 1.0009, + "step": 31816 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875971526167863e-05, + "loss": 1.1363, + "step": 31817 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875963711572206e-05, + "loss": 0.8128, + "step": 31818 + }, + { + "epoch": 0.03, + "learning_rate": 4.987595589673098e-05, + "loss": 0.7929, + "step": 31819 + }, + { + "epoch": 0.03, + "learning_rate": 4.987594808164418e-05, + "loss": 0.9732, + "step": 31820 + }, + { + "epoch": 0.03, + "learning_rate": 4.987594026631182e-05, + "loss": 1.1977, + "step": 31821 + }, + { + "epoch": 0.03, + "learning_rate": 4.98759324507339e-05, + "loss": 4.0099, + "step": 31822 + }, + { + "epoch": 0.03, + "learning_rate": 4.987592463491041e-05, + "loss": 1.0616, + "step": 31823 + }, + { + "epoch": 0.03, + "learning_rate": 4.987591681884136e-05, + "loss": 1.0005, + "step": 31824 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875909002526746e-05, + "loss": 0.5469, + "step": 31825 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875901185966555e-05, + "loss": 1.056, + "step": 31826 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875893369160806e-05, + "loss": 0.1355, + "step": 31827 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875885552109506e-05, + "loss": 0.0795, + "step": 31828 + }, + { + "epoch": 0.03, + "learning_rate": 4.987587773481263e-05, + "loss": 0.0818, + "step": 31829 + }, + { + "epoch": 0.03, + "learning_rate": 4.987586991727019e-05, + "loss": 0.0801, + "step": 31830 + }, + { + "epoch": 0.03, + "learning_rate": 4.987586209948219e-05, + "loss": 0.0864, + "step": 31831 + }, + { + "epoch": 0.03, + "learning_rate": 4.987585428144862e-05, + "loss": 0.0468, + "step": 31832 + }, + { + "epoch": 0.03, + "learning_rate": 4.987584646316948e-05, + "loss": 0.0412, + "step": 31833 + }, + { + "epoch": 0.03, + "learning_rate": 4.987583864464479e-05, + "loss": 0.0527, + "step": 31834 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875830825874535e-05, + "loss": 0.0366, + "step": 31835 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875823006858714e-05, + "loss": 0.1616, + "step": 31836 + }, + { + "epoch": 0.03, + "learning_rate": 4.987581518759733e-05, + "loss": 0.2796, + "step": 31837 + }, + { + "epoch": 0.03, + "learning_rate": 4.987580736809038e-05, + "loss": 0.0781, + "step": 31838 + }, + { + "epoch": 0.03, + "learning_rate": 4.987579954833786e-05, + "loss": 0.0846, + "step": 31839 + }, + { + "epoch": 0.03, + "learning_rate": 4.987579172833979e-05, + "loss": 0.6318, + "step": 31840 + }, + { + "epoch": 0.03, + "learning_rate": 4.987578390809615e-05, + "loss": 0.8273, + "step": 31841 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875776087606946e-05, + "loss": 1.1391, + "step": 31842 + }, + { + "epoch": 0.03, + "learning_rate": 4.987576826687218e-05, + "loss": 0.8729, + "step": 31843 + }, + { + "epoch": 0.03, + "learning_rate": 4.987576044589185e-05, + "loss": 1.1101, + "step": 31844 + }, + { + "epoch": 0.03, + "learning_rate": 4.987575262466596e-05, + "loss": 0.9679, + "step": 31845 + }, + { + "epoch": 0.03, + "learning_rate": 4.98757448031945e-05, + "loss": 1.2256, + "step": 31846 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875736981477485e-05, + "loss": 1.2033, + "step": 31847 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875729159514905e-05, + "loss": 1.0598, + "step": 31848 + }, + { + "epoch": 0.03, + "learning_rate": 4.987572133730676e-05, + "loss": 1.0714, + "step": 31849 + }, + { + "epoch": 0.03, + "learning_rate": 4.987571351485305e-05, + "loss": 0.8735, + "step": 31850 + }, + { + "epoch": 0.03, + "learning_rate": 4.987570569215379e-05, + "loss": 1.2612, + "step": 31851 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875697869208954e-05, + "loss": 3.1046, + "step": 31852 + }, + { + "epoch": 0.03, + "learning_rate": 4.987569004601856e-05, + "loss": 3.0212, + "step": 31853 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875682222582605e-05, + "loss": 3.1012, + "step": 31854 + }, + { + "epoch": 0.03, + "learning_rate": 4.987567439890108e-05, + "loss": 3.0536, + "step": 31855 + }, + { + "epoch": 0.03, + "learning_rate": 4.987566657497401e-05, + "loss": 3.0911, + "step": 31856 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875658750801364e-05, + "loss": 3.0392, + "step": 31857 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875650926383154e-05, + "loss": 3.0407, + "step": 31858 + }, + { + "epoch": 0.03, + "learning_rate": 4.987564310171939e-05, + "loss": 3.063, + "step": 31859 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875635276810065e-05, + "loss": 3.0566, + "step": 31860 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875627451655174e-05, + "loss": 3.0408, + "step": 31861 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875619626254724e-05, + "loss": 1.5149, + "step": 31862 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875611800608715e-05, + "loss": 1.7108, + "step": 31863 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875603974717135e-05, + "loss": 1.1433, + "step": 31864 + }, + { + "epoch": 0.03, + "learning_rate": 4.987559614858e-05, + "loss": 0.635, + "step": 31865 + }, + { + "epoch": 0.03, + "learning_rate": 4.98755883221973e-05, + "loss": 1.0935, + "step": 31866 + }, + { + "epoch": 0.03, + "learning_rate": 4.987558049556904e-05, + "loss": 1.0038, + "step": 31867 + }, + { + "epoch": 0.03, + "learning_rate": 4.987557266869522e-05, + "loss": 0.9091, + "step": 31868 + }, + { + "epoch": 0.03, + "learning_rate": 4.987556484157584e-05, + "loss": 1.0211, + "step": 31869 + }, + { + "epoch": 0.03, + "learning_rate": 4.987555701421089e-05, + "loss": 0.9729, + "step": 31870 + }, + { + "epoch": 0.03, + "learning_rate": 4.987554918660039e-05, + "loss": 1.053, + "step": 31871 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875541358744324e-05, + "loss": 1.1989, + "step": 31872 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875533530642706e-05, + "loss": 1.1059, + "step": 31873 + }, + { + "epoch": 0.03, + "learning_rate": 4.987552570229551e-05, + "loss": 0.9189, + "step": 31874 + }, + { + "epoch": 0.03, + "learning_rate": 4.987551787370277e-05, + "loss": 1.1019, + "step": 31875 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875510044864456e-05, + "loss": 1.0345, + "step": 31876 + }, + { + "epoch": 0.03, + "learning_rate": 4.987550221578059e-05, + "loss": 1.0127, + "step": 31877 + }, + { + "epoch": 0.03, + "learning_rate": 4.987549438645116e-05, + "loss": 1.0473, + "step": 31878 + }, + { + "epoch": 0.03, + "learning_rate": 4.987548655687617e-05, + "loss": 1.1009, + "step": 31879 + }, + { + "epoch": 0.03, + "learning_rate": 4.987547872705562e-05, + "loss": 1.1956, + "step": 31880 + }, + { + "epoch": 0.03, + "learning_rate": 4.987547089698952e-05, + "loss": 0.9584, + "step": 31881 + }, + { + "epoch": 0.03, + "learning_rate": 4.987546306667784e-05, + "loss": 0.8262, + "step": 31882 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875455236120616e-05, + "loss": 0.9753, + "step": 31883 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875447405317824e-05, + "loss": 0.7293, + "step": 31884 + }, + { + "epoch": 0.03, + "learning_rate": 4.987543957426948e-05, + "loss": 1.0467, + "step": 31885 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875431742975565e-05, + "loss": 1.0311, + "step": 31886 + }, + { + "epoch": 0.03, + "learning_rate": 4.987542391143609e-05, + "loss": 0.9075, + "step": 31887 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875416079651066e-05, + "loss": 0.7952, + "step": 31888 + }, + { + "epoch": 0.03, + "learning_rate": 4.987540824762048e-05, + "loss": 1.0468, + "step": 31889 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875400415344334e-05, + "loss": 1.0539, + "step": 31890 + }, + { + "epoch": 0.03, + "learning_rate": 4.987539258282262e-05, + "loss": 1.1983, + "step": 31891 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875384750055355e-05, + "loss": 1.2786, + "step": 31892 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875376917042526e-05, + "loss": 1.0487, + "step": 31893 + }, + { + "epoch": 0.03, + "learning_rate": 4.987536908378414e-05, + "loss": 1.1543, + "step": 31894 + }, + { + "epoch": 0.03, + "learning_rate": 4.98753612502802e-05, + "loss": 0.9508, + "step": 31895 + }, + { + "epoch": 0.03, + "learning_rate": 4.987535341653069e-05, + "loss": 0.9337, + "step": 31896 + }, + { + "epoch": 0.03, + "learning_rate": 4.987534558253562e-05, + "loss": 0.9819, + "step": 31897 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875337748295e-05, + "loss": 0.7244, + "step": 31898 + }, + { + "epoch": 0.03, + "learning_rate": 4.987532991380883e-05, + "loss": 1.1801, + "step": 31899 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875322079077085e-05, + "loss": 0.9528, + "step": 31900 + }, + { + "epoch": 0.03, + "learning_rate": 4.987531424409979e-05, + "loss": 1.0706, + "step": 31901 + }, + { + "epoch": 0.03, + "learning_rate": 4.987530640887693e-05, + "loss": 1.0384, + "step": 31902 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875298573408516e-05, + "loss": 1.0503, + "step": 31903 + }, + { + "epoch": 0.03, + "learning_rate": 4.987529073769454e-05, + "loss": 1.0293, + "step": 31904 + }, + { + "epoch": 0.03, + "learning_rate": 4.987528290173501e-05, + "loss": 0.6786, + "step": 31905 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875275065529916e-05, + "loss": 0.6744, + "step": 31906 + }, + { + "epoch": 0.03, + "learning_rate": 4.987526722907927e-05, + "loss": 0.3211, + "step": 31907 + }, + { + "epoch": 0.03, + "learning_rate": 4.987525939238307e-05, + "loss": 0.9709, + "step": 31908 + }, + { + "epoch": 0.03, + "learning_rate": 4.98752515554413e-05, + "loss": 0.8232, + "step": 31909 + }, + { + "epoch": 0.03, + "learning_rate": 4.987524371825397e-05, + "loss": 1.0957, + "step": 31910 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875235880821094e-05, + "loss": 1.2013, + "step": 31911 + }, + { + "epoch": 0.03, + "learning_rate": 4.987522804314265e-05, + "loss": 1.1761, + "step": 31912 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875220205218656e-05, + "loss": 1.115, + "step": 31913 + }, + { + "epoch": 0.03, + "learning_rate": 4.987521236704911e-05, + "loss": 0.966, + "step": 31914 + }, + { + "epoch": 0.03, + "learning_rate": 4.987520452863399e-05, + "loss": 1.0529, + "step": 31915 + }, + { + "epoch": 0.03, + "learning_rate": 4.987519668997332e-05, + "loss": 0.9583, + "step": 31916 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875188851067094e-05, + "loss": 1.0661, + "step": 31917 + }, + { + "epoch": 0.03, + "learning_rate": 4.987518101191531e-05, + "loss": 0.954, + "step": 31918 + }, + { + "epoch": 0.03, + "learning_rate": 4.987517317251797e-05, + "loss": 0.9209, + "step": 31919 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875165332875075e-05, + "loss": 0.8326, + "step": 31920 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875157492986615e-05, + "loss": 0.9026, + "step": 31921 + }, + { + "epoch": 0.03, + "learning_rate": 4.98751496528526e-05, + "loss": 1.3498, + "step": 31922 + }, + { + "epoch": 0.03, + "learning_rate": 4.987514181247303e-05, + "loss": 0.8136, + "step": 31923 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875133971847904e-05, + "loss": 0.9208, + "step": 31924 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875126130977224e-05, + "loss": 1.0414, + "step": 31925 + }, + { + "epoch": 0.03, + "learning_rate": 4.987511828986098e-05, + "loss": 0.9674, + "step": 31926 + }, + { + "epoch": 0.03, + "learning_rate": 4.987511044849918e-05, + "loss": 1.0088, + "step": 31927 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875102606891834e-05, + "loss": 1.02, + "step": 31928 + }, + { + "epoch": 0.03, + "learning_rate": 4.987509476503892e-05, + "loss": 1.4472, + "step": 31929 + }, + { + "epoch": 0.03, + "learning_rate": 4.987508692294045e-05, + "loss": 0.6673, + "step": 31930 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875079080596434e-05, + "loss": 0.8934, + "step": 31931 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875071238006846e-05, + "loss": 1.029, + "step": 31932 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875063395171714e-05, + "loss": 1.2148, + "step": 31933 + }, + { + "epoch": 0.03, + "learning_rate": 4.987505555209102e-05, + "loss": 1.2076, + "step": 31934 + }, + { + "epoch": 0.03, + "learning_rate": 4.9875047708764774e-05, + "loss": 1.2639, + "step": 31935 + }, + { + "epoch": 0.03, + "learning_rate": 4.987503986519297e-05, + "loss": 2.7615, + "step": 31936 + }, + { + "epoch": 0.03, + "learning_rate": 4.987503202137561e-05, + "loss": 1.019, + "step": 31937 + }, + { + "epoch": 0.03, + "learning_rate": 4.987502417731269e-05, + "loss": 0.6664, + "step": 31938 + }, + { + "epoch": 0.03, + "learning_rate": 4.987501633300422e-05, + "loss": 0.6963, + "step": 31939 + }, + { + "epoch": 0.03, + "learning_rate": 4.987500848845019e-05, + "loss": 1.169, + "step": 31940 + }, + { + "epoch": 0.03, + "learning_rate": 4.987500064365061e-05, + "loss": 0.7437, + "step": 31941 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874992798605476e-05, + "loss": 1.224, + "step": 31942 + }, + { + "epoch": 0.03, + "learning_rate": 4.987498495331478e-05, + "loss": 1.132, + "step": 31943 + }, + { + "epoch": 0.03, + "learning_rate": 4.987497710777853e-05, + "loss": 1.0177, + "step": 31944 + }, + { + "epoch": 0.03, + "learning_rate": 4.987496926199673e-05, + "loss": 1.2099, + "step": 31945 + }, + { + "epoch": 0.03, + "learning_rate": 4.987496141596937e-05, + "loss": 0.85, + "step": 31946 + }, + { + "epoch": 0.03, + "learning_rate": 4.987495356969645e-05, + "loss": 0.9207, + "step": 31947 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874945723177976e-05, + "loss": 0.9421, + "step": 31948 + }, + { + "epoch": 0.03, + "learning_rate": 4.987493787641396e-05, + "loss": 0.9029, + "step": 31949 + }, + { + "epoch": 0.03, + "learning_rate": 4.987493002940438e-05, + "loss": 0.98, + "step": 31950 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874922182149244e-05, + "loss": 1.0686, + "step": 31951 + }, + { + "epoch": 0.03, + "learning_rate": 4.987491433464855e-05, + "loss": 0.7382, + "step": 31952 + }, + { + "epoch": 0.03, + "learning_rate": 4.987490648690231e-05, + "loss": 1.0467, + "step": 31953 + }, + { + "epoch": 0.03, + "learning_rate": 4.987489863891051e-05, + "loss": 1.1727, + "step": 31954 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874890790673156e-05, + "loss": 0.5697, + "step": 31955 + }, + { + "epoch": 0.03, + "learning_rate": 4.987488294219025e-05, + "loss": 0.4963, + "step": 31956 + }, + { + "epoch": 0.03, + "learning_rate": 4.987487509346179e-05, + "loss": 0.7733, + "step": 31957 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874867244487775e-05, + "loss": 0.9277, + "step": 31958 + }, + { + "epoch": 0.03, + "learning_rate": 4.98748593952682e-05, + "loss": 1.258, + "step": 31959 + }, + { + "epoch": 0.03, + "learning_rate": 4.987485154580308e-05, + "loss": 1.753, + "step": 31960 + }, + { + "epoch": 0.03, + "learning_rate": 4.98748436960924e-05, + "loss": 1.6512, + "step": 31961 + }, + { + "epoch": 0.03, + "learning_rate": 4.987483584613617e-05, + "loss": 0.4306, + "step": 31962 + }, + { + "epoch": 0.03, + "learning_rate": 4.987482799593438e-05, + "loss": 1.2749, + "step": 31963 + }, + { + "epoch": 0.03, + "learning_rate": 4.987482014548704e-05, + "loss": 1.0235, + "step": 31964 + }, + { + "epoch": 0.03, + "learning_rate": 4.987481229479415e-05, + "loss": 0.7891, + "step": 31965 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874804443855696e-05, + "loss": 0.7628, + "step": 31966 + }, + { + "epoch": 0.03, + "learning_rate": 4.98747965926717e-05, + "loss": 1.0068, + "step": 31967 + }, + { + "epoch": 0.03, + "learning_rate": 4.987478874124214e-05, + "loss": 0.9285, + "step": 31968 + }, + { + "epoch": 0.03, + "learning_rate": 4.987478088956703e-05, + "loss": 1.2886, + "step": 31969 + }, + { + "epoch": 0.03, + "learning_rate": 4.987477303764637e-05, + "loss": 1.0323, + "step": 31970 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874765185480154e-05, + "loss": 0.8317, + "step": 31971 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874757333068386e-05, + "loss": 1.1297, + "step": 31972 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874749480411066e-05, + "loss": 1.2266, + "step": 31973 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874741627508195e-05, + "loss": 0.6333, + "step": 31974 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874733774359766e-05, + "loss": 0.5871, + "step": 31975 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874725920965785e-05, + "loss": 1.0141, + "step": 31976 + }, + { + "epoch": 0.03, + "learning_rate": 4.987471806732625e-05, + "loss": 1.096, + "step": 31977 + }, + { + "epoch": 0.03, + "learning_rate": 4.987471021344117e-05, + "loss": 0.916, + "step": 31978 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874702359310533e-05, + "loss": 1.3655, + "step": 31979 + }, + { + "epoch": 0.03, + "learning_rate": 4.987469450493434e-05, + "loss": 0.8321, + "step": 31980 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874686650312595e-05, + "loss": 1.0193, + "step": 31981 + }, + { + "epoch": 0.03, + "learning_rate": 4.98746787954453e-05, + "loss": 0.7254, + "step": 31982 + }, + { + "epoch": 0.03, + "learning_rate": 4.987467094033245e-05, + "loss": 0.6001, + "step": 31983 + }, + { + "epoch": 0.03, + "learning_rate": 4.987466308497405e-05, + "loss": 0.6294, + "step": 31984 + }, + { + "epoch": 0.03, + "learning_rate": 4.98746552293701e-05, + "loss": 0.6862, + "step": 31985 + }, + { + "epoch": 0.03, + "learning_rate": 4.987464737352059e-05, + "loss": 0.6575, + "step": 31986 + }, + { + "epoch": 0.03, + "learning_rate": 4.987463951742554e-05, + "loss": 0.7447, + "step": 31987 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874631661084926e-05, + "loss": 0.8369, + "step": 31988 + }, + { + "epoch": 0.03, + "learning_rate": 4.987462380449876e-05, + "loss": 0.7617, + "step": 31989 + }, + { + "epoch": 0.03, + "learning_rate": 4.987461594766706e-05, + "loss": 0.9034, + "step": 31990 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874608090589795e-05, + "loss": 1.0087, + "step": 31991 + }, + { + "epoch": 0.03, + "learning_rate": 4.987460023326698e-05, + "loss": 0.9426, + "step": 31992 + }, + { + "epoch": 0.03, + "learning_rate": 4.987459237569861e-05, + "loss": 1.3042, + "step": 31993 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874584517884694e-05, + "loss": 1.0579, + "step": 31994 + }, + { + "epoch": 0.03, + "learning_rate": 4.987457665982522e-05, + "loss": 0.7256, + "step": 31995 + }, + { + "epoch": 0.03, + "learning_rate": 4.987456880152019e-05, + "loss": 0.5277, + "step": 31996 + }, + { + "epoch": 0.03, + "learning_rate": 4.987456094296963e-05, + "loss": 0.9389, + "step": 31997 + }, + { + "epoch": 0.03, + "learning_rate": 4.98745530841735e-05, + "loss": 0.959, + "step": 31998 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874545225131825e-05, + "loss": 1.1667, + "step": 31999 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874537365844595e-05, + "loss": 1.1114, + "step": 32000 + }, + { + "epoch": 0.03, + "eval_loss": 1.0753909349441528, + "eval_runtime": 126.3526, + "eval_samples_per_second": 10.961, + "eval_steps_per_second": 5.485, + "step": 32000 + }, + { + "epoch": 0.03, + "learning_rate": 4.987452950631182e-05, + "loss": 1.2136, + "step": 32001 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874521646533495e-05, + "loss": 1.2355, + "step": 32002 + }, + { + "epoch": 0.03, + "learning_rate": 4.987451378650961e-05, + "loss": 1.2057, + "step": 32003 + }, + { + "epoch": 0.03, + "learning_rate": 4.987450592624019e-05, + "loss": 1.0877, + "step": 32004 + }, + { + "epoch": 0.03, + "learning_rate": 4.98744980657252e-05, + "loss": 0.8758, + "step": 32005 + }, + { + "epoch": 0.03, + "learning_rate": 4.987449020496468e-05, + "loss": 0.877, + "step": 32006 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874482343958595e-05, + "loss": 1.0007, + "step": 32007 + }, + { + "epoch": 0.03, + "learning_rate": 4.987447448270696e-05, + "loss": 0.8432, + "step": 32008 + }, + { + "epoch": 0.03, + "learning_rate": 4.987446662120978e-05, + "loss": 0.9247, + "step": 32009 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874458759467044e-05, + "loss": 1.1426, + "step": 32010 + }, + { + "epoch": 0.03, + "learning_rate": 4.987445089747876e-05, + "loss": 1.1699, + "step": 32011 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874443035244935e-05, + "loss": 0.8658, + "step": 32012 + }, + { + "epoch": 0.03, + "learning_rate": 4.987443517276556e-05, + "loss": 1.0073, + "step": 32013 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874427310040613e-05, + "loss": 1.0153, + "step": 32014 + }, + { + "epoch": 0.03, + "learning_rate": 4.987441944707014e-05, + "loss": 0.8991, + "step": 32015 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874411583854106e-05, + "loss": 0.9898, + "step": 32016 + }, + { + "epoch": 0.03, + "learning_rate": 4.987440372039253e-05, + "loss": 0.8066, + "step": 32017 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874395856685394e-05, + "loss": 1.2558, + "step": 32018 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874387992732707e-05, + "loss": 1.0333, + "step": 32019 + }, + { + "epoch": 0.03, + "learning_rate": 4.987438012853448e-05, + "loss": 0.8304, + "step": 32020 + }, + { + "epoch": 0.03, + "learning_rate": 4.98743722640907e-05, + "loss": 1.0565, + "step": 32021 + }, + { + "epoch": 0.03, + "learning_rate": 4.987436439940138e-05, + "loss": 0.794, + "step": 32022 + }, + { + "epoch": 0.03, + "learning_rate": 4.987435653446649e-05, + "loss": 0.9759, + "step": 32023 + }, + { + "epoch": 0.03, + "learning_rate": 4.987434866928606e-05, + "loss": 0.7237, + "step": 32024 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874340803860085e-05, + "loss": 1.0472, + "step": 32025 + }, + { + "epoch": 0.03, + "learning_rate": 4.987433293818856e-05, + "loss": 0.9337, + "step": 32026 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874325072271487e-05, + "loss": 0.9919, + "step": 32027 + }, + { + "epoch": 0.03, + "learning_rate": 4.987431720610887e-05, + "loss": 0.7039, + "step": 32028 + }, + { + "epoch": 0.03, + "learning_rate": 4.987430933970069e-05, + "loss": 1.026, + "step": 32029 + }, + { + "epoch": 0.03, + "learning_rate": 4.987430147304697e-05, + "loss": 0.8702, + "step": 32030 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874293606147706e-05, + "loss": 1.022, + "step": 32031 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874285739002884e-05, + "loss": 1.2144, + "step": 32032 + }, + { + "epoch": 0.03, + "learning_rate": 4.987427787161252e-05, + "loss": 0.8762, + "step": 32033 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874270003976605e-05, + "loss": 1.0264, + "step": 32034 + }, + { + "epoch": 0.03, + "learning_rate": 4.987426213609514e-05, + "loss": 1.1591, + "step": 32035 + }, + { + "epoch": 0.03, + "learning_rate": 4.987425426796813e-05, + "loss": 0.6836, + "step": 32036 + }, + { + "epoch": 0.03, + "learning_rate": 4.987424639959557e-05, + "loss": 0.7129, + "step": 32037 + }, + { + "epoch": 0.03, + "learning_rate": 4.987423853097746e-05, + "loss": 0.9645, + "step": 32038 + }, + { + "epoch": 0.03, + "learning_rate": 4.987423066211381e-05, + "loss": 1.0317, + "step": 32039 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874222793004596e-05, + "loss": 1.0358, + "step": 32040 + }, + { + "epoch": 0.03, + "learning_rate": 4.987421492364985e-05, + "loss": 0.5857, + "step": 32041 + }, + { + "epoch": 0.03, + "learning_rate": 4.987420705404955e-05, + "loss": 0.7896, + "step": 32042 + }, + { + "epoch": 0.03, + "learning_rate": 4.987419918420371e-05, + "loss": 0.6572, + "step": 32043 + }, + { + "epoch": 0.03, + "learning_rate": 4.987419131411231e-05, + "loss": 1.0858, + "step": 32044 + }, + { + "epoch": 0.03, + "learning_rate": 4.987418344377537e-05, + "loss": 0.9656, + "step": 32045 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874175573192877e-05, + "loss": 1.0998, + "step": 32046 + }, + { + "epoch": 0.03, + "learning_rate": 4.987416770236484e-05, + "loss": 0.7939, + "step": 32047 + }, + { + "epoch": 0.03, + "learning_rate": 4.987415983129126e-05, + "loss": 0.5796, + "step": 32048 + }, + { + "epoch": 0.03, + "learning_rate": 4.987415195997213e-05, + "loss": 0.9221, + "step": 32049 + }, + { + "epoch": 0.03, + "learning_rate": 4.987414408840745e-05, + "loss": 1.0348, + "step": 32050 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874136216597223e-05, + "loss": 1.0743, + "step": 32051 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874128344541447e-05, + "loss": 1.2114, + "step": 32052 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874120472240125e-05, + "loss": 1.6453, + "step": 32053 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874112599693266e-05, + "loss": 1.1443, + "step": 32054 + }, + { + "epoch": 0.03, + "learning_rate": 4.987410472690085e-05, + "loss": 0.6813, + "step": 32055 + }, + { + "epoch": 0.03, + "learning_rate": 4.987409685386289e-05, + "loss": 0.8601, + "step": 32056 + }, + { + "epoch": 0.03, + "learning_rate": 4.987408898057938e-05, + "loss": 0.8051, + "step": 32057 + }, + { + "epoch": 0.03, + "learning_rate": 4.987408110705033e-05, + "loss": 0.8645, + "step": 32058 + }, + { + "epoch": 0.03, + "learning_rate": 4.987407323327573e-05, + "loss": 0.7239, + "step": 32059 + }, + { + "epoch": 0.03, + "learning_rate": 4.987406535925558e-05, + "loss": 0.6302, + "step": 32060 + }, + { + "epoch": 0.03, + "learning_rate": 4.987405748498989e-05, + "loss": 0.7608, + "step": 32061 + }, + { + "epoch": 0.03, + "learning_rate": 4.987404961047866e-05, + "loss": 0.5376, + "step": 32062 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874041735721864e-05, + "loss": 0.9317, + "step": 32063 + }, + { + "epoch": 0.03, + "learning_rate": 4.987403386071954e-05, + "loss": 0.9353, + "step": 32064 + }, + { + "epoch": 0.03, + "learning_rate": 4.987402598547166e-05, + "loss": 0.75, + "step": 32065 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874018109978236e-05, + "loss": 1.1904, + "step": 32066 + }, + { + "epoch": 0.03, + "learning_rate": 4.987401023423927e-05, + "loss": 1.3508, + "step": 32067 + }, + { + "epoch": 0.03, + "learning_rate": 4.9874002358254756e-05, + "loss": 0.7214, + "step": 32068 + }, + { + "epoch": 0.03, + "learning_rate": 4.98739944820247e-05, + "loss": 0.9986, + "step": 32069 + }, + { + "epoch": 0.03, + "learning_rate": 4.98739866055491e-05, + "loss": 0.9832, + "step": 32070 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873978728827945e-05, + "loss": 1.0319, + "step": 32071 + }, + { + "epoch": 0.03, + "learning_rate": 4.987397085186125e-05, + "loss": 0.767, + "step": 32072 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873962974649005e-05, + "loss": 1.0746, + "step": 32073 + }, + { + "epoch": 0.03, + "learning_rate": 4.987395509719122e-05, + "loss": 1.2047, + "step": 32074 + }, + { + "epoch": 0.03, + "learning_rate": 4.987394721948788e-05, + "loss": 0.8954, + "step": 32075 + }, + { + "epoch": 0.03, + "learning_rate": 4.987393934153901e-05, + "loss": 0.761, + "step": 32076 + }, + { + "epoch": 0.03, + "learning_rate": 4.987393146334459e-05, + "loss": 0.9105, + "step": 32077 + }, + { + "epoch": 0.03, + "learning_rate": 4.987392358490462e-05, + "loss": 1.0795, + "step": 32078 + }, + { + "epoch": 0.03, + "learning_rate": 4.987391570621911e-05, + "loss": 1.642, + "step": 32079 + }, + { + "epoch": 0.03, + "learning_rate": 4.987390782728805e-05, + "loss": 1.1692, + "step": 32080 + }, + { + "epoch": 0.03, + "learning_rate": 4.987389994811145e-05, + "loss": 0.9686, + "step": 32081 + }, + { + "epoch": 0.03, + "learning_rate": 4.98738920686893e-05, + "loss": 0.9879, + "step": 32082 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873884189021616e-05, + "loss": 1.2706, + "step": 32083 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873876309108375e-05, + "loss": 0.627, + "step": 32084 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873868428949597e-05, + "loss": 1.0496, + "step": 32085 + }, + { + "epoch": 0.03, + "learning_rate": 4.987386054854527e-05, + "loss": 1.2311, + "step": 32086 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873852667895405e-05, + "loss": 0.7827, + "step": 32087 + }, + { + "epoch": 0.03, + "learning_rate": 4.987384478699999e-05, + "loss": 0.7382, + "step": 32088 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873836905859035e-05, + "loss": 0.913, + "step": 32089 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873829024472526e-05, + "loss": 2.3012, + "step": 32090 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873821142840486e-05, + "loss": 2.4916, + "step": 32091 + }, + { + "epoch": 0.03, + "learning_rate": 4.98738132609629e-05, + "loss": 2.109, + "step": 32092 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873805378839765e-05, + "loss": 1.0196, + "step": 32093 + }, + { + "epoch": 0.03, + "learning_rate": 4.987379749647109e-05, + "loss": 1.1358, + "step": 32094 + }, + { + "epoch": 0.03, + "learning_rate": 4.987378961385687e-05, + "loss": 1.1044, + "step": 32095 + }, + { + "epoch": 0.03, + "learning_rate": 4.98737817309971e-05, + "loss": 0.842, + "step": 32096 + }, + { + "epoch": 0.03, + "learning_rate": 4.98737738478918e-05, + "loss": 0.835, + "step": 32097 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873765964540955e-05, + "loss": 0.9828, + "step": 32098 + }, + { + "epoch": 0.03, + "learning_rate": 4.987375808094456e-05, + "loss": 1.2344, + "step": 32099 + }, + { + "epoch": 0.03, + "learning_rate": 4.987375019710262e-05, + "loss": 1.125, + "step": 32100 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873742313015135e-05, + "loss": 1.1052, + "step": 32101 + }, + { + "epoch": 0.03, + "learning_rate": 4.987373442868212e-05, + "loss": 1.1208, + "step": 32102 + }, + { + "epoch": 0.03, + "learning_rate": 4.987372654410355e-05, + "loss": 0.9908, + "step": 32103 + }, + { + "epoch": 0.03, + "learning_rate": 4.987371865927945e-05, + "loss": 0.8829, + "step": 32104 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873710774209795e-05, + "loss": 1.0347, + "step": 32105 + }, + { + "epoch": 0.03, + "learning_rate": 4.98737028888946e-05, + "loss": 1.0009, + "step": 32106 + }, + { + "epoch": 0.03, + "learning_rate": 4.987369500333386e-05, + "loss": 1.1277, + "step": 32107 + }, + { + "epoch": 0.03, + "learning_rate": 4.987368711752758e-05, + "loss": 1.1338, + "step": 32108 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873679231475753e-05, + "loss": 1.3359, + "step": 32109 + }, + { + "epoch": 0.03, + "learning_rate": 4.987367134517839e-05, + "loss": 1.1322, + "step": 32110 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873663458635485e-05, + "loss": 1.1468, + "step": 32111 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873655571847034e-05, + "loss": 1.0133, + "step": 32112 + }, + { + "epoch": 0.03, + "learning_rate": 4.987364768481304e-05, + "loss": 0.7846, + "step": 32113 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873639797533504e-05, + "loss": 0.8665, + "step": 32114 + }, + { + "epoch": 0.03, + "learning_rate": 4.987363191000843e-05, + "loss": 0.7232, + "step": 32115 + }, + { + "epoch": 0.03, + "learning_rate": 4.987362402223781e-05, + "loss": 0.726, + "step": 32116 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873616134221654e-05, + "loss": 0.7718, + "step": 32117 + }, + { + "epoch": 0.03, + "learning_rate": 4.987360824595995e-05, + "loss": 0.7851, + "step": 32118 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873600357452704e-05, + "loss": 0.6762, + "step": 32119 + }, + { + "epoch": 0.03, + "learning_rate": 4.987359246869992e-05, + "loss": 0.2888, + "step": 32120 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873584579701596e-05, + "loss": 0.2416, + "step": 32121 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873576690457725e-05, + "loss": 0.2012, + "step": 32122 + }, + { + "epoch": 0.03, + "learning_rate": 4.987356880096831e-05, + "loss": 0.4416, + "step": 32123 + }, + { + "epoch": 0.03, + "learning_rate": 4.987356091123336e-05, + "loss": 0.9283, + "step": 32124 + }, + { + "epoch": 0.03, + "learning_rate": 4.987355302125287e-05, + "loss": 0.9956, + "step": 32125 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873545131026835e-05, + "loss": 1.0622, + "step": 32126 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873537240555254e-05, + "loss": 0.8604, + "step": 32127 + }, + { + "epoch": 0.03, + "learning_rate": 4.987352934983814e-05, + "loss": 1.3243, + "step": 32128 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873521458875485e-05, + "loss": 1.7325, + "step": 32129 + }, + { + "epoch": 0.03, + "learning_rate": 4.987351356766728e-05, + "loss": 1.6223, + "step": 32130 + }, + { + "epoch": 0.03, + "learning_rate": 4.987350567621355e-05, + "loss": 1.6283, + "step": 32131 + }, + { + "epoch": 0.03, + "learning_rate": 4.987349778451427e-05, + "loss": 1.0826, + "step": 32132 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873489892569445e-05, + "loss": 1.12, + "step": 32133 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873482000379085e-05, + "loss": 0.687, + "step": 32134 + }, + { + "epoch": 0.03, + "learning_rate": 4.987347410794319e-05, + "loss": 0.9835, + "step": 32135 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873466215261744e-05, + "loss": 1.0354, + "step": 32136 + }, + { + "epoch": 0.03, + "learning_rate": 4.987345832233476e-05, + "loss": 1.08, + "step": 32137 + }, + { + "epoch": 0.03, + "learning_rate": 4.987345042916224e-05, + "loss": 0.9266, + "step": 32138 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873442535744176e-05, + "loss": 0.9142, + "step": 32139 + }, + { + "epoch": 0.03, + "learning_rate": 4.987343464208057e-05, + "loss": 0.8697, + "step": 32140 + }, + { + "epoch": 0.03, + "learning_rate": 4.987342674817142e-05, + "loss": 1.0542, + "step": 32141 + }, + { + "epoch": 0.03, + "learning_rate": 4.987341885401674e-05, + "loss": 1.0708, + "step": 32142 + }, + { + "epoch": 0.03, + "learning_rate": 4.987341095961652e-05, + "loss": 0.7928, + "step": 32143 + }, + { + "epoch": 0.03, + "learning_rate": 4.987340306497076e-05, + "loss": 0.9439, + "step": 32144 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873395170079454e-05, + "loss": 0.7858, + "step": 32145 + }, + { + "epoch": 0.03, + "learning_rate": 4.987338727494261e-05, + "loss": 0.9896, + "step": 32146 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873379379560226e-05, + "loss": 0.8663, + "step": 32147 + }, + { + "epoch": 0.03, + "learning_rate": 4.987337148393231e-05, + "loss": 0.6797, + "step": 32148 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873363588058846e-05, + "loss": 1.0168, + "step": 32149 + }, + { + "epoch": 0.03, + "learning_rate": 4.987335569193985e-05, + "loss": 1.0112, + "step": 32150 + }, + { + "epoch": 0.03, + "learning_rate": 4.987334779557531e-05, + "loss": 0.621, + "step": 32151 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873339898965224e-05, + "loss": 1.0882, + "step": 32152 + }, + { + "epoch": 0.03, + "learning_rate": 4.987333200210961e-05, + "loss": 0.469, + "step": 32153 + }, + { + "epoch": 0.03, + "learning_rate": 4.987332410500845e-05, + "loss": 0.9131, + "step": 32154 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873316207661755e-05, + "loss": 0.9622, + "step": 32155 + }, + { + "epoch": 0.03, + "learning_rate": 4.987330831006952e-05, + "loss": 1.1395, + "step": 32156 + }, + { + "epoch": 0.03, + "learning_rate": 4.987330041223175e-05, + "loss": 0.7577, + "step": 32157 + }, + { + "epoch": 0.03, + "learning_rate": 4.987329251414844e-05, + "loss": 0.9235, + "step": 32158 + }, + { + "epoch": 0.03, + "learning_rate": 4.987328461581958e-05, + "loss": 1.1154, + "step": 32159 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873276717245196e-05, + "loss": 0.9374, + "step": 32160 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873268818425265e-05, + "loss": 0.9297, + "step": 32161 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873260919359796e-05, + "loss": 1.1365, + "step": 32162 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873253020048796e-05, + "loss": 1.0673, + "step": 32163 + }, + { + "epoch": 0.03, + "learning_rate": 4.987324512049225e-05, + "loss": 1.0278, + "step": 32164 + }, + { + "epoch": 0.03, + "learning_rate": 4.987323722069017e-05, + "loss": 0.8769, + "step": 32165 + }, + { + "epoch": 0.03, + "learning_rate": 4.987322932064255e-05, + "loss": 0.9991, + "step": 32166 + }, + { + "epoch": 0.03, + "learning_rate": 4.987322142034939e-05, + "loss": 1.1742, + "step": 32167 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873213519810694e-05, + "loss": 1.2638, + "step": 32168 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873205619026466e-05, + "loss": 1.0637, + "step": 32169 + }, + { + "epoch": 0.03, + "learning_rate": 4.987319771799669e-05, + "loss": 0.957, + "step": 32170 + }, + { + "epoch": 0.03, + "learning_rate": 4.987318981672138e-05, + "loss": 1.0365, + "step": 32171 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873181915200536e-05, + "loss": 0.8617, + "step": 32172 + }, + { + "epoch": 0.03, + "learning_rate": 4.987317401343415e-05, + "loss": 0.6602, + "step": 32173 + }, + { + "epoch": 0.03, + "learning_rate": 4.987316611142223e-05, + "loss": 1.0235, + "step": 32174 + }, + { + "epoch": 0.03, + "learning_rate": 4.987315820916477e-05, + "loss": 0.985, + "step": 32175 + }, + { + "epoch": 0.03, + "learning_rate": 4.987315030666178e-05, + "loss": 1.0612, + "step": 32176 + }, + { + "epoch": 0.03, + "learning_rate": 4.987314240391324e-05, + "loss": 0.9585, + "step": 32177 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873134500919166e-05, + "loss": 0.9039, + "step": 32178 + }, + { + "epoch": 0.03, + "learning_rate": 4.987312659767956e-05, + "loss": 0.9694, + "step": 32179 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873118694194415e-05, + "loss": 0.9437, + "step": 32180 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873110790463726e-05, + "loss": 1.0862, + "step": 32181 + }, + { + "epoch": 0.03, + "learning_rate": 4.987310288648751e-05, + "loss": 1.0017, + "step": 32182 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873094982265754e-05, + "loss": 0.9497, + "step": 32183 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873087077798465e-05, + "loss": 1.0259, + "step": 32184 + }, + { + "epoch": 0.03, + "learning_rate": 4.987307917308564e-05, + "loss": 1.1924, + "step": 32185 + }, + { + "epoch": 0.03, + "learning_rate": 4.987307126812727e-05, + "loss": 1.2114, + "step": 32186 + }, + { + "epoch": 0.03, + "learning_rate": 4.987306336292337e-05, + "loss": 1.3263, + "step": 32187 + }, + { + "epoch": 0.03, + "learning_rate": 4.987305545747393e-05, + "loss": 0.8753, + "step": 32188 + }, + { + "epoch": 0.03, + "learning_rate": 4.987304755177895e-05, + "loss": 0.8218, + "step": 32189 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873039645838446e-05, + "loss": 0.7118, + "step": 32190 + }, + { + "epoch": 0.03, + "learning_rate": 4.98730317396524e-05, + "loss": 0.5357, + "step": 32191 + }, + { + "epoch": 0.03, + "learning_rate": 4.987302383322081e-05, + "loss": 0.8419, + "step": 32192 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873015926543696e-05, + "loss": 1.1089, + "step": 32193 + }, + { + "epoch": 0.03, + "learning_rate": 4.9873008019621034e-05, + "loss": 1.0913, + "step": 32194 + }, + { + "epoch": 0.03, + "learning_rate": 4.987300011245285e-05, + "loss": 1.0098, + "step": 32195 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872992205039124e-05, + "loss": 0.9657, + "step": 32196 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872984297379855e-05, + "loss": 1.0139, + "step": 32197 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872976389475056e-05, + "loss": 1.048, + "step": 32198 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872968481324725e-05, + "loss": 1.0603, + "step": 32199 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872960572928856e-05, + "loss": 0.8554, + "step": 32200 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872952664287456e-05, + "loss": 1.222, + "step": 32201 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872944755400505e-05, + "loss": 1.0251, + "step": 32202 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872936846268036e-05, + "loss": 1.1161, + "step": 32203 + }, + { + "epoch": 0.03, + "learning_rate": 4.987292893689003e-05, + "loss": 1.1518, + "step": 32204 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872921027266484e-05, + "loss": 0.7286, + "step": 32205 + }, + { + "epoch": 0.03, + "learning_rate": 4.98729131173974e-05, + "loss": 0.3899, + "step": 32206 + }, + { + "epoch": 0.03, + "learning_rate": 4.987290520728278e-05, + "loss": 1.0026, + "step": 32207 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872897296922636e-05, + "loss": 0.9712, + "step": 32208 + }, + { + "epoch": 0.03, + "learning_rate": 4.987288938631695e-05, + "loss": 0.7481, + "step": 32209 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872881475465725e-05, + "loss": 0.8154, + "step": 32210 + }, + { + "epoch": 0.03, + "learning_rate": 4.987287356436898e-05, + "loss": 1.4481, + "step": 32211 + }, + { + "epoch": 0.03, + "learning_rate": 4.987286565302669e-05, + "loss": 1.0816, + "step": 32212 + }, + { + "epoch": 0.03, + "learning_rate": 4.987285774143886e-05, + "loss": 0.987, + "step": 32213 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872849829605504e-05, + "loss": 0.9902, + "step": 32214 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872841917526614e-05, + "loss": 1.0435, + "step": 32215 + }, + { + "epoch": 0.03, + "learning_rate": 4.987283400520219e-05, + "loss": 1.0892, + "step": 32216 + }, + { + "epoch": 0.03, + "learning_rate": 4.987282609263223e-05, + "loss": 1.6691, + "step": 32217 + }, + { + "epoch": 0.03, + "learning_rate": 4.987281817981674e-05, + "loss": 0.781, + "step": 32218 + }, + { + "epoch": 0.03, + "learning_rate": 4.987281026675571e-05, + "loss": 0.739, + "step": 32219 + }, + { + "epoch": 0.03, + "learning_rate": 4.987280235344914e-05, + "loss": 0.9163, + "step": 32220 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872794439897045e-05, + "loss": 0.6466, + "step": 32221 + }, + { + "epoch": 0.03, + "learning_rate": 4.987278652609942e-05, + "loss": 0.4372, + "step": 32222 + }, + { + "epoch": 0.03, + "learning_rate": 4.987277861205626e-05, + "loss": 0.922, + "step": 32223 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872770697767554e-05, + "loss": 0.9833, + "step": 32224 + }, + { + "epoch": 0.03, + "learning_rate": 4.987276278323333e-05, + "loss": 0.8583, + "step": 32225 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872754868453566e-05, + "loss": 0.8442, + "step": 32226 + }, + { + "epoch": 0.03, + "learning_rate": 4.987274695342827e-05, + "loss": 0.9774, + "step": 32227 + }, + { + "epoch": 0.03, + "learning_rate": 4.987273903815743e-05, + "loss": 0.6232, + "step": 32228 + }, + { + "epoch": 0.03, + "learning_rate": 4.987273112264107e-05, + "loss": 1.2125, + "step": 32229 + }, + { + "epoch": 0.03, + "learning_rate": 4.987272320687918e-05, + "loss": 1.9775, + "step": 32230 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872715290871746e-05, + "loss": 2.0903, + "step": 32231 + }, + { + "epoch": 0.03, + "learning_rate": 4.987270737461878e-05, + "loss": 1.348, + "step": 32232 + }, + { + "epoch": 0.03, + "learning_rate": 4.987269945812029e-05, + "loss": 0.6764, + "step": 32233 + }, + { + "epoch": 0.03, + "learning_rate": 4.987269154137626e-05, + "loss": 1.1157, + "step": 32234 + }, + { + "epoch": 0.03, + "learning_rate": 4.987268362438669e-05, + "loss": 1.1064, + "step": 32235 + }, + { + "epoch": 0.03, + "learning_rate": 4.987267570715161e-05, + "loss": 1.077, + "step": 32236 + }, + { + "epoch": 0.03, + "learning_rate": 4.987266778967098e-05, + "loss": 0.9358, + "step": 32237 + }, + { + "epoch": 0.03, + "learning_rate": 4.987265987194482e-05, + "loss": 0.8005, + "step": 32238 + }, + { + "epoch": 0.03, + "learning_rate": 4.987265195397313e-05, + "loss": 0.9895, + "step": 32239 + }, + { + "epoch": 0.03, + "learning_rate": 4.98726440357559e-05, + "loss": 0.9428, + "step": 32240 + }, + { + "epoch": 0.03, + "learning_rate": 4.987263611729315e-05, + "loss": 1.0787, + "step": 32241 + }, + { + "epoch": 0.03, + "learning_rate": 4.987262819858486e-05, + "loss": 1.0498, + "step": 32242 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872620279631037e-05, + "loss": 0.9515, + "step": 32243 + }, + { + "epoch": 0.03, + "learning_rate": 4.987261236043169e-05, + "loss": 0.9531, + "step": 32244 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872604440986804e-05, + "loss": 0.8553, + "step": 32245 + }, + { + "epoch": 0.03, + "learning_rate": 4.987259652129639e-05, + "loss": 0.3532, + "step": 32246 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872588601360434e-05, + "loss": 0.6645, + "step": 32247 + }, + { + "epoch": 0.03, + "learning_rate": 4.987258068117896e-05, + "loss": 0.673, + "step": 32248 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872572760751946e-05, + "loss": 1.029, + "step": 32249 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872564840079405e-05, + "loss": 0.7275, + "step": 32250 + }, + { + "epoch": 0.03, + "learning_rate": 4.987255691916133e-05, + "loss": 0.7414, + "step": 32251 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872548997997724e-05, + "loss": 0.4638, + "step": 32252 + }, + { + "epoch": 0.03, + "learning_rate": 4.987254107658858e-05, + "loss": 0.6007, + "step": 32253 + }, + { + "epoch": 0.03, + "learning_rate": 4.987253315493392e-05, + "loss": 0.5074, + "step": 32254 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872525233033715e-05, + "loss": 0.4355, + "step": 32255 + }, + { + "epoch": 0.03, + "learning_rate": 4.987251731088799e-05, + "loss": 0.7918, + "step": 32256 + }, + { + "epoch": 0.03, + "learning_rate": 4.987250938849672e-05, + "loss": 1.6196, + "step": 32257 + }, + { + "epoch": 0.03, + "learning_rate": 4.987250146585993e-05, + "loss": 0.8896, + "step": 32258 + }, + { + "epoch": 0.03, + "learning_rate": 4.987249354297761e-05, + "loss": 0.6936, + "step": 32259 + }, + { + "epoch": 0.03, + "learning_rate": 4.987248561984976e-05, + "loss": 0.8433, + "step": 32260 + }, + { + "epoch": 0.03, + "learning_rate": 4.987247769647637e-05, + "loss": 0.7155, + "step": 32261 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872469772857455e-05, + "loss": 1.1458, + "step": 32262 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872461848993016e-05, + "loss": 0.9279, + "step": 32263 + }, + { + "epoch": 0.03, + "learning_rate": 4.987245392488303e-05, + "loss": 1.0874, + "step": 32264 + }, + { + "epoch": 0.03, + "learning_rate": 4.987244600052753e-05, + "loss": 1.0139, + "step": 32265 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872438075926486e-05, + "loss": 1.1876, + "step": 32266 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872430151079916e-05, + "loss": 0.5851, + "step": 32267 + }, + { + "epoch": 0.03, + "learning_rate": 4.987242222598782e-05, + "loss": 0.8631, + "step": 32268 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872414300650196e-05, + "loss": 1.2758, + "step": 32269 + }, + { + "epoch": 0.03, + "learning_rate": 4.987240637506704e-05, + "loss": 1.162, + "step": 32270 + }, + { + "epoch": 0.03, + "learning_rate": 4.987239844923836e-05, + "loss": 1.1979, + "step": 32271 + }, + { + "epoch": 0.03, + "learning_rate": 4.987239052316414e-05, + "loss": 0.9473, + "step": 32272 + }, + { + "epoch": 0.03, + "learning_rate": 4.987238259684439e-05, + "loss": 1.1872, + "step": 32273 + }, + { + "epoch": 0.03, + "learning_rate": 4.987237467027911e-05, + "loss": 0.8769, + "step": 32274 + }, + { + "epoch": 0.03, + "learning_rate": 4.98723667434683e-05, + "loss": 0.9966, + "step": 32275 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872358816411966e-05, + "loss": 0.9936, + "step": 32276 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872350889110105e-05, + "loss": 1.1498, + "step": 32277 + }, + { + "epoch": 0.03, + "learning_rate": 4.987234296156271e-05, + "loss": 0.6918, + "step": 32278 + }, + { + "epoch": 0.03, + "learning_rate": 4.987233503376978e-05, + "loss": 1.1686, + "step": 32279 + }, + { + "epoch": 0.03, + "learning_rate": 4.987232710573133e-05, + "loss": 1.0333, + "step": 32280 + }, + { + "epoch": 0.03, + "learning_rate": 4.987231917744734e-05, + "loss": 0.989, + "step": 32281 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872311248917833e-05, + "loss": 1.0477, + "step": 32282 + }, + { + "epoch": 0.03, + "learning_rate": 4.98723033201428e-05, + "loss": 1.1021, + "step": 32283 + }, + { + "epoch": 0.03, + "learning_rate": 4.987229539112223e-05, + "loss": 0.7447, + "step": 32284 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872287461856124e-05, + "loss": 1.1506, + "step": 32285 + }, + { + "epoch": 0.03, + "learning_rate": 4.98722795323445e-05, + "loss": 1.0141, + "step": 32286 + }, + { + "epoch": 0.03, + "learning_rate": 4.987227160258735e-05, + "loss": 1.0178, + "step": 32287 + }, + { + "epoch": 0.03, + "learning_rate": 4.987226367258466e-05, + "loss": 0.7663, + "step": 32288 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872255742336447e-05, + "loss": 0.8601, + "step": 32289 + }, + { + "epoch": 0.03, + "learning_rate": 4.987224781184271e-05, + "loss": 1.008, + "step": 32290 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872239881103445e-05, + "loss": 1.0658, + "step": 32291 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872231950118644e-05, + "loss": 0.7518, + "step": 32292 + }, + { + "epoch": 0.03, + "learning_rate": 4.987222401888831e-05, + "loss": 1.0048, + "step": 32293 + }, + { + "epoch": 0.03, + "learning_rate": 4.987221608741246e-05, + "loss": 0.8833, + "step": 32294 + }, + { + "epoch": 0.03, + "learning_rate": 4.987220815569108e-05, + "loss": 1.1924, + "step": 32295 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872200223724176e-05, + "loss": 0.7981, + "step": 32296 + }, + { + "epoch": 0.03, + "learning_rate": 4.987219229151173e-05, + "loss": 1.1354, + "step": 32297 + }, + { + "epoch": 0.03, + "learning_rate": 4.987218435905377e-05, + "loss": 1.507, + "step": 32298 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872176426350274e-05, + "loss": 0.1541, + "step": 32299 + }, + { + "epoch": 0.03, + "learning_rate": 4.987216849340125e-05, + "loss": 0.1656, + "step": 32300 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872160560206704e-05, + "loss": 0.1489, + "step": 32301 + }, + { + "epoch": 0.03, + "learning_rate": 4.987215262676663e-05, + "loss": 0.098, + "step": 32302 + }, + { + "epoch": 0.03, + "learning_rate": 4.987214469308102e-05, + "loss": 0.0884, + "step": 32303 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872136759149896e-05, + "loss": 0.0681, + "step": 32304 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872128824973244e-05, + "loss": 0.8575, + "step": 32305 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872120890551055e-05, + "loss": 1.4759, + "step": 32306 + }, + { + "epoch": 0.03, + "learning_rate": 4.987211295588334e-05, + "loss": 1.2485, + "step": 32307 + }, + { + "epoch": 0.03, + "learning_rate": 4.98721050209701e-05, + "loss": 1.0156, + "step": 32308 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872097085811334e-05, + "loss": 1.0028, + "step": 32309 + }, + { + "epoch": 0.03, + "learning_rate": 4.987208915040704e-05, + "loss": 0.9394, + "step": 32310 + }, + { + "epoch": 0.03, + "learning_rate": 4.987208121475722e-05, + "loss": 0.8904, + "step": 32311 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872073278861873e-05, + "loss": 0.8638, + "step": 32312 + }, + { + "epoch": 0.03, + "learning_rate": 4.9872065342721e-05, + "loss": 0.9758, + "step": 32313 + }, + { + "epoch": 0.03, + "learning_rate": 4.98720574063346e-05, + "loss": 0.8389, + "step": 32314 + }, + { + "epoch": 0.03, + "learning_rate": 4.987204946970267e-05, + "loss": 1.0429, + "step": 32315 + }, + { + "epoch": 0.03, + "learning_rate": 4.987204153282522e-05, + "loss": 0.9808, + "step": 32316 + }, + { + "epoch": 0.03, + "learning_rate": 4.987203359570224e-05, + "loss": 0.989, + "step": 32317 + }, + { + "epoch": 0.03, + "learning_rate": 4.987202565833374e-05, + "loss": 0.7948, + "step": 32318 + }, + { + "epoch": 0.03, + "learning_rate": 4.987201772071971e-05, + "loss": 1.0004, + "step": 32319 + }, + { + "epoch": 0.03, + "learning_rate": 4.987200978286015e-05, + "loss": 1.1524, + "step": 32320 + }, + { + "epoch": 0.03, + "learning_rate": 4.987200184475507e-05, + "loss": 1.2241, + "step": 32321 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871993906404455e-05, + "loss": 1.2511, + "step": 32322 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871985967808324e-05, + "loss": 1.7734, + "step": 32323 + }, + { + "epoch": 0.03, + "learning_rate": 4.987197802896666e-05, + "loss": 1.8421, + "step": 32324 + }, + { + "epoch": 0.03, + "learning_rate": 4.987197008987947e-05, + "loss": 1.3899, + "step": 32325 + }, + { + "epoch": 0.03, + "learning_rate": 4.987196215054676e-05, + "loss": 1.0676, + "step": 32326 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871954210968525e-05, + "loss": 1.0439, + "step": 32327 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871946271144766e-05, + "loss": 1.4981, + "step": 32328 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871938331075475e-05, + "loss": 1.6174, + "step": 32329 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871930390760654e-05, + "loss": 1.143, + "step": 32330 + }, + { + "epoch": 0.03, + "learning_rate": 4.987192245020032e-05, + "loss": 1.0296, + "step": 32331 + }, + { + "epoch": 0.03, + "learning_rate": 4.987191450939446e-05, + "loss": 0.924, + "step": 32332 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871906568343064e-05, + "loss": 1.2836, + "step": 32333 + }, + { + "epoch": 0.03, + "learning_rate": 4.987189862704615e-05, + "loss": 1.3367, + "step": 32334 + }, + { + "epoch": 0.03, + "learning_rate": 4.987189068550371e-05, + "loss": 1.2626, + "step": 32335 + }, + { + "epoch": 0.03, + "learning_rate": 4.987188274371575e-05, + "loss": 1.1438, + "step": 32336 + }, + { + "epoch": 0.03, + "learning_rate": 4.987187480168226e-05, + "loss": 0.5562, + "step": 32337 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871866859403246e-05, + "loss": 0.5784, + "step": 32338 + }, + { + "epoch": 0.03, + "learning_rate": 4.987185891687871e-05, + "loss": 0.8307, + "step": 32339 + }, + { + "epoch": 0.03, + "learning_rate": 4.987185097410865e-05, + "loss": 1.0153, + "step": 32340 + }, + { + "epoch": 0.03, + "learning_rate": 4.987184303109306e-05, + "loss": 1.2075, + "step": 32341 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871835087831945e-05, + "loss": 1.0312, + "step": 32342 + }, + { + "epoch": 0.03, + "learning_rate": 4.987182714432531e-05, + "loss": 0.8384, + "step": 32343 + }, + { + "epoch": 0.03, + "learning_rate": 4.987181920057316e-05, + "loss": 0.6897, + "step": 32344 + }, + { + "epoch": 0.03, + "learning_rate": 4.987181125657547e-05, + "loss": 0.5851, + "step": 32345 + }, + { + "epoch": 0.03, + "learning_rate": 4.987180331233226e-05, + "loss": 0.9492, + "step": 32346 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871795367843524e-05, + "loss": 1.1692, + "step": 32347 + }, + { + "epoch": 0.03, + "learning_rate": 4.987178742310927e-05, + "loss": 0.8737, + "step": 32348 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871779478129494e-05, + "loss": 1.014, + "step": 32349 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871771532904185e-05, + "loss": 1.108, + "step": 32350 + }, + { + "epoch": 0.03, + "learning_rate": 4.987176358743336e-05, + "loss": 1.0333, + "step": 32351 + }, + { + "epoch": 0.03, + "learning_rate": 4.987175564171701e-05, + "loss": 0.9596, + "step": 32352 + }, + { + "epoch": 0.03, + "learning_rate": 4.987174769575514e-05, + "loss": 0.9578, + "step": 32353 + }, + { + "epoch": 0.03, + "learning_rate": 4.987173974954774e-05, + "loss": 0.7098, + "step": 32354 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871731803094825e-05, + "loss": 1.264, + "step": 32355 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871723856396377e-05, + "loss": 1.0822, + "step": 32356 + }, + { + "epoch": 0.03, + "learning_rate": 4.987171590945241e-05, + "loss": 1.1312, + "step": 32357 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871707962262914e-05, + "loss": 1.2645, + "step": 32358 + }, + { + "epoch": 0.03, + "learning_rate": 4.987170001482791e-05, + "loss": 0.9485, + "step": 32359 + }, + { + "epoch": 0.03, + "learning_rate": 4.987169206714737e-05, + "loss": 0.7995, + "step": 32360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871684119221305e-05, + "loss": 1.227, + "step": 32361 + }, + { + "epoch": 0.03, + "learning_rate": 4.987167617104973e-05, + "loss": 0.8371, + "step": 32362 + }, + { + "epoch": 0.03, + "learning_rate": 4.987166822263262e-05, + "loss": 1.5139, + "step": 32363 + }, + { + "epoch": 0.03, + "learning_rate": 4.987166027397e-05, + "loss": 1.9961, + "step": 32364 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871652325061844e-05, + "loss": 2.1983, + "step": 32365 + }, + { + "epoch": 0.03, + "learning_rate": 4.987164437590817e-05, + "loss": 2.116, + "step": 32366 + }, + { + "epoch": 0.03, + "learning_rate": 4.987163642650898e-05, + "loss": 1.9973, + "step": 32367 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871628476864264e-05, + "loss": 2.0621, + "step": 32368 + }, + { + "epoch": 0.03, + "learning_rate": 4.987162052697402e-05, + "loss": 2.1445, + "step": 32369 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871612576838265e-05, + "loss": 1.8926, + "step": 32370 + }, + { + "epoch": 0.03, + "learning_rate": 4.987160462645698e-05, + "loss": 2.0815, + "step": 32371 + }, + { + "epoch": 0.03, + "learning_rate": 4.987159667583017e-05, + "loss": 1.9003, + "step": 32372 + }, + { + "epoch": 0.03, + "learning_rate": 4.987158872495784e-05, + "loss": 1.6653, + "step": 32373 + }, + { + "epoch": 0.03, + "learning_rate": 4.987158077384e-05, + "loss": 1.3378, + "step": 32374 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871572822476624e-05, + "loss": 0.8218, + "step": 32375 + }, + { + "epoch": 0.03, + "learning_rate": 4.987156487086773e-05, + "loss": 0.9538, + "step": 32376 + }, + { + "epoch": 0.03, + "learning_rate": 4.987155691901332e-05, + "loss": 1.0719, + "step": 32377 + }, + { + "epoch": 0.03, + "learning_rate": 4.987154896691338e-05, + "loss": 1.1583, + "step": 32378 + }, + { + "epoch": 0.03, + "learning_rate": 4.987154101456793e-05, + "loss": 1.1669, + "step": 32379 + }, + { + "epoch": 0.03, + "learning_rate": 4.987153306197695e-05, + "loss": 1.0038, + "step": 32380 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871525109140446e-05, + "loss": 0.8791, + "step": 32381 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871517156058426e-05, + "loss": 1.1637, + "step": 32382 + }, + { + "epoch": 0.03, + "learning_rate": 4.987150920273088e-05, + "loss": 1.1116, + "step": 32383 + }, + { + "epoch": 0.03, + "learning_rate": 4.987150124915781e-05, + "loss": 0.9417, + "step": 32384 + }, + { + "epoch": 0.03, + "learning_rate": 4.987149329533923e-05, + "loss": 1.144, + "step": 32385 + }, + { + "epoch": 0.03, + "learning_rate": 4.987148534127513e-05, + "loss": 1.3253, + "step": 32386 + }, + { + "epoch": 0.03, + "learning_rate": 4.987147738696551e-05, + "loss": 1.043, + "step": 32387 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871469432410354e-05, + "loss": 1.1765, + "step": 32388 + }, + { + "epoch": 0.03, + "learning_rate": 4.987146147760969e-05, + "loss": 1.188, + "step": 32389 + }, + { + "epoch": 0.03, + "learning_rate": 4.98714535225635e-05, + "loss": 1.0625, + "step": 32390 + }, + { + "epoch": 0.03, + "learning_rate": 4.987144556727179e-05, + "loss": 0.6026, + "step": 32391 + }, + { + "epoch": 0.03, + "learning_rate": 4.987143761173456e-05, + "loss": 0.6859, + "step": 32392 + }, + { + "epoch": 0.03, + "learning_rate": 4.987142965595182e-05, + "loss": 1.0853, + "step": 32393 + }, + { + "epoch": 0.03, + "learning_rate": 4.987142169992355e-05, + "loss": 1.6556, + "step": 32394 + }, + { + "epoch": 0.03, + "learning_rate": 4.987141374364975e-05, + "loss": 0.8779, + "step": 32395 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871405787130444e-05, + "loss": 0.9329, + "step": 32396 + }, + { + "epoch": 0.03, + "learning_rate": 4.987139783036562e-05, + "loss": 0.9895, + "step": 32397 + }, + { + "epoch": 0.03, + "learning_rate": 4.987138987335527e-05, + "loss": 0.6712, + "step": 32398 + }, + { + "epoch": 0.03, + "learning_rate": 4.987138191609939e-05, + "loss": 0.8732, + "step": 32399 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871373958598006e-05, + "loss": 1.1098, + "step": 32400 + }, + { + "epoch": 0.03, + "learning_rate": 4.98713660008511e-05, + "loss": 1.0436, + "step": 32401 + }, + { + "epoch": 0.03, + "learning_rate": 4.987135804285867e-05, + "loss": 1.1275, + "step": 32402 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871350084620714e-05, + "loss": 0.5934, + "step": 32403 + }, + { + "epoch": 0.03, + "learning_rate": 4.987134212613725e-05, + "loss": 0.917, + "step": 32404 + }, + { + "epoch": 0.03, + "learning_rate": 4.987133416740826e-05, + "loss": 0.7873, + "step": 32405 + }, + { + "epoch": 0.03, + "learning_rate": 4.987132620843376e-05, + "loss": 1.0199, + "step": 32406 + }, + { + "epoch": 0.03, + "learning_rate": 4.987131824921373e-05, + "loss": 0.9801, + "step": 32407 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871310289748184e-05, + "loss": 1.1615, + "step": 32408 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871302330037115e-05, + "loss": 0.5716, + "step": 32409 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871294370080534e-05, + "loss": 0.756, + "step": 32410 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871286409878437e-05, + "loss": 1.0281, + "step": 32411 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871278449430814e-05, + "loss": 0.9306, + "step": 32412 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871270488737674e-05, + "loss": 1.134, + "step": 32413 + }, + { + "epoch": 0.03, + "learning_rate": 4.987126252779901e-05, + "loss": 1.1205, + "step": 32414 + }, + { + "epoch": 0.03, + "learning_rate": 4.987125456661484e-05, + "loss": 1.1233, + "step": 32415 + }, + { + "epoch": 0.03, + "learning_rate": 4.987124660518514e-05, + "loss": 0.9104, + "step": 32416 + }, + { + "epoch": 0.03, + "learning_rate": 4.987123864350993e-05, + "loss": 0.9661, + "step": 32417 + }, + { + "epoch": 0.03, + "learning_rate": 4.98712306815892e-05, + "loss": 0.9432, + "step": 32418 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871222719422946e-05, + "loss": 0.9023, + "step": 32419 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871214757011176e-05, + "loss": 1.2718, + "step": 32420 + }, + { + "epoch": 0.03, + "learning_rate": 4.987120679435389e-05, + "loss": 0.9453, + "step": 32421 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871198831451084e-05, + "loss": 1.2669, + "step": 32422 + }, + { + "epoch": 0.03, + "learning_rate": 4.987119086830276e-05, + "loss": 1.1683, + "step": 32423 + }, + { + "epoch": 0.03, + "learning_rate": 4.987118290490892e-05, + "loss": 0.941, + "step": 32424 + }, + { + "epoch": 0.03, + "learning_rate": 4.987117494126956e-05, + "loss": 0.9729, + "step": 32425 + }, + { + "epoch": 0.03, + "learning_rate": 4.987116697738468e-05, + "loss": 0.7289, + "step": 32426 + }, + { + "epoch": 0.03, + "learning_rate": 4.987115901325429e-05, + "loss": 1.3169, + "step": 32427 + }, + { + "epoch": 0.03, + "learning_rate": 4.987115104887837e-05, + "loss": 0.993, + "step": 32428 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871143084256944e-05, + "loss": 0.9012, + "step": 32429 + }, + { + "epoch": 0.03, + "learning_rate": 4.987113511939e-05, + "loss": 1.0473, + "step": 32430 + }, + { + "epoch": 0.03, + "learning_rate": 4.987112715427753e-05, + "loss": 0.5538, + "step": 32431 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871119188919555e-05, + "loss": 1.0485, + "step": 32432 + }, + { + "epoch": 0.03, + "learning_rate": 4.987111122331605e-05, + "loss": 0.9065, + "step": 32433 + }, + { + "epoch": 0.03, + "learning_rate": 4.987110325746704e-05, + "loss": 0.9184, + "step": 32434 + }, + { + "epoch": 0.03, + "learning_rate": 4.987109529137251e-05, + "loss": 0.3177, + "step": 32435 + }, + { + "epoch": 0.03, + "learning_rate": 4.987108732503245e-05, + "loss": 0.2342, + "step": 32436 + }, + { + "epoch": 0.03, + "learning_rate": 4.987107935844689e-05, + "loss": 0.1621, + "step": 32437 + }, + { + "epoch": 0.03, + "learning_rate": 4.98710713916158e-05, + "loss": 0.1623, + "step": 32438 + }, + { + "epoch": 0.03, + "learning_rate": 4.98710634245392e-05, + "loss": 0.2278, + "step": 32439 + }, + { + "epoch": 0.03, + "learning_rate": 4.987105545721708e-05, + "loss": 0.343, + "step": 32440 + }, + { + "epoch": 0.03, + "learning_rate": 4.987104748964945e-05, + "loss": 0.6608, + "step": 32441 + }, + { + "epoch": 0.03, + "learning_rate": 4.98710395218363e-05, + "loss": 0.2037, + "step": 32442 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871031553777634e-05, + "loss": 0.288, + "step": 32443 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871023585473456e-05, + "loss": 0.2996, + "step": 32444 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871015616923754e-05, + "loss": 0.2001, + "step": 32445 + }, + { + "epoch": 0.03, + "learning_rate": 4.987100764812854e-05, + "loss": 0.1922, + "step": 32446 + }, + { + "epoch": 0.03, + "learning_rate": 4.98709996790878e-05, + "loss": 0.3918, + "step": 32447 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870991709801554e-05, + "loss": 1.2726, + "step": 32448 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870983740269795e-05, + "loss": 1.1299, + "step": 32449 + }, + { + "epoch": 0.03, + "learning_rate": 4.987097577049251e-05, + "loss": 1.1291, + "step": 32450 + }, + { + "epoch": 0.03, + "learning_rate": 4.987096780046972e-05, + "loss": 0.5883, + "step": 32451 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870959830201405e-05, + "loss": 0.87, + "step": 32452 + }, + { + "epoch": 0.03, + "learning_rate": 4.987095185968758e-05, + "loss": 1.0524, + "step": 32453 + }, + { + "epoch": 0.03, + "learning_rate": 4.987094388892824e-05, + "loss": 0.885, + "step": 32454 + }, + { + "epoch": 0.03, + "learning_rate": 4.987093591792338e-05, + "loss": 1.017, + "step": 32455 + }, + { + "epoch": 0.03, + "learning_rate": 4.987092794667301e-05, + "loss": 0.9161, + "step": 32456 + }, + { + "epoch": 0.03, + "learning_rate": 4.987091997517712e-05, + "loss": 1.0234, + "step": 32457 + }, + { + "epoch": 0.03, + "learning_rate": 4.987091200343572e-05, + "loss": 0.832, + "step": 32458 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870904031448797e-05, + "loss": 1.2359, + "step": 32459 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870896059216364e-05, + "loss": 1.3251, + "step": 32460 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870888086738414e-05, + "loss": 1.4955, + "step": 32461 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870880114014954e-05, + "loss": 1.0375, + "step": 32462 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870872141045975e-05, + "loss": 1.1083, + "step": 32463 + }, + { + "epoch": 0.03, + "learning_rate": 4.987086416783148e-05, + "loss": 1.0148, + "step": 32464 + }, + { + "epoch": 0.03, + "learning_rate": 4.987085619437147e-05, + "loss": 0.6818, + "step": 32465 + }, + { + "epoch": 0.03, + "learning_rate": 4.987084822066594e-05, + "loss": 0.9898, + "step": 32466 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870840246714914e-05, + "loss": 0.9855, + "step": 32467 + }, + { + "epoch": 0.03, + "learning_rate": 4.987083227251836e-05, + "loss": 1.038, + "step": 32468 + }, + { + "epoch": 0.03, + "learning_rate": 4.987082429807629e-05, + "loss": 1.0667, + "step": 32469 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870816323388716e-05, + "loss": 0.93, + "step": 32470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870808348455625e-05, + "loss": 1.0743, + "step": 32471 + }, + { + "epoch": 0.03, + "learning_rate": 4.987080037327701e-05, + "loss": 0.7369, + "step": 32472 + }, + { + "epoch": 0.03, + "learning_rate": 4.987079239785288e-05, + "loss": 0.6631, + "step": 32473 + }, + { + "epoch": 0.03, + "learning_rate": 4.987078442218325e-05, + "loss": 1.0776, + "step": 32474 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870776446268095e-05, + "loss": 1.2047, + "step": 32475 + }, + { + "epoch": 0.03, + "learning_rate": 4.987076847010743e-05, + "loss": 0.9493, + "step": 32476 + }, + { + "epoch": 0.03, + "learning_rate": 4.987076049370126e-05, + "loss": 1.1533, + "step": 32477 + }, + { + "epoch": 0.03, + "learning_rate": 4.987075251704957e-05, + "loss": 1.0636, + "step": 32478 + }, + { + "epoch": 0.03, + "learning_rate": 4.987074454015236e-05, + "loss": 0.8898, + "step": 32479 + }, + { + "epoch": 0.03, + "learning_rate": 4.987073656300964e-05, + "loss": 0.7711, + "step": 32480 + }, + { + "epoch": 0.03, + "learning_rate": 4.987072858562141e-05, + "loss": 1.0587, + "step": 32481 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870720607987664e-05, + "loss": 1.0777, + "step": 32482 + }, + { + "epoch": 0.03, + "learning_rate": 4.98707126301084e-05, + "loss": 0.9302, + "step": 32483 + }, + { + "epoch": 0.03, + "learning_rate": 4.987070465198363e-05, + "loss": 0.5962, + "step": 32484 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870696673613345e-05, + "loss": 0.435, + "step": 32485 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870688694997544e-05, + "loss": 0.7689, + "step": 32486 + }, + { + "epoch": 0.03, + "learning_rate": 4.987068071613623e-05, + "loss": 0.5232, + "step": 32487 + }, + { + "epoch": 0.03, + "learning_rate": 4.987067273702941e-05, + "loss": 0.4091, + "step": 32488 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870664757677075e-05, + "loss": 0.4924, + "step": 32489 + }, + { + "epoch": 0.03, + "learning_rate": 4.987065677807923e-05, + "loss": 0.4981, + "step": 32490 + }, + { + "epoch": 0.03, + "learning_rate": 4.987064879823586e-05, + "loss": 0.3439, + "step": 32491 + }, + { + "epoch": 0.03, + "learning_rate": 4.987064081814699e-05, + "loss": 0.3957, + "step": 32492 + }, + { + "epoch": 0.03, + "learning_rate": 4.98706328378126e-05, + "loss": 0.987, + "step": 32493 + }, + { + "epoch": 0.03, + "learning_rate": 4.98706248572327e-05, + "loss": 1.0982, + "step": 32494 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870616876407284e-05, + "loss": 0.975, + "step": 32495 + }, + { + "epoch": 0.03, + "learning_rate": 4.987060889533636e-05, + "loss": 0.7182, + "step": 32496 + }, + { + "epoch": 0.03, + "learning_rate": 4.987060091401993e-05, + "loss": 0.8848, + "step": 32497 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870592932457976e-05, + "loss": 0.8316, + "step": 32498 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870584950650514e-05, + "loss": 0.8624, + "step": 32499 + }, + { + "epoch": 0.03, + "learning_rate": 4.987057696859754e-05, + "loss": 1.244, + "step": 32500 + }, + { + "epoch": 0.03, + "eval_loss": 1.0792115926742554, + "eval_runtime": 126.9385, + "eval_samples_per_second": 10.911, + "eval_steps_per_second": 5.459, + "step": 32500 + }, + { + "epoch": 0.03, + "learning_rate": 4.987056898629906e-05, + "loss": 0.9262, + "step": 32501 + }, + { + "epoch": 0.03, + "learning_rate": 4.987056100375506e-05, + "loss": 1.0586, + "step": 32502 + }, + { + "epoch": 0.03, + "learning_rate": 4.987055302096556e-05, + "loss": 0.9858, + "step": 32503 + }, + { + "epoch": 0.03, + "learning_rate": 4.987054503793054e-05, + "loss": 0.9738, + "step": 32504 + }, + { + "epoch": 0.03, + "learning_rate": 4.987053705465e-05, + "loss": 1.1497, + "step": 32505 + }, + { + "epoch": 0.03, + "learning_rate": 4.987052907112396e-05, + "loss": 1.0458, + "step": 32506 + }, + { + "epoch": 0.03, + "learning_rate": 4.987052108735241e-05, + "loss": 0.6923, + "step": 32507 + }, + { + "epoch": 0.03, + "learning_rate": 4.987051310333534e-05, + "loss": 0.5825, + "step": 32508 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870505119072764e-05, + "loss": 1.2464, + "step": 32509 + }, + { + "epoch": 0.03, + "learning_rate": 4.987049713456468e-05, + "loss": 0.9263, + "step": 32510 + }, + { + "epoch": 0.03, + "learning_rate": 4.987048914981108e-05, + "loss": 0.8394, + "step": 32511 + }, + { + "epoch": 0.03, + "learning_rate": 4.987048116481197e-05, + "loss": 1.0763, + "step": 32512 + }, + { + "epoch": 0.03, + "learning_rate": 4.987047317956735e-05, + "loss": 1.3144, + "step": 32513 + }, + { + "epoch": 0.03, + "learning_rate": 4.987046519407722e-05, + "loss": 1.6761, + "step": 32514 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870457208341576e-05, + "loss": 0.7291, + "step": 32515 + }, + { + "epoch": 0.03, + "learning_rate": 4.987044922236042e-05, + "loss": 0.8246, + "step": 32516 + }, + { + "epoch": 0.03, + "learning_rate": 4.987044123613376e-05, + "loss": 0.8682, + "step": 32517 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870433249661585e-05, + "loss": 1.1686, + "step": 32518 + }, + { + "epoch": 0.03, + "learning_rate": 4.98704252629439e-05, + "loss": 1.0361, + "step": 32519 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870417275980704e-05, + "loss": 0.9135, + "step": 32520 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870409288772005e-05, + "loss": 1.3881, + "step": 32521 + }, + { + "epoch": 0.03, + "learning_rate": 4.987040130131778e-05, + "loss": 0.9723, + "step": 32522 + }, + { + "epoch": 0.03, + "learning_rate": 4.987039331361806e-05, + "loss": 0.9218, + "step": 32523 + }, + { + "epoch": 0.03, + "learning_rate": 4.987038532567282e-05, + "loss": 0.8795, + "step": 32524 + }, + { + "epoch": 0.03, + "learning_rate": 4.987037733748208e-05, + "loss": 1.0293, + "step": 32525 + }, + { + "epoch": 0.03, + "learning_rate": 4.987036934904582e-05, + "loss": 0.9669, + "step": 32526 + }, + { + "epoch": 0.03, + "learning_rate": 4.987036136036406e-05, + "loss": 0.9861, + "step": 32527 + }, + { + "epoch": 0.03, + "learning_rate": 4.987035337143678e-05, + "loss": 1.135, + "step": 32528 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870345382263996e-05, + "loss": 1.1283, + "step": 32529 + }, + { + "epoch": 0.03, + "learning_rate": 4.98703373928457e-05, + "loss": 1.0967, + "step": 32530 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870329403181905e-05, + "loss": 1.3808, + "step": 32531 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870321413272586e-05, + "loss": 1.414, + "step": 32532 + }, + { + "epoch": 0.03, + "learning_rate": 4.987031342311776e-05, + "loss": 1.3275, + "step": 32533 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870305432717436e-05, + "loss": 1.0497, + "step": 32534 + }, + { + "epoch": 0.03, + "learning_rate": 4.987029744207159e-05, + "loss": 0.688, + "step": 32535 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870289451180244e-05, + "loss": 1.3293, + "step": 32536 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870281460043384e-05, + "loss": 0.9543, + "step": 32537 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870273468661014e-05, + "loss": 1.0291, + "step": 32538 + }, + { + "epoch": 0.03, + "learning_rate": 4.987026547703314e-05, + "loss": 1.1207, + "step": 32539 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870257485159755e-05, + "loss": 0.9516, + "step": 32540 + }, + { + "epoch": 0.03, + "learning_rate": 4.987024949304086e-05, + "loss": 0.6811, + "step": 32541 + }, + { + "epoch": 0.03, + "learning_rate": 4.987024150067646e-05, + "loss": 0.7636, + "step": 32542 + }, + { + "epoch": 0.03, + "learning_rate": 4.987023350806654e-05, + "loss": 1.0012, + "step": 32543 + }, + { + "epoch": 0.03, + "learning_rate": 4.987022551521112e-05, + "loss": 1.3992, + "step": 32544 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870217522110196e-05, + "loss": 0.877, + "step": 32545 + }, + { + "epoch": 0.03, + "learning_rate": 4.987020952876376e-05, + "loss": 1.0162, + "step": 32546 + }, + { + "epoch": 0.03, + "learning_rate": 4.987020153517181e-05, + "loss": 0.8529, + "step": 32547 + }, + { + "epoch": 0.03, + "learning_rate": 4.987019354133436e-05, + "loss": 0.9245, + "step": 32548 + }, + { + "epoch": 0.03, + "learning_rate": 4.98701855472514e-05, + "loss": 1.2348, + "step": 32549 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870177552922934e-05, + "loss": 0.7148, + "step": 32550 + }, + { + "epoch": 0.03, + "learning_rate": 4.987016955834896e-05, + "loss": 1.3742, + "step": 32551 + }, + { + "epoch": 0.03, + "learning_rate": 4.987016156352947e-05, + "loss": 1.9925, + "step": 32552 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870153568464486e-05, + "loss": 1.1859, + "step": 32553 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870145573153983e-05, + "loss": 1.0893, + "step": 32554 + }, + { + "epoch": 0.03, + "learning_rate": 4.987013757759798e-05, + "loss": 0.8206, + "step": 32555 + }, + { + "epoch": 0.03, + "learning_rate": 4.987012958179645e-05, + "loss": 1.0804, + "step": 32556 + }, + { + "epoch": 0.03, + "learning_rate": 4.987012158574943e-05, + "loss": 1.2362, + "step": 32557 + }, + { + "epoch": 0.03, + "learning_rate": 4.98701135894569e-05, + "loss": 1.3295, + "step": 32558 + }, + { + "epoch": 0.03, + "learning_rate": 4.987010559291887e-05, + "loss": 0.9639, + "step": 32559 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870097596135324e-05, + "loss": 0.6525, + "step": 32560 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870089599106274e-05, + "loss": 0.3259, + "step": 32561 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870081601831705e-05, + "loss": 0.2271, + "step": 32562 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870073604311646e-05, + "loss": 0.2366, + "step": 32563 + }, + { + "epoch": 0.03, + "learning_rate": 4.987006560654608e-05, + "loss": 0.6203, + "step": 32564 + }, + { + "epoch": 0.03, + "learning_rate": 4.987005760853499e-05, + "loss": 0.982, + "step": 32565 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870049610278405e-05, + "loss": 0.9879, + "step": 32566 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870041611776316e-05, + "loss": 0.8842, + "step": 32567 + }, + { + "epoch": 0.03, + "learning_rate": 4.987003361302872e-05, + "loss": 0.9936, + "step": 32568 + }, + { + "epoch": 0.03, + "learning_rate": 4.987002561403561e-05, + "loss": 1.0967, + "step": 32569 + }, + { + "epoch": 0.03, + "learning_rate": 4.9870017614797e-05, + "loss": 1.0041, + "step": 32570 + }, + { + "epoch": 0.03, + "learning_rate": 4.987000961531288e-05, + "loss": 1.0296, + "step": 32571 + }, + { + "epoch": 0.03, + "learning_rate": 4.987000161558326e-05, + "loss": 1.1526, + "step": 32572 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869993615608126e-05, + "loss": 1.219, + "step": 32573 + }, + { + "epoch": 0.03, + "learning_rate": 4.986998561538749e-05, + "loss": 1.0665, + "step": 32574 + }, + { + "epoch": 0.03, + "learning_rate": 4.986997761492135e-05, + "loss": 0.7738, + "step": 32575 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869969614209696e-05, + "loss": 0.7791, + "step": 32576 + }, + { + "epoch": 0.03, + "learning_rate": 4.986996161325254e-05, + "loss": 0.913, + "step": 32577 + }, + { + "epoch": 0.03, + "learning_rate": 4.986995361204988e-05, + "loss": 1.0539, + "step": 32578 + }, + { + "epoch": 0.03, + "learning_rate": 4.986994561060171e-05, + "loss": 0.9641, + "step": 32579 + }, + { + "epoch": 0.03, + "learning_rate": 4.986993760890804e-05, + "loss": 0.9887, + "step": 32580 + }, + { + "epoch": 0.03, + "learning_rate": 4.986992960696886e-05, + "loss": 0.973, + "step": 32581 + }, + { + "epoch": 0.03, + "learning_rate": 4.986992160478418e-05, + "loss": 1.0166, + "step": 32582 + }, + { + "epoch": 0.03, + "learning_rate": 4.986991360235399e-05, + "loss": 0.5874, + "step": 32583 + }, + { + "epoch": 0.03, + "learning_rate": 4.98699055996783e-05, + "loss": 1.0478, + "step": 32584 + }, + { + "epoch": 0.03, + "learning_rate": 4.98698975967571e-05, + "loss": 0.9003, + "step": 32585 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869889593590395e-05, + "loss": 1.1639, + "step": 32586 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869881590178185e-05, + "loss": 1.256, + "step": 32587 + }, + { + "epoch": 0.03, + "learning_rate": 4.986987358652048e-05, + "loss": 1.3066, + "step": 32588 + }, + { + "epoch": 0.03, + "learning_rate": 4.986986558261725e-05, + "loss": 0.8196, + "step": 32589 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869857578468525e-05, + "loss": 1.2007, + "step": 32590 + }, + { + "epoch": 0.03, + "learning_rate": 4.98698495740743e-05, + "loss": 1.0245, + "step": 32591 + }, + { + "epoch": 0.03, + "learning_rate": 4.986984156943457e-05, + "loss": 0.8127, + "step": 32592 + }, + { + "epoch": 0.03, + "learning_rate": 4.986983356454933e-05, + "loss": 1.0742, + "step": 32593 + }, + { + "epoch": 0.03, + "learning_rate": 4.986982555941859e-05, + "loss": 0.8032, + "step": 32594 + }, + { + "epoch": 0.03, + "learning_rate": 4.986981755404234e-05, + "loss": 0.8665, + "step": 32595 + }, + { + "epoch": 0.03, + "learning_rate": 4.986980954842059e-05, + "loss": 0.9234, + "step": 32596 + }, + { + "epoch": 0.03, + "learning_rate": 4.986980154255333e-05, + "loss": 1.0193, + "step": 32597 + }, + { + "epoch": 0.03, + "learning_rate": 4.986979353644057e-05, + "loss": 1.1294, + "step": 32598 + }, + { + "epoch": 0.03, + "learning_rate": 4.986978553008231e-05, + "loss": 0.9738, + "step": 32599 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869777523478544e-05, + "loss": 1.1541, + "step": 32600 + }, + { + "epoch": 0.03, + "learning_rate": 4.986976951662927e-05, + "loss": 1.0368, + "step": 32601 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869761509534494e-05, + "loss": 1.0119, + "step": 32602 + }, + { + "epoch": 0.03, + "learning_rate": 4.986975350219422e-05, + "loss": 0.7952, + "step": 32603 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869745494608434e-05, + "loss": 0.9839, + "step": 32604 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869737486777145e-05, + "loss": 0.9227, + "step": 32605 + }, + { + "epoch": 0.03, + "learning_rate": 4.986972947870035e-05, + "loss": 1.0534, + "step": 32606 + }, + { + "epoch": 0.03, + "learning_rate": 4.986972147037806e-05, + "loss": 1.3039, + "step": 32607 + }, + { + "epoch": 0.03, + "learning_rate": 4.986971346181026e-05, + "loss": 1.5221, + "step": 32608 + }, + { + "epoch": 0.03, + "learning_rate": 4.986970545299696e-05, + "loss": 1.16, + "step": 32609 + }, + { + "epoch": 0.03, + "learning_rate": 4.986969744393816e-05, + "loss": 1.1248, + "step": 32610 + }, + { + "epoch": 0.03, + "learning_rate": 4.986968943463385e-05, + "loss": 1.387, + "step": 32611 + }, + { + "epoch": 0.03, + "learning_rate": 4.986968142508404e-05, + "loss": 1.2933, + "step": 32612 + }, + { + "epoch": 0.03, + "learning_rate": 4.986967341528873e-05, + "loss": 0.8602, + "step": 32613 + }, + { + "epoch": 0.03, + "learning_rate": 4.986966540524791e-05, + "loss": 1.1479, + "step": 32614 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869657394961594e-05, + "loss": 0.6923, + "step": 32615 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869649384429774e-05, + "loss": 0.9425, + "step": 32616 + }, + { + "epoch": 0.03, + "learning_rate": 4.986964137365245e-05, + "loss": 0.8861, + "step": 32617 + }, + { + "epoch": 0.03, + "learning_rate": 4.986963336262962e-05, + "loss": 0.9323, + "step": 32618 + }, + { + "epoch": 0.03, + "learning_rate": 4.986962535136129e-05, + "loss": 1.0681, + "step": 32619 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869617339847454e-05, + "loss": 0.8883, + "step": 32620 + }, + { + "epoch": 0.03, + "learning_rate": 4.986960932808812e-05, + "loss": 1.028, + "step": 32621 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869601316083284e-05, + "loss": 0.8639, + "step": 32622 + }, + { + "epoch": 0.03, + "learning_rate": 4.986959330383295e-05, + "loss": 1.3352, + "step": 32623 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869585291337104e-05, + "loss": 0.9881, + "step": 32624 + }, + { + "epoch": 0.03, + "learning_rate": 4.986957727859576e-05, + "loss": 1.0773, + "step": 32625 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869569265608915e-05, + "loss": 1.25, + "step": 32626 + }, + { + "epoch": 0.03, + "learning_rate": 4.986956125237657e-05, + "loss": 1.2224, + "step": 32627 + }, + { + "epoch": 0.03, + "learning_rate": 4.986955323889872e-05, + "loss": 1.2481, + "step": 32628 + }, + { + "epoch": 0.03, + "learning_rate": 4.986954522517537e-05, + "loss": 1.0396, + "step": 32629 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869537211206516e-05, + "loss": 0.6566, + "step": 32630 + }, + { + "epoch": 0.03, + "learning_rate": 4.986952919699216e-05, + "loss": 0.9755, + "step": 32631 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869521182532305e-05, + "loss": 0.9495, + "step": 32632 + }, + { + "epoch": 0.03, + "learning_rate": 4.986951316782695e-05, + "loss": 0.5497, + "step": 32633 + }, + { + "epoch": 0.03, + "learning_rate": 4.986950515287609e-05, + "loss": 0.3568, + "step": 32634 + }, + { + "epoch": 0.03, + "learning_rate": 4.986949713767973e-05, + "loss": 0.3079, + "step": 32635 + }, + { + "epoch": 0.03, + "learning_rate": 4.986948912223788e-05, + "loss": 0.3223, + "step": 32636 + }, + { + "epoch": 0.03, + "learning_rate": 4.986948110655051e-05, + "loss": 0.274, + "step": 32637 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869473090617645e-05, + "loss": 0.2427, + "step": 32638 + }, + { + "epoch": 0.03, + "learning_rate": 4.986946507443929e-05, + "loss": 0.2892, + "step": 32639 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869457058015424e-05, + "loss": 0.233, + "step": 32640 + }, + { + "epoch": 0.03, + "learning_rate": 4.986944904134606e-05, + "loss": 0.26, + "step": 32641 + }, + { + "epoch": 0.03, + "learning_rate": 4.986944102443119e-05, + "loss": 0.2979, + "step": 32642 + }, + { + "epoch": 0.03, + "learning_rate": 4.986943300727082e-05, + "loss": 0.2401, + "step": 32643 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869424989864955e-05, + "loss": 0.2478, + "step": 32644 + }, + { + "epoch": 0.03, + "learning_rate": 4.986941697221359e-05, + "loss": 0.2392, + "step": 32645 + }, + { + "epoch": 0.03, + "learning_rate": 4.986940895431672e-05, + "loss": 0.2948, + "step": 32646 + }, + { + "epoch": 0.03, + "learning_rate": 4.986940093617435e-05, + "loss": 0.3184, + "step": 32647 + }, + { + "epoch": 0.03, + "learning_rate": 4.986939291778648e-05, + "loss": 0.285, + "step": 32648 + }, + { + "epoch": 0.03, + "learning_rate": 4.986938489915312e-05, + "loss": 0.2262, + "step": 32649 + }, + { + "epoch": 0.03, + "learning_rate": 4.986937688027424e-05, + "loss": 0.2522, + "step": 32650 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869368861149876e-05, + "loss": 0.5233, + "step": 32651 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869360841780006e-05, + "loss": 0.7757, + "step": 32652 + }, + { + "epoch": 0.03, + "learning_rate": 4.986935282216464e-05, + "loss": 0.9077, + "step": 32653 + }, + { + "epoch": 0.03, + "learning_rate": 4.986934480230376e-05, + "loss": 0.8529, + "step": 32654 + }, + { + "epoch": 0.03, + "learning_rate": 4.98693367821974e-05, + "loss": 1.1712, + "step": 32655 + }, + { + "epoch": 0.03, + "learning_rate": 4.986932876184553e-05, + "loss": 0.6823, + "step": 32656 + }, + { + "epoch": 0.03, + "learning_rate": 4.986932074124816e-05, + "loss": 0.6295, + "step": 32657 + }, + { + "epoch": 0.03, + "learning_rate": 4.986931272040529e-05, + "loss": 1.1555, + "step": 32658 + }, + { + "epoch": 0.03, + "learning_rate": 4.986930469931693e-05, + "loss": 1.103, + "step": 32659 + }, + { + "epoch": 0.03, + "learning_rate": 4.986929667798307e-05, + "loss": 0.9541, + "step": 32660 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869288656403704e-05, + "loss": 0.9156, + "step": 32661 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869280634578835e-05, + "loss": 1.0801, + "step": 32662 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869272612508474e-05, + "loss": 1.1363, + "step": 32663 + }, + { + "epoch": 0.03, + "learning_rate": 4.986926459019261e-05, + "loss": 0.8403, + "step": 32664 + }, + { + "epoch": 0.03, + "learning_rate": 4.986925656763125e-05, + "loss": 0.892, + "step": 32665 + }, + { + "epoch": 0.03, + "learning_rate": 4.986924854482439e-05, + "loss": 0.9242, + "step": 32666 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869240521772034e-05, + "loss": 1.2257, + "step": 32667 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869232498474174e-05, + "loss": 0.9471, + "step": 32668 + }, + { + "epoch": 0.03, + "learning_rate": 4.986922447493082e-05, + "loss": 0.9777, + "step": 32669 + }, + { + "epoch": 0.03, + "learning_rate": 4.986921645114196e-05, + "loss": 0.9246, + "step": 32670 + }, + { + "epoch": 0.03, + "learning_rate": 4.986920842710761e-05, + "loss": 1.1279, + "step": 32671 + }, + { + "epoch": 0.03, + "learning_rate": 4.986920040282776e-05, + "loss": 1.1623, + "step": 32672 + }, + { + "epoch": 0.03, + "learning_rate": 4.986919237830241e-05, + "loss": 1.264, + "step": 32673 + }, + { + "epoch": 0.03, + "learning_rate": 4.986918435353156e-05, + "loss": 1.0087, + "step": 32674 + }, + { + "epoch": 0.03, + "learning_rate": 4.986917632851522e-05, + "loss": 1.1502, + "step": 32675 + }, + { + "epoch": 0.03, + "learning_rate": 4.986916830325338e-05, + "loss": 1.2548, + "step": 32676 + }, + { + "epoch": 0.03, + "learning_rate": 4.986916027774603e-05, + "loss": 1.0537, + "step": 32677 + }, + { + "epoch": 0.03, + "learning_rate": 4.986915225199319e-05, + "loss": 0.9079, + "step": 32678 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869144225994855e-05, + "loss": 0.8452, + "step": 32679 + }, + { + "epoch": 0.03, + "learning_rate": 4.986913619975102e-05, + "loss": 0.7205, + "step": 32680 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869128173261684e-05, + "loss": 1.2636, + "step": 32681 + }, + { + "epoch": 0.03, + "learning_rate": 4.986912014652686e-05, + "loss": 0.9043, + "step": 32682 + }, + { + "epoch": 0.03, + "learning_rate": 4.986911211954653e-05, + "loss": 1.1262, + "step": 32683 + }, + { + "epoch": 0.03, + "learning_rate": 4.986910409232071e-05, + "loss": 0.7153, + "step": 32684 + }, + { + "epoch": 0.03, + "learning_rate": 4.986909606484939e-05, + "loss": 0.8261, + "step": 32685 + }, + { + "epoch": 0.03, + "learning_rate": 4.986908803713256e-05, + "loss": 0.7673, + "step": 32686 + }, + { + "epoch": 0.03, + "learning_rate": 4.986908000917024e-05, + "loss": 0.9044, + "step": 32687 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869071980962437e-05, + "loss": 0.7701, + "step": 32688 + }, + { + "epoch": 0.03, + "learning_rate": 4.986906395250912e-05, + "loss": 0.8163, + "step": 32689 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869055923810314e-05, + "loss": 1.1761, + "step": 32690 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869047894866004e-05, + "loss": 1.1587, + "step": 32691 + }, + { + "epoch": 0.03, + "learning_rate": 4.98690398656762e-05, + "loss": 0.8697, + "step": 32692 + }, + { + "epoch": 0.03, + "learning_rate": 4.986903183624091e-05, + "loss": 0.6402, + "step": 32693 + }, + { + "epoch": 0.03, + "learning_rate": 4.986902380656011e-05, + "loss": 1.0906, + "step": 32694 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869015776633824e-05, + "loss": 1.1974, + "step": 32695 + }, + { + "epoch": 0.03, + "learning_rate": 4.986900774646204e-05, + "loss": 1.5868, + "step": 32696 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868999716044754e-05, + "loss": 1.1387, + "step": 32697 + }, + { + "epoch": 0.03, + "learning_rate": 4.986899168538197e-05, + "loss": 1.028, + "step": 32698 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868983654473695e-05, + "loss": 0.8013, + "step": 32699 + }, + { + "epoch": 0.03, + "learning_rate": 4.986897562331992e-05, + "loss": 0.992, + "step": 32700 + }, + { + "epoch": 0.03, + "learning_rate": 4.986896759192066e-05, + "loss": 1.1099, + "step": 32701 + }, + { + "epoch": 0.03, + "learning_rate": 4.986895956027589e-05, + "loss": 0.9473, + "step": 32702 + }, + { + "epoch": 0.03, + "learning_rate": 4.986895152838563e-05, + "loss": 0.9935, + "step": 32703 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868943496249875e-05, + "loss": 1.0243, + "step": 32704 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868935463868624e-05, + "loss": 1.0594, + "step": 32705 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868927431241875e-05, + "loss": 0.8347, + "step": 32706 + }, + { + "epoch": 0.03, + "learning_rate": 4.986891939836963e-05, + "loss": 1.0677, + "step": 32707 + }, + { + "epoch": 0.03, + "learning_rate": 4.986891136525189e-05, + "loss": 0.8836, + "step": 32708 + }, + { + "epoch": 0.03, + "learning_rate": 4.986890333188866e-05, + "loss": 1.0941, + "step": 32709 + }, + { + "epoch": 0.03, + "learning_rate": 4.986889529827993e-05, + "loss": 1.3279, + "step": 32710 + }, + { + "epoch": 0.03, + "learning_rate": 4.98688872644257e-05, + "loss": 1.008, + "step": 32711 + }, + { + "epoch": 0.03, + "learning_rate": 4.986887923032598e-05, + "loss": 1.1716, + "step": 32712 + }, + { + "epoch": 0.03, + "learning_rate": 4.986887119598077e-05, + "loss": 0.7559, + "step": 32713 + }, + { + "epoch": 0.03, + "learning_rate": 4.986886316139006e-05, + "loss": 0.7059, + "step": 32714 + }, + { + "epoch": 0.03, + "learning_rate": 4.986885512655385e-05, + "loss": 0.4952, + "step": 32715 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868847091472146e-05, + "loss": 0.565, + "step": 32716 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868839056144955e-05, + "loss": 0.5112, + "step": 32717 + }, + { + "epoch": 0.03, + "learning_rate": 4.986883102057227e-05, + "loss": 0.4833, + "step": 32718 + }, + { + "epoch": 0.03, + "learning_rate": 4.986882298475408e-05, + "loss": 0.3337, + "step": 32719 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868814948690404e-05, + "loss": 1.2476, + "step": 32720 + }, + { + "epoch": 0.03, + "learning_rate": 4.986880691238123e-05, + "loss": 0.8517, + "step": 32721 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868798875826566e-05, + "loss": 0.8925, + "step": 32722 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868790839026404e-05, + "loss": 1.1276, + "step": 32723 + }, + { + "epoch": 0.03, + "learning_rate": 4.986878280198075e-05, + "loss": 1.0944, + "step": 32724 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868774764689596e-05, + "loss": 1.052, + "step": 32725 + }, + { + "epoch": 0.03, + "learning_rate": 4.986876672715296e-05, + "loss": 1.0775, + "step": 32726 + }, + { + "epoch": 0.03, + "learning_rate": 4.986875868937081e-05, + "loss": 1.1323, + "step": 32727 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868750651343186e-05, + "loss": 0.7434, + "step": 32728 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868742613070054e-05, + "loss": 0.9964, + "step": 32729 + }, + { + "epoch": 0.03, + "learning_rate": 4.986873457455143e-05, + "loss": 0.661, + "step": 32730 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868726535787327e-05, + "loss": 0.9515, + "step": 32731 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868718496777724e-05, + "loss": 0.9468, + "step": 32732 + }, + { + "epoch": 0.03, + "learning_rate": 4.986871045752262e-05, + "loss": 1.0025, + "step": 32733 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868702418022026e-05, + "loss": 1.0426, + "step": 32734 + }, + { + "epoch": 0.03, + "learning_rate": 4.986869437827594e-05, + "loss": 0.8334, + "step": 32735 + }, + { + "epoch": 0.03, + "learning_rate": 4.986868633828435e-05, + "loss": 0.8879, + "step": 32736 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868678298047285e-05, + "loss": 1.6829, + "step": 32737 + }, + { + "epoch": 0.03, + "learning_rate": 4.986867025756472e-05, + "loss": 1.1365, + "step": 32738 + }, + { + "epoch": 0.03, + "learning_rate": 4.986866221683666e-05, + "loss": 1.0381, + "step": 32739 + }, + { + "epoch": 0.03, + "learning_rate": 4.986865417586311e-05, + "loss": 0.7808, + "step": 32740 + }, + { + "epoch": 0.03, + "learning_rate": 4.986864613464406e-05, + "loss": 0.996, + "step": 32741 + }, + { + "epoch": 0.03, + "learning_rate": 4.986863809317952e-05, + "loss": 0.775, + "step": 32742 + }, + { + "epoch": 0.03, + "learning_rate": 4.986863005146949e-05, + "loss": 0.9114, + "step": 32743 + }, + { + "epoch": 0.03, + "learning_rate": 4.986862200951397e-05, + "loss": 0.7647, + "step": 32744 + }, + { + "epoch": 0.03, + "learning_rate": 4.986861396731295e-05, + "loss": 0.744, + "step": 32745 + }, + { + "epoch": 0.03, + "learning_rate": 4.986860592486644e-05, + "loss": 0.5315, + "step": 32746 + }, + { + "epoch": 0.03, + "learning_rate": 4.986859788217444e-05, + "loss": 1.1544, + "step": 32747 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868589839236946e-05, + "loss": 1.0075, + "step": 32748 + }, + { + "epoch": 0.03, + "learning_rate": 4.986858179605396e-05, + "loss": 0.963, + "step": 32749 + }, + { + "epoch": 0.03, + "learning_rate": 4.986857375262548e-05, + "loss": 0.7979, + "step": 32750 + }, + { + "epoch": 0.03, + "learning_rate": 4.986856570895151e-05, + "loss": 0.9047, + "step": 32751 + }, + { + "epoch": 0.03, + "learning_rate": 4.986855766503205e-05, + "loss": 1.1423, + "step": 32752 + }, + { + "epoch": 0.03, + "learning_rate": 4.98685496208671e-05, + "loss": 1.0281, + "step": 32753 + }, + { + "epoch": 0.03, + "learning_rate": 4.986854157645665e-05, + "loss": 0.9408, + "step": 32754 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868533531800714e-05, + "loss": 0.8022, + "step": 32755 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868525486899285e-05, + "loss": 1.1286, + "step": 32756 + }, + { + "epoch": 0.03, + "learning_rate": 4.986851744175236e-05, + "loss": 0.8981, + "step": 32757 + }, + { + "epoch": 0.03, + "learning_rate": 4.986850939635995e-05, + "loss": 0.9574, + "step": 32758 + }, + { + "epoch": 0.03, + "learning_rate": 4.986850135072205e-05, + "loss": 0.8572, + "step": 32759 + }, + { + "epoch": 0.03, + "learning_rate": 4.986849330483865e-05, + "loss": 0.8855, + "step": 32760 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868485258709765e-05, + "loss": 0.834, + "step": 32761 + }, + { + "epoch": 0.03, + "learning_rate": 4.986847721233539e-05, + "loss": 1.2317, + "step": 32762 + }, + { + "epoch": 0.03, + "learning_rate": 4.986846916571552e-05, + "loss": 0.7281, + "step": 32763 + }, + { + "epoch": 0.03, + "learning_rate": 4.986846111885016e-05, + "loss": 0.9301, + "step": 32764 + }, + { + "epoch": 0.03, + "learning_rate": 4.986845307173931e-05, + "loss": 0.8766, + "step": 32765 + }, + { + "epoch": 0.03, + "learning_rate": 4.986844502438297e-05, + "loss": 0.9497, + "step": 32766 + }, + { + "epoch": 0.03, + "learning_rate": 4.986843697678113e-05, + "loss": 0.8874, + "step": 32767 + }, + { + "epoch": 0.03, + "learning_rate": 4.986842892893381e-05, + "loss": 0.9153, + "step": 32768 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868420880841e-05, + "loss": 0.8775, + "step": 32769 + }, + { + "epoch": 0.03, + "learning_rate": 4.98684128325027e-05, + "loss": 0.845, + "step": 32770 + }, + { + "epoch": 0.03, + "learning_rate": 4.98684047839189e-05, + "loss": 1.1355, + "step": 32771 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868396735089616e-05, + "loss": 1.0978, + "step": 32772 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868388686014836e-05, + "loss": 1.069, + "step": 32773 + }, + { + "epoch": 0.03, + "learning_rate": 4.986838063669457e-05, + "loss": 0.9329, + "step": 32774 + }, + { + "epoch": 0.03, + "learning_rate": 4.986837258712882e-05, + "loss": 1.0344, + "step": 32775 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868364537317566e-05, + "loss": 1.0649, + "step": 32776 + }, + { + "epoch": 0.03, + "learning_rate": 4.986835648726084e-05, + "loss": 0.8957, + "step": 32777 + }, + { + "epoch": 0.03, + "learning_rate": 4.986834843695861e-05, + "loss": 1.1022, + "step": 32778 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868340386410895e-05, + "loss": 1.4241, + "step": 32779 + }, + { + "epoch": 0.03, + "learning_rate": 4.986833233561768e-05, + "loss": 0.8848, + "step": 32780 + }, + { + "epoch": 0.03, + "learning_rate": 4.986832428457899e-05, + "loss": 1.007, + "step": 32781 + }, + { + "epoch": 0.03, + "learning_rate": 4.986831623329481e-05, + "loss": 1.1292, + "step": 32782 + }, + { + "epoch": 0.03, + "learning_rate": 4.986830818176513e-05, + "loss": 0.8276, + "step": 32783 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868300129989964e-05, + "loss": 1.3835, + "step": 32784 + }, + { + "epoch": 0.03, + "learning_rate": 4.986829207796931e-05, + "loss": 0.8078, + "step": 32785 + }, + { + "epoch": 0.03, + "learning_rate": 4.986828402570317e-05, + "loss": 0.785, + "step": 32786 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868275973191535e-05, + "loss": 0.8556, + "step": 32787 + }, + { + "epoch": 0.03, + "learning_rate": 4.986826792043442e-05, + "loss": 0.9444, + "step": 32788 + }, + { + "epoch": 0.03, + "learning_rate": 4.986825986743181e-05, + "loss": 1.2487, + "step": 32789 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868251814183706e-05, + "loss": 1.3865, + "step": 32790 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868243760690124e-05, + "loss": 1.1568, + "step": 32791 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868235706951045e-05, + "loss": 1.167, + "step": 32792 + }, + { + "epoch": 0.03, + "learning_rate": 4.986822765296648e-05, + "loss": 0.9974, + "step": 32793 + }, + { + "epoch": 0.03, + "learning_rate": 4.986821959873642e-05, + "loss": 0.9881, + "step": 32794 + }, + { + "epoch": 0.03, + "learning_rate": 4.986821154426088e-05, + "loss": 0.8252, + "step": 32795 + }, + { + "epoch": 0.03, + "learning_rate": 4.986820348953985e-05, + "loss": 0.6458, + "step": 32796 + }, + { + "epoch": 0.03, + "learning_rate": 4.986819543457333e-05, + "loss": 0.3983, + "step": 32797 + }, + { + "epoch": 0.03, + "learning_rate": 4.986818737936132e-05, + "loss": 0.5553, + "step": 32798 + }, + { + "epoch": 0.03, + "learning_rate": 4.986817932390383e-05, + "loss": 0.922, + "step": 32799 + }, + { + "epoch": 0.03, + "learning_rate": 4.986817126820084e-05, + "loss": 0.6957, + "step": 32800 + }, + { + "epoch": 0.03, + "learning_rate": 4.986816321225237e-05, + "loss": 1.4219, + "step": 32801 + }, + { + "epoch": 0.03, + "learning_rate": 4.98681551560584e-05, + "loss": 0.9473, + "step": 32802 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868147099618955e-05, + "loss": 0.8883, + "step": 32803 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868139042934024e-05, + "loss": 0.9685, + "step": 32804 + }, + { + "epoch": 0.03, + "learning_rate": 4.98681309860036e-05, + "loss": 0.8416, + "step": 32805 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868122928827685e-05, + "loss": 0.9497, + "step": 32806 + }, + { + "epoch": 0.03, + "learning_rate": 4.986811487140629e-05, + "loss": 0.9441, + "step": 32807 + }, + { + "epoch": 0.03, + "learning_rate": 4.98681068137394e-05, + "loss": 1.9508, + "step": 32808 + }, + { + "epoch": 0.03, + "learning_rate": 4.986809875582703e-05, + "loss": 1.1203, + "step": 32809 + }, + { + "epoch": 0.03, + "learning_rate": 4.986809069766917e-05, + "loss": 1.2226, + "step": 32810 + }, + { + "epoch": 0.03, + "learning_rate": 4.986808263926581e-05, + "loss": 1.0671, + "step": 32811 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868074580616975e-05, + "loss": 0.9469, + "step": 32812 + }, + { + "epoch": 0.03, + "learning_rate": 4.986806652172266e-05, + "loss": 1.0367, + "step": 32813 + }, + { + "epoch": 0.03, + "learning_rate": 4.986805846258284e-05, + "loss": 1.2934, + "step": 32814 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868050403197543e-05, + "loss": 1.0082, + "step": 32815 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868042343566754e-05, + "loss": 1.0065, + "step": 32816 + }, + { + "epoch": 0.03, + "learning_rate": 4.986803428369049e-05, + "loss": 1.2302, + "step": 32817 + }, + { + "epoch": 0.03, + "learning_rate": 4.986802622356873e-05, + "loss": 1.7011, + "step": 32818 + }, + { + "epoch": 0.03, + "learning_rate": 4.9868018163201484e-05, + "loss": 0.9184, + "step": 32819 + }, + { + "epoch": 0.03, + "learning_rate": 4.986801010258876e-05, + "loss": 0.8603, + "step": 32820 + }, + { + "epoch": 0.03, + "learning_rate": 4.986800204173053e-05, + "loss": 0.9228, + "step": 32821 + }, + { + "epoch": 0.03, + "learning_rate": 4.986799398062683e-05, + "loss": 1.4186, + "step": 32822 + }, + { + "epoch": 0.03, + "learning_rate": 4.986798591927764e-05, + "loss": 1.1009, + "step": 32823 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867977857682965e-05, + "loss": 0.7349, + "step": 32824 + }, + { + "epoch": 0.03, + "learning_rate": 4.98679697958428e-05, + "loss": 1.1221, + "step": 32825 + }, + { + "epoch": 0.03, + "learning_rate": 4.986796173375715e-05, + "loss": 0.8156, + "step": 32826 + }, + { + "epoch": 0.03, + "learning_rate": 4.986795367142602e-05, + "loss": 0.7563, + "step": 32827 + }, + { + "epoch": 0.03, + "learning_rate": 4.986794560884939e-05, + "loss": 0.6787, + "step": 32828 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867937546027286e-05, + "loss": 0.5335, + "step": 32829 + }, + { + "epoch": 0.03, + "learning_rate": 4.98679294829597e-05, + "loss": 0.5028, + "step": 32830 + }, + { + "epoch": 0.03, + "learning_rate": 4.986792141964661e-05, + "loss": 0.505, + "step": 32831 + }, + { + "epoch": 0.03, + "learning_rate": 4.986791335608805e-05, + "loss": 0.3059, + "step": 32832 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867905292284003e-05, + "loss": 0.3066, + "step": 32833 + }, + { + "epoch": 0.03, + "learning_rate": 4.986789722823446e-05, + "loss": 0.6744, + "step": 32834 + }, + { + "epoch": 0.03, + "learning_rate": 4.986788916393944e-05, + "loss": 0.7872, + "step": 32835 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867881099398936e-05, + "loss": 0.805, + "step": 32836 + }, + { + "epoch": 0.03, + "learning_rate": 4.986787303461294e-05, + "loss": 1.2698, + "step": 32837 + }, + { + "epoch": 0.03, + "learning_rate": 4.986786496958147e-05, + "loss": 0.7927, + "step": 32838 + }, + { + "epoch": 0.03, + "learning_rate": 4.986785690430451e-05, + "loss": 1.5429, + "step": 32839 + }, + { + "epoch": 0.03, + "learning_rate": 4.986784883878206e-05, + "loss": 1.4712, + "step": 32840 + }, + { + "epoch": 0.03, + "learning_rate": 4.986784077301413e-05, + "loss": 1.4562, + "step": 32841 + }, + { + "epoch": 0.03, + "learning_rate": 4.986783270700071e-05, + "loss": 1.6453, + "step": 32842 + }, + { + "epoch": 0.03, + "learning_rate": 4.986782464074181e-05, + "loss": 1.4062, + "step": 32843 + }, + { + "epoch": 0.03, + "learning_rate": 4.986781657423742e-05, + "loss": 0.9093, + "step": 32844 + }, + { + "epoch": 0.03, + "learning_rate": 4.986780850748756e-05, + "loss": 0.9786, + "step": 32845 + }, + { + "epoch": 0.03, + "learning_rate": 4.98678004404922e-05, + "loss": 0.9596, + "step": 32846 + }, + { + "epoch": 0.03, + "learning_rate": 4.986779237325135e-05, + "loss": 0.9841, + "step": 32847 + }, + { + "epoch": 0.03, + "learning_rate": 4.986778430576503e-05, + "loss": 0.7686, + "step": 32848 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867776238033224e-05, + "loss": 0.8392, + "step": 32849 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867768170055935e-05, + "loss": 1.1617, + "step": 32850 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867760101833155e-05, + "loss": 0.9683, + "step": 32851 + }, + { + "epoch": 0.03, + "learning_rate": 4.98677520333649e-05, + "loss": 0.71, + "step": 32852 + }, + { + "epoch": 0.03, + "learning_rate": 4.986774396465115e-05, + "loss": 1.1089, + "step": 32853 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867735895691927e-05, + "loss": 0.8703, + "step": 32854 + }, + { + "epoch": 0.03, + "learning_rate": 4.986772782648721e-05, + "loss": 0.6506, + "step": 32855 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867719757037014e-05, + "loss": 0.7203, + "step": 32856 + }, + { + "epoch": 0.03, + "learning_rate": 4.986771168734133e-05, + "loss": 0.6353, + "step": 32857 + }, + { + "epoch": 0.03, + "learning_rate": 4.986770361740017e-05, + "loss": 0.7806, + "step": 32858 + }, + { + "epoch": 0.03, + "learning_rate": 4.986769554721352e-05, + "loss": 0.8945, + "step": 32859 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867687476781397e-05, + "loss": 0.5753, + "step": 32860 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867679406103786e-05, + "loss": 1.1495, + "step": 32861 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867671335180686e-05, + "loss": 0.9167, + "step": 32862 + }, + { + "epoch": 0.03, + "learning_rate": 4.98676632640121e-05, + "loss": 0.7187, + "step": 32863 + }, + { + "epoch": 0.03, + "learning_rate": 4.986765519259804e-05, + "loss": 1.2179, + "step": 32864 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867647120938495e-05, + "loss": 0.9852, + "step": 32865 + }, + { + "epoch": 0.03, + "learning_rate": 4.986763904903347e-05, + "loss": 1.104, + "step": 32866 + }, + { + "epoch": 0.03, + "learning_rate": 4.986763097688295e-05, + "loss": 1.0321, + "step": 32867 + }, + { + "epoch": 0.03, + "learning_rate": 4.986762290448696e-05, + "loss": 1.2008, + "step": 32868 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867614831845486e-05, + "loss": 0.8316, + "step": 32869 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867606758958515e-05, + "loss": 0.3834, + "step": 32870 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867598685826075e-05, + "loss": 0.3703, + "step": 32871 + }, + { + "epoch": 0.03, + "learning_rate": 4.986759061244816e-05, + "loss": 0.3433, + "step": 32872 + }, + { + "epoch": 0.03, + "learning_rate": 4.986758253882475e-05, + "loss": 0.3707, + "step": 32873 + }, + { + "epoch": 0.03, + "learning_rate": 4.986757446495586e-05, + "loss": 0.3672, + "step": 32874 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867566390841484e-05, + "loss": 0.4027, + "step": 32875 + }, + { + "epoch": 0.03, + "learning_rate": 4.986755831648163e-05, + "loss": 0.7761, + "step": 32876 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867550241876296e-05, + "loss": 0.9081, + "step": 32877 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867542167025476e-05, + "loss": 0.9736, + "step": 32878 + }, + { + "epoch": 0.03, + "learning_rate": 4.986753409192918e-05, + "loss": 1.2494, + "step": 32879 + }, + { + "epoch": 0.03, + "learning_rate": 4.986752601658739e-05, + "loss": 1.0593, + "step": 32880 + }, + { + "epoch": 0.03, + "learning_rate": 4.986751794100013e-05, + "loss": 1.3639, + "step": 32881 + }, + { + "epoch": 0.03, + "learning_rate": 4.986750986516739e-05, + "loss": 0.9379, + "step": 32882 + }, + { + "epoch": 0.03, + "learning_rate": 4.986750178908916e-05, + "loss": 1.0351, + "step": 32883 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867493712765456e-05, + "loss": 1.0376, + "step": 32884 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867485636196264e-05, + "loss": 1.0915, + "step": 32885 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867477559381595e-05, + "loss": 1.004, + "step": 32886 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867469482321436e-05, + "loss": 1.1297, + "step": 32887 + }, + { + "epoch": 0.03, + "learning_rate": 4.986746140501581e-05, + "loss": 1.1506, + "step": 32888 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867453327464694e-05, + "loss": 1.1691, + "step": 32889 + }, + { + "epoch": 0.03, + "learning_rate": 4.98674452496681e-05, + "loss": 0.9571, + "step": 32890 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867437171626017e-05, + "loss": 1.1372, + "step": 32891 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867429093338466e-05, + "loss": 1.0509, + "step": 32892 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867421014805425e-05, + "loss": 1.9297, + "step": 32893 + }, + { + "epoch": 0.03, + "learning_rate": 4.986741293602691e-05, + "loss": 2.0199, + "step": 32894 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867404857002905e-05, + "loss": 1.8944, + "step": 32895 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867396777733426e-05, + "loss": 0.9556, + "step": 32896 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867388698218464e-05, + "loss": 1.0078, + "step": 32897 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867380618458025e-05, + "loss": 1.0279, + "step": 32898 + }, + { + "epoch": 0.03, + "learning_rate": 4.986737253845211e-05, + "loss": 0.9194, + "step": 32899 + }, + { + "epoch": 0.03, + "learning_rate": 4.98673644582007e-05, + "loss": 0.5707, + "step": 32900 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867356377703825e-05, + "loss": 0.6351, + "step": 32901 + }, + { + "epoch": 0.03, + "learning_rate": 4.986734829696146e-05, + "loss": 1.0533, + "step": 32902 + }, + { + "epoch": 0.03, + "learning_rate": 4.986734021597362e-05, + "loss": 1.0529, + "step": 32903 + }, + { + "epoch": 0.03, + "learning_rate": 4.98673321347403e-05, + "loss": 1.0909, + "step": 32904 + }, + { + "epoch": 0.03, + "learning_rate": 4.98673240532615e-05, + "loss": 1.2387, + "step": 32905 + }, + { + "epoch": 0.03, + "learning_rate": 4.986731597153721e-05, + "loss": 0.964, + "step": 32906 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867307889567454e-05, + "loss": 1.1236, + "step": 32907 + }, + { + "epoch": 0.03, + "learning_rate": 4.986729980735222e-05, + "loss": 1.1281, + "step": 32908 + }, + { + "epoch": 0.03, + "learning_rate": 4.98672917248915e-05, + "loss": 1.2631, + "step": 32909 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867283642185296e-05, + "loss": 1.0974, + "step": 32910 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867275559233616e-05, + "loss": 1.1746, + "step": 32911 + }, + { + "epoch": 0.03, + "learning_rate": 4.986726747603646e-05, + "loss": 0.8562, + "step": 32912 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867259392593825e-05, + "loss": 1.1795, + "step": 32913 + }, + { + "epoch": 0.03, + "learning_rate": 4.986725130890571e-05, + "loss": 1.1407, + "step": 32914 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867243224972114e-05, + "loss": 0.9754, + "step": 32915 + }, + { + "epoch": 0.03, + "learning_rate": 4.986723514079304e-05, + "loss": 0.6984, + "step": 32916 + }, + { + "epoch": 0.03, + "learning_rate": 4.986722705636848e-05, + "loss": 1.4947, + "step": 32917 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867218971698455e-05, + "loss": 1.3181, + "step": 32918 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867210886782946e-05, + "loss": 0.7297, + "step": 32919 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867202801621954e-05, + "loss": 1.2042, + "step": 32920 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867194716215484e-05, + "loss": 0.9698, + "step": 32921 + }, + { + "epoch": 0.03, + "learning_rate": 4.986718663056354e-05, + "loss": 0.761, + "step": 32922 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867178544666114e-05, + "loss": 1.1502, + "step": 32923 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867170458523214e-05, + "loss": 1.2023, + "step": 32924 + }, + { + "epoch": 0.03, + "learning_rate": 4.986716237213484e-05, + "loss": 0.9473, + "step": 32925 + }, + { + "epoch": 0.03, + "learning_rate": 4.986715428550097e-05, + "loss": 0.6452, + "step": 32926 + }, + { + "epoch": 0.03, + "learning_rate": 4.986714619862164e-05, + "loss": 0.6938, + "step": 32927 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867138111496823e-05, + "loss": 0.5521, + "step": 32928 + }, + { + "epoch": 0.03, + "learning_rate": 4.986713002412653e-05, + "loss": 0.8039, + "step": 32929 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867121936510756e-05, + "loss": 0.6341, + "step": 32930 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867113848649504e-05, + "loss": 0.7803, + "step": 32931 + }, + { + "epoch": 0.03, + "learning_rate": 4.986710576054278e-05, + "loss": 0.4771, + "step": 32932 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867097672190575e-05, + "loss": 1.0048, + "step": 32933 + }, + { + "epoch": 0.03, + "learning_rate": 4.98670895835929e-05, + "loss": 1.273, + "step": 32934 + }, + { + "epoch": 0.03, + "learning_rate": 4.986708149474973e-05, + "loss": 0.9776, + "step": 32935 + }, + { + "epoch": 0.03, + "learning_rate": 4.98670734056611e-05, + "loss": 1.0554, + "step": 32936 + }, + { + "epoch": 0.03, + "learning_rate": 4.986706531632699e-05, + "loss": 1.0436, + "step": 32937 + }, + { + "epoch": 0.03, + "learning_rate": 4.98670572267474e-05, + "loss": 1.102, + "step": 32938 + }, + { + "epoch": 0.03, + "learning_rate": 4.986704913692233e-05, + "loss": 0.8194, + "step": 32939 + }, + { + "epoch": 0.03, + "learning_rate": 4.986704104685178e-05, + "loss": 1.0149, + "step": 32940 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867032956535764e-05, + "loss": 0.9471, + "step": 32941 + }, + { + "epoch": 0.03, + "learning_rate": 4.9867024865974265e-05, + "loss": 0.8109, + "step": 32942 + }, + { + "epoch": 0.03, + "learning_rate": 4.986701677516728e-05, + "loss": 1.0169, + "step": 32943 + }, + { + "epoch": 0.03, + "learning_rate": 4.986700868411484e-05, + "loss": 1.2865, + "step": 32944 + }, + { + "epoch": 0.03, + "learning_rate": 4.986700059281691e-05, + "loss": 1.3324, + "step": 32945 + }, + { + "epoch": 0.03, + "learning_rate": 4.98669925012735e-05, + "loss": 2.3429, + "step": 32946 + }, + { + "epoch": 0.03, + "learning_rate": 4.986698440948462e-05, + "loss": 2.5049, + "step": 32947 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866976317450266e-05, + "loss": 0.9527, + "step": 32948 + }, + { + "epoch": 0.03, + "learning_rate": 4.986696822517043e-05, + "loss": 0.8079, + "step": 32949 + }, + { + "epoch": 0.03, + "learning_rate": 4.986696013264512e-05, + "loss": 1.1365, + "step": 32950 + }, + { + "epoch": 0.03, + "learning_rate": 4.986695203987433e-05, + "loss": 1.2244, + "step": 32951 + }, + { + "epoch": 0.03, + "learning_rate": 4.986694394685807e-05, + "loss": 0.728, + "step": 32952 + }, + { + "epoch": 0.03, + "learning_rate": 4.986693585359633e-05, + "loss": 1.3202, + "step": 32953 + }, + { + "epoch": 0.03, + "learning_rate": 4.986692776008912e-05, + "loss": 0.9238, + "step": 32954 + }, + { + "epoch": 0.03, + "learning_rate": 4.986691966633642e-05, + "loss": 0.9817, + "step": 32955 + }, + { + "epoch": 0.03, + "learning_rate": 4.986691157233826e-05, + "loss": 0.4226, + "step": 32956 + }, + { + "epoch": 0.03, + "learning_rate": 4.986690347809462e-05, + "loss": 0.5311, + "step": 32957 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866895383605496e-05, + "loss": 0.8795, + "step": 32958 + }, + { + "epoch": 0.03, + "learning_rate": 4.98668872888709e-05, + "loss": 0.7843, + "step": 32959 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866879193890833e-05, + "loss": 0.8699, + "step": 32960 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866871098665294e-05, + "loss": 0.9423, + "step": 32961 + }, + { + "epoch": 0.03, + "learning_rate": 4.986686300319428e-05, + "loss": 0.8998, + "step": 32962 + }, + { + "epoch": 0.03, + "learning_rate": 4.986685490747778e-05, + "loss": 1.0219, + "step": 32963 + }, + { + "epoch": 0.03, + "learning_rate": 4.986684681151581e-05, + "loss": 0.8769, + "step": 32964 + }, + { + "epoch": 0.03, + "learning_rate": 4.986683871530836e-05, + "loss": 0.7328, + "step": 32965 + }, + { + "epoch": 0.03, + "learning_rate": 4.986683061885544e-05, + "loss": 1.3538, + "step": 32966 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866822522157054e-05, + "loss": 0.8418, + "step": 32967 + }, + { + "epoch": 0.03, + "learning_rate": 4.986681442521318e-05, + "loss": 0.9946, + "step": 32968 + }, + { + "epoch": 0.03, + "learning_rate": 4.986680632802384e-05, + "loss": 1.0181, + "step": 32969 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866798230589015e-05, + "loss": 1.2978, + "step": 32970 + }, + { + "epoch": 0.03, + "learning_rate": 4.986679013290873e-05, + "loss": 0.8339, + "step": 32971 + }, + { + "epoch": 0.03, + "learning_rate": 4.986678203498296e-05, + "loss": 1.2131, + "step": 32972 + }, + { + "epoch": 0.03, + "learning_rate": 4.986677393681172e-05, + "loss": 0.9509, + "step": 32973 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866765838395e-05, + "loss": 1.1187, + "step": 32974 + }, + { + "epoch": 0.03, + "learning_rate": 4.98667577397328e-05, + "loss": 1.1068, + "step": 32975 + }, + { + "epoch": 0.03, + "learning_rate": 4.986674964082514e-05, + "loss": 1.1811, + "step": 32976 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866741541672e-05, + "loss": 1.3917, + "step": 32977 + }, + { + "epoch": 0.03, + "learning_rate": 4.986673344227339e-05, + "loss": 0.4617, + "step": 32978 + }, + { + "epoch": 0.03, + "learning_rate": 4.98667253426293e-05, + "loss": 0.4081, + "step": 32979 + }, + { + "epoch": 0.03, + "learning_rate": 4.986671724273975e-05, + "loss": 0.3844, + "step": 32980 + }, + { + "epoch": 0.03, + "learning_rate": 4.986670914260471e-05, + "loss": 0.3767, + "step": 32981 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866701042224204e-05, + "loss": 0.3815, + "step": 32982 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866692941598226e-05, + "loss": 0.3987, + "step": 32983 + }, + { + "epoch": 0.03, + "learning_rate": 4.986668484072677e-05, + "loss": 0.3674, + "step": 32984 + }, + { + "epoch": 0.03, + "learning_rate": 4.986667673960984e-05, + "loss": 0.347, + "step": 32985 + }, + { + "epoch": 0.03, + "learning_rate": 4.986666863824744e-05, + "loss": 0.3379, + "step": 32986 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866660536639566e-05, + "loss": 0.3167, + "step": 32987 + }, + { + "epoch": 0.03, + "learning_rate": 4.986665243478621e-05, + "loss": 0.3468, + "step": 32988 + }, + { + "epoch": 0.03, + "learning_rate": 4.98666443326874e-05, + "loss": 0.3668, + "step": 32989 + }, + { + "epoch": 0.03, + "learning_rate": 4.98666362303431e-05, + "loss": 0.4079, + "step": 32990 + }, + { + "epoch": 0.03, + "learning_rate": 4.986662812775333e-05, + "loss": 0.3259, + "step": 32991 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866620024918086e-05, + "loss": 0.3237, + "step": 32992 + }, + { + "epoch": 0.03, + "learning_rate": 4.986661192183738e-05, + "loss": 0.3472, + "step": 32993 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866603818511196e-05, + "loss": 0.3247, + "step": 32994 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866595714939536e-05, + "loss": 0.3752, + "step": 32995 + }, + { + "epoch": 0.03, + "learning_rate": 4.98665876111224e-05, + "loss": 0.3747, + "step": 32996 + }, + { + "epoch": 0.03, + "learning_rate": 4.98665795070598e-05, + "loss": 0.3929, + "step": 32997 + }, + { + "epoch": 0.03, + "learning_rate": 4.986657140275172e-05, + "loss": 1.0142, + "step": 32998 + }, + { + "epoch": 0.03, + "learning_rate": 4.986656329819818e-05, + "loss": 1.1707, + "step": 32999 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866555193399155e-05, + "loss": 0.8089, + "step": 33000 + }, + { + "epoch": 0.03, + "eval_loss": 1.1077449321746826, + "eval_runtime": 128.1069, + "eval_samples_per_second": 10.811, + "eval_steps_per_second": 5.41, + "step": 33000 + }, + { + "epoch": 0.03, + "learning_rate": 4.986654708835467e-05, + "loss": 0.7104, + "step": 33001 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866538983064695e-05, + "loss": 0.8027, + "step": 33002 + }, + { + "epoch": 0.03, + "learning_rate": 4.986653087752926e-05, + "loss": 0.7417, + "step": 33003 + }, + { + "epoch": 0.03, + "learning_rate": 4.986652277174835e-05, + "loss": 0.475, + "step": 33004 + }, + { + "epoch": 0.03, + "learning_rate": 4.986651466572197e-05, + "loss": 0.9183, + "step": 33005 + }, + { + "epoch": 0.03, + "learning_rate": 4.986650655945012e-05, + "loss": 0.7274, + "step": 33006 + }, + { + "epoch": 0.03, + "learning_rate": 4.98664984529328e-05, + "loss": 1.03, + "step": 33007 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866490346170006e-05, + "loss": 1.1065, + "step": 33008 + }, + { + "epoch": 0.03, + "learning_rate": 4.986648223916173e-05, + "loss": 1.1011, + "step": 33009 + }, + { + "epoch": 0.03, + "learning_rate": 4.986647413190799e-05, + "loss": 1.2691, + "step": 33010 + }, + { + "epoch": 0.03, + "learning_rate": 4.986646602440879e-05, + "loss": 0.8662, + "step": 33011 + }, + { + "epoch": 0.03, + "learning_rate": 4.98664579166641e-05, + "loss": 1.0739, + "step": 33012 + }, + { + "epoch": 0.03, + "learning_rate": 4.986644980867396e-05, + "loss": 1.6486, + "step": 33013 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866441700438324e-05, + "loss": 0.9896, + "step": 33014 + }, + { + "epoch": 0.03, + "learning_rate": 4.986643359195723e-05, + "loss": 1.2565, + "step": 33015 + }, + { + "epoch": 0.03, + "learning_rate": 4.986642548323067e-05, + "loss": 0.9514, + "step": 33016 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866417374258635e-05, + "loss": 1.1112, + "step": 33017 + }, + { + "epoch": 0.03, + "learning_rate": 4.986640926504113e-05, + "loss": 1.0109, + "step": 33018 + }, + { + "epoch": 0.03, + "learning_rate": 4.986640115557814e-05, + "loss": 0.6881, + "step": 33019 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866393045869695e-05, + "loss": 0.9575, + "step": 33020 + }, + { + "epoch": 0.03, + "learning_rate": 4.986638493591578e-05, + "loss": 0.9427, + "step": 33021 + }, + { + "epoch": 0.03, + "learning_rate": 4.986637682571639e-05, + "loss": 1.9894, + "step": 33022 + }, + { + "epoch": 0.03, + "learning_rate": 4.986636871527153e-05, + "loss": 3.3986, + "step": 33023 + }, + { + "epoch": 0.03, + "learning_rate": 4.98663606045812e-05, + "loss": 0.8276, + "step": 33024 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866352493645404e-05, + "loss": 0.756, + "step": 33025 + }, + { + "epoch": 0.03, + "learning_rate": 4.986634438246413e-05, + "loss": 0.7374, + "step": 33026 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866336271037396e-05, + "loss": 0.7242, + "step": 33027 + }, + { + "epoch": 0.03, + "learning_rate": 4.986632815936518e-05, + "loss": 0.7394, + "step": 33028 + }, + { + "epoch": 0.03, + "learning_rate": 4.98663200474475e-05, + "loss": 0.7326, + "step": 33029 + }, + { + "epoch": 0.03, + "learning_rate": 4.986631193528435e-05, + "loss": 0.7098, + "step": 33030 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866303822875726e-05, + "loss": 0.7133, + "step": 33031 + }, + { + "epoch": 0.03, + "learning_rate": 4.986629571022164e-05, + "loss": 0.6875, + "step": 33032 + }, + { + "epoch": 0.03, + "learning_rate": 4.986628759732208e-05, + "loss": 0.8125, + "step": 33033 + }, + { + "epoch": 0.03, + "learning_rate": 4.986627948417705e-05, + "loss": 0.6954, + "step": 33034 + }, + { + "epoch": 0.03, + "learning_rate": 4.986627137078655e-05, + "loss": 1.0913, + "step": 33035 + }, + { + "epoch": 0.03, + "learning_rate": 4.986626325715058e-05, + "loss": 1.449, + "step": 33036 + }, + { + "epoch": 0.03, + "learning_rate": 4.986625514326915e-05, + "loss": 1.0696, + "step": 33037 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866247029142245e-05, + "loss": 1.1017, + "step": 33038 + }, + { + "epoch": 0.03, + "learning_rate": 4.986623891476987e-05, + "loss": 1.2841, + "step": 33039 + }, + { + "epoch": 0.03, + "learning_rate": 4.986623080015203e-05, + "loss": 0.8803, + "step": 33040 + }, + { + "epoch": 0.03, + "learning_rate": 4.986622268528871e-05, + "loss": 0.8335, + "step": 33041 + }, + { + "epoch": 0.03, + "learning_rate": 4.986621457017992e-05, + "loss": 0.8166, + "step": 33042 + }, + { + "epoch": 0.03, + "learning_rate": 4.986620645482568e-05, + "loss": 1.6526, + "step": 33043 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866198339225957e-05, + "loss": 1.9394, + "step": 33044 + }, + { + "epoch": 0.03, + "learning_rate": 4.986619022338077e-05, + "loss": 0.8341, + "step": 33045 + }, + { + "epoch": 0.03, + "learning_rate": 4.986618210729012e-05, + "loss": 1.001, + "step": 33046 + }, + { + "epoch": 0.03, + "learning_rate": 4.986617399095399e-05, + "loss": 1.0913, + "step": 33047 + }, + { + "epoch": 0.03, + "learning_rate": 4.98661658743724e-05, + "loss": 1.2703, + "step": 33048 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866157757545335e-05, + "loss": 1.1661, + "step": 33049 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866149640472803e-05, + "loss": 1.2495, + "step": 33050 + }, + { + "epoch": 0.03, + "learning_rate": 4.986614152315481e-05, + "loss": 1.0336, + "step": 33051 + }, + { + "epoch": 0.03, + "learning_rate": 4.986613340559134e-05, + "loss": 0.8475, + "step": 33052 + }, + { + "epoch": 0.03, + "learning_rate": 4.986612528778241e-05, + "loss": 1.0408, + "step": 33053 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866117169728e-05, + "loss": 1.2201, + "step": 33054 + }, + { + "epoch": 0.03, + "learning_rate": 4.986610905142813e-05, + "loss": 1.1473, + "step": 33055 + }, + { + "epoch": 0.03, + "learning_rate": 4.98661009328828e-05, + "loss": 1.1419, + "step": 33056 + }, + { + "epoch": 0.03, + "learning_rate": 4.986609281409199e-05, + "loss": 1.2108, + "step": 33057 + }, + { + "epoch": 0.03, + "learning_rate": 4.986608469505572e-05, + "loss": 0.9765, + "step": 33058 + }, + { + "epoch": 0.03, + "learning_rate": 4.986607657577398e-05, + "loss": 1.0187, + "step": 33059 + }, + { + "epoch": 0.03, + "learning_rate": 4.986606845624677e-05, + "loss": 1.1752, + "step": 33060 + }, + { + "epoch": 0.03, + "learning_rate": 4.98660603364741e-05, + "loss": 0.7546, + "step": 33061 + }, + { + "epoch": 0.03, + "learning_rate": 4.986605221645595e-05, + "loss": 1.0998, + "step": 33062 + }, + { + "epoch": 0.03, + "learning_rate": 4.986604409619234e-05, + "loss": 0.8017, + "step": 33063 + }, + { + "epoch": 0.03, + "learning_rate": 4.986603597568327e-05, + "loss": 0.8413, + "step": 33064 + }, + { + "epoch": 0.03, + "learning_rate": 4.986602785492872e-05, + "loss": 1.0091, + "step": 33065 + }, + { + "epoch": 0.03, + "learning_rate": 4.986601973392871e-05, + "loss": 1.1373, + "step": 33066 + }, + { + "epoch": 0.03, + "learning_rate": 4.986601161268323e-05, + "loss": 0.9977, + "step": 33067 + }, + { + "epoch": 0.03, + "learning_rate": 4.986600349119229e-05, + "loss": 1.3586, + "step": 33068 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865995369455876e-05, + "loss": 1.4936, + "step": 33069 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865987247474e-05, + "loss": 1.3752, + "step": 33070 + }, + { + "epoch": 0.03, + "learning_rate": 4.986597912524665e-05, + "loss": 1.214, + "step": 33071 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865971002773844e-05, + "loss": 1.0869, + "step": 33072 + }, + { + "epoch": 0.03, + "learning_rate": 4.986596288005556e-05, + "loss": 0.9108, + "step": 33073 + }, + { + "epoch": 0.03, + "learning_rate": 4.986595475709182e-05, + "loss": 0.7096, + "step": 33074 + }, + { + "epoch": 0.03, + "learning_rate": 4.986594663388261e-05, + "loss": 0.9052, + "step": 33075 + }, + { + "epoch": 0.03, + "learning_rate": 4.986593851042793e-05, + "loss": 0.6274, + "step": 33076 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865930386727795e-05, + "loss": 0.7202, + "step": 33077 + }, + { + "epoch": 0.03, + "learning_rate": 4.986592226278218e-05, + "loss": 0.9891, + "step": 33078 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865914138591115e-05, + "loss": 0.9417, + "step": 33079 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865906014154574e-05, + "loss": 0.969, + "step": 33080 + }, + { + "epoch": 0.03, + "learning_rate": 4.986589788947257e-05, + "loss": 0.9762, + "step": 33081 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865889764545094e-05, + "loss": 1.1048, + "step": 33082 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865881639372156e-05, + "loss": 0.9153, + "step": 33083 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865873513953754e-05, + "loss": 1.3031, + "step": 33084 + }, + { + "epoch": 0.03, + "learning_rate": 4.986586538828989e-05, + "loss": 1.3788, + "step": 33085 + }, + { + "epoch": 0.03, + "learning_rate": 4.986585726238055e-05, + "loss": 1.2371, + "step": 33086 + }, + { + "epoch": 0.03, + "learning_rate": 4.986584913622576e-05, + "loss": 1.3113, + "step": 33087 + }, + { + "epoch": 0.03, + "learning_rate": 4.98658410098255e-05, + "loss": 1.4019, + "step": 33088 + }, + { + "epoch": 0.03, + "learning_rate": 4.986583288317977e-05, + "loss": 1.1974, + "step": 33089 + }, + { + "epoch": 0.03, + "learning_rate": 4.986582475628857e-05, + "loss": 1.318, + "step": 33090 + }, + { + "epoch": 0.03, + "learning_rate": 4.986581662915192e-05, + "loss": 1.3099, + "step": 33091 + }, + { + "epoch": 0.03, + "learning_rate": 4.986580850176979e-05, + "loss": 1.112, + "step": 33092 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865800374142204e-05, + "loss": 0.9023, + "step": 33093 + }, + { + "epoch": 0.03, + "learning_rate": 4.986579224626915e-05, + "loss": 0.9732, + "step": 33094 + }, + { + "epoch": 0.03, + "learning_rate": 4.986578411815063e-05, + "loss": 0.996, + "step": 33095 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865775989786653e-05, + "loss": 1.0636, + "step": 33096 + }, + { + "epoch": 0.03, + "learning_rate": 4.98657678611772e-05, + "loss": 1.2013, + "step": 33097 + }, + { + "epoch": 0.03, + "learning_rate": 4.986575973232229e-05, + "loss": 1.0979, + "step": 33098 + }, + { + "epoch": 0.03, + "learning_rate": 4.986575160322192e-05, + "loss": 0.9918, + "step": 33099 + }, + { + "epoch": 0.03, + "learning_rate": 4.986574347387608e-05, + "loss": 1.0657, + "step": 33100 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865735344284776e-05, + "loss": 0.7863, + "step": 33101 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865727214448013e-05, + "loss": 1.137, + "step": 33102 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865719084365774e-05, + "loss": 1.0307, + "step": 33103 + }, + { + "epoch": 0.03, + "learning_rate": 4.986571095403808e-05, + "loss": 1.0934, + "step": 33104 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865702823464924e-05, + "loss": 1.1962, + "step": 33105 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865694692646295e-05, + "loss": 1.1201, + "step": 33106 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865686561582215e-05, + "loss": 1.0853, + "step": 33107 + }, + { + "epoch": 0.03, + "learning_rate": 4.986567843027266e-05, + "loss": 1.1154, + "step": 33108 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865670298717645e-05, + "loss": 0.7638, + "step": 33109 + }, + { + "epoch": 0.03, + "learning_rate": 4.986566216691717e-05, + "loss": 1.0088, + "step": 33110 + }, + { + "epoch": 0.03, + "learning_rate": 4.986565403487123e-05, + "loss": 0.9251, + "step": 33111 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865645902579825e-05, + "loss": 0.877, + "step": 33112 + }, + { + "epoch": 0.03, + "learning_rate": 4.986563777004296e-05, + "loss": 0.831, + "step": 33113 + }, + { + "epoch": 0.03, + "learning_rate": 4.986562963726062e-05, + "loss": 0.9163, + "step": 33114 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865621504232835e-05, + "loss": 0.9736, + "step": 33115 + }, + { + "epoch": 0.03, + "learning_rate": 4.986561337095958e-05, + "loss": 0.6199, + "step": 33116 + }, + { + "epoch": 0.03, + "learning_rate": 4.986560523744086e-05, + "loss": 1.1655, + "step": 33117 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865597103676674e-05, + "loss": 0.7523, + "step": 33118 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865588969667034e-05, + "loss": 0.9462, + "step": 33119 + }, + { + "epoch": 0.03, + "learning_rate": 4.986558083541193e-05, + "loss": 0.9223, + "step": 33120 + }, + { + "epoch": 0.03, + "learning_rate": 4.986557270091136e-05, + "loss": 1.1487, + "step": 33121 + }, + { + "epoch": 0.03, + "learning_rate": 4.986556456616533e-05, + "loss": 1.2566, + "step": 33122 + }, + { + "epoch": 0.03, + "learning_rate": 4.986555643117383e-05, + "loss": 1.2188, + "step": 33123 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865548295936876e-05, + "loss": 0.9495, + "step": 33124 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865540160454456e-05, + "loss": 1.3351, + "step": 33125 + }, + { + "epoch": 0.03, + "learning_rate": 4.986553202472657e-05, + "loss": 0.9941, + "step": 33126 + }, + { + "epoch": 0.03, + "learning_rate": 4.986552388875323e-05, + "loss": 0.9929, + "step": 33127 + }, + { + "epoch": 0.03, + "learning_rate": 4.986551575253443e-05, + "loss": 0.9276, + "step": 33128 + }, + { + "epoch": 0.03, + "learning_rate": 4.986550761607016e-05, + "loss": 1.231, + "step": 33129 + }, + { + "epoch": 0.03, + "learning_rate": 4.986549947936043e-05, + "loss": 1.0074, + "step": 33130 + }, + { + "epoch": 0.03, + "learning_rate": 4.986549134240524e-05, + "loss": 0.8524, + "step": 33131 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865483205204586e-05, + "loss": 0.8722, + "step": 33132 + }, + { + "epoch": 0.03, + "learning_rate": 4.986547506775847e-05, + "loss": 0.7019, + "step": 33133 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865466930066895e-05, + "loss": 0.7256, + "step": 33134 + }, + { + "epoch": 0.03, + "learning_rate": 4.986545879212986e-05, + "loss": 0.9898, + "step": 33135 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865450653947364e-05, + "loss": 1.3975, + "step": 33136 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865442515519404e-05, + "loss": 0.775, + "step": 33137 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865434376845987e-05, + "loss": 0.9602, + "step": 33138 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865426237927106e-05, + "loss": 0.9798, + "step": 33139 + }, + { + "epoch": 0.03, + "learning_rate": 4.986541809876276e-05, + "loss": 0.8368, + "step": 33140 + }, + { + "epoch": 0.03, + "learning_rate": 4.986540995935296e-05, + "loss": 0.6477, + "step": 33141 + }, + { + "epoch": 0.03, + "learning_rate": 4.986540181969769e-05, + "loss": 0.5584, + "step": 33142 + }, + { + "epoch": 0.03, + "learning_rate": 4.986539367979697e-05, + "loss": 0.8834, + "step": 33143 + }, + { + "epoch": 0.03, + "learning_rate": 4.986538553965078e-05, + "loss": 0.8545, + "step": 33144 + }, + { + "epoch": 0.03, + "learning_rate": 4.986537739925914e-05, + "loss": 0.7762, + "step": 33145 + }, + { + "epoch": 0.03, + "learning_rate": 4.986536925862203e-05, + "loss": 1.3972, + "step": 33146 + }, + { + "epoch": 0.03, + "learning_rate": 4.986536111773946e-05, + "loss": 1.3655, + "step": 33147 + }, + { + "epoch": 0.03, + "learning_rate": 4.986535297661143e-05, + "loss": 0.8172, + "step": 33148 + }, + { + "epoch": 0.03, + "learning_rate": 4.986534483523795e-05, + "loss": 0.5255, + "step": 33149 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865336693619e-05, + "loss": 0.8313, + "step": 33150 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865328551754594e-05, + "loss": 1.1157, + "step": 33151 + }, + { + "epoch": 0.03, + "learning_rate": 4.986532040964472e-05, + "loss": 1.0486, + "step": 33152 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865312267289396e-05, + "loss": 0.7746, + "step": 33153 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865304124688606e-05, + "loss": 0.8486, + "step": 33154 + }, + { + "epoch": 0.03, + "learning_rate": 4.986529598184236e-05, + "loss": 1.0784, + "step": 33155 + }, + { + "epoch": 0.03, + "learning_rate": 4.986528783875065e-05, + "loss": 1.3022, + "step": 33156 + }, + { + "epoch": 0.03, + "learning_rate": 4.986527969541348e-05, + "loss": 0.8704, + "step": 33157 + }, + { + "epoch": 0.03, + "learning_rate": 4.986527155183086e-05, + "loss": 1.0591, + "step": 33158 + }, + { + "epoch": 0.03, + "learning_rate": 4.986526340800277e-05, + "loss": 1.0008, + "step": 33159 + }, + { + "epoch": 0.03, + "learning_rate": 4.986525526392922e-05, + "loss": 1.1886, + "step": 33160 + }, + { + "epoch": 0.03, + "learning_rate": 4.986524711961022e-05, + "loss": 0.9948, + "step": 33161 + }, + { + "epoch": 0.03, + "learning_rate": 4.986523897504576e-05, + "loss": 0.9535, + "step": 33162 + }, + { + "epoch": 0.03, + "learning_rate": 4.986523083023583e-05, + "loss": 0.9846, + "step": 33163 + }, + { + "epoch": 0.03, + "learning_rate": 4.986522268518045e-05, + "loss": 0.9877, + "step": 33164 + }, + { + "epoch": 0.03, + "learning_rate": 4.986521453987961e-05, + "loss": 1.1791, + "step": 33165 + }, + { + "epoch": 0.03, + "learning_rate": 4.986520639433331e-05, + "loss": 0.7762, + "step": 33166 + }, + { + "epoch": 0.03, + "learning_rate": 4.986519824854154e-05, + "loss": 1.0274, + "step": 33167 + }, + { + "epoch": 0.03, + "learning_rate": 4.986519010250433e-05, + "loss": 1.1417, + "step": 33168 + }, + { + "epoch": 0.03, + "learning_rate": 4.986518195622165e-05, + "loss": 0.8603, + "step": 33169 + }, + { + "epoch": 0.03, + "learning_rate": 4.986517380969352e-05, + "loss": 0.4588, + "step": 33170 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865165662919924e-05, + "loss": 0.8191, + "step": 33171 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865157515900874e-05, + "loss": 0.6148, + "step": 33172 + }, + { + "epoch": 0.03, + "learning_rate": 4.986514936863636e-05, + "loss": 1.1422, + "step": 33173 + }, + { + "epoch": 0.03, + "learning_rate": 4.986514122112639e-05, + "loss": 0.7298, + "step": 33174 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865133073370965e-05, + "loss": 0.7888, + "step": 33175 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865124925370075e-05, + "loss": 0.8774, + "step": 33176 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865116777123735e-05, + "loss": 0.8949, + "step": 33177 + }, + { + "epoch": 0.03, + "learning_rate": 4.986510862863193e-05, + "loss": 0.9649, + "step": 33178 + }, + { + "epoch": 0.03, + "learning_rate": 4.986510047989468e-05, + "loss": 1.0939, + "step": 33179 + }, + { + "epoch": 0.03, + "learning_rate": 4.986509233091196e-05, + "loss": 0.8991, + "step": 33180 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865084181683784e-05, + "loss": 1.1394, + "step": 33181 + }, + { + "epoch": 0.03, + "learning_rate": 4.986507603221015e-05, + "loss": 1.3993, + "step": 33182 + }, + { + "epoch": 0.03, + "learning_rate": 4.986506788249106e-05, + "loss": 0.9681, + "step": 33183 + }, + { + "epoch": 0.03, + "learning_rate": 4.986505973252651e-05, + "loss": 0.6848, + "step": 33184 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865051582316514e-05, + "loss": 0.4539, + "step": 33185 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865043431861045e-05, + "loss": 0.8712, + "step": 33186 + }, + { + "epoch": 0.03, + "learning_rate": 4.9865035281160126e-05, + "loss": 0.9651, + "step": 33187 + }, + { + "epoch": 0.03, + "learning_rate": 4.986502713021375e-05, + "loss": 1.1486, + "step": 33188 + }, + { + "epoch": 0.03, + "learning_rate": 4.986501897902192e-05, + "loss": 1.1465, + "step": 33189 + }, + { + "epoch": 0.03, + "learning_rate": 4.986501082758462e-05, + "loss": 0.7702, + "step": 33190 + }, + { + "epoch": 0.03, + "learning_rate": 4.986500267590188e-05, + "loss": 1.0391, + "step": 33191 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864994523973675e-05, + "loss": 1.1194, + "step": 33192 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864986371800016e-05, + "loss": 0.559, + "step": 33193 + }, + { + "epoch": 0.03, + "learning_rate": 4.98649782193809e-05, + "loss": 0.8596, + "step": 33194 + }, + { + "epoch": 0.03, + "learning_rate": 4.986497006671632e-05, + "loss": 1.1285, + "step": 33195 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864961913806294e-05, + "loss": 0.8833, + "step": 33196 + }, + { + "epoch": 0.03, + "learning_rate": 4.986495376065081e-05, + "loss": 0.8909, + "step": 33197 + }, + { + "epoch": 0.03, + "learning_rate": 4.986494560724986e-05, + "loss": 1.0831, + "step": 33198 + }, + { + "epoch": 0.03, + "learning_rate": 4.986493745360347e-05, + "loss": 1.073, + "step": 33199 + }, + { + "epoch": 0.03, + "learning_rate": 4.986492929971161e-05, + "loss": 1.0871, + "step": 33200 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864921145574296e-05, + "loss": 1.0656, + "step": 33201 + }, + { + "epoch": 0.03, + "learning_rate": 4.986491299119153e-05, + "loss": 0.869, + "step": 33202 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864904836563303e-05, + "loss": 1.1164, + "step": 33203 + }, + { + "epoch": 0.03, + "learning_rate": 4.986489668168962e-05, + "loss": 0.9311, + "step": 33204 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864888526570484e-05, + "loss": 0.9337, + "step": 33205 + }, + { + "epoch": 0.03, + "learning_rate": 4.98648803712059e-05, + "loss": 0.9319, + "step": 33206 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864872215595846e-05, + "loss": 0.8306, + "step": 33207 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864864059740345e-05, + "loss": 0.9204, + "step": 33208 + }, + { + "epoch": 0.03, + "learning_rate": 4.986485590363939e-05, + "loss": 0.8528, + "step": 33209 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864847747292974e-05, + "loss": 1.3852, + "step": 33210 + }, + { + "epoch": 0.03, + "learning_rate": 4.986483959070111e-05, + "loss": 0.8442, + "step": 33211 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864831433863776e-05, + "loss": 0.7281, + "step": 33212 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864823276781006e-05, + "loss": 1.1583, + "step": 33213 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864815119452766e-05, + "loss": 1.1781, + "step": 33214 + }, + { + "epoch": 0.03, + "learning_rate": 4.986480696187908e-05, + "loss": 0.7323, + "step": 33215 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864798804059935e-05, + "loss": 1.0798, + "step": 33216 + }, + { + "epoch": 0.03, + "learning_rate": 4.986479064599534e-05, + "loss": 1.0151, + "step": 33217 + }, + { + "epoch": 0.03, + "learning_rate": 4.986478248768528e-05, + "loss": 1.2913, + "step": 33218 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864774329129776e-05, + "loss": 1.2798, + "step": 33219 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864766170328816e-05, + "loss": 0.8936, + "step": 33220 + }, + { + "epoch": 0.03, + "learning_rate": 4.98647580112824e-05, + "loss": 0.5811, + "step": 33221 + }, + { + "epoch": 0.03, + "learning_rate": 4.986474985199052e-05, + "loss": 0.6489, + "step": 33222 + }, + { + "epoch": 0.03, + "learning_rate": 4.98647416924532e-05, + "loss": 0.988, + "step": 33223 + }, + { + "epoch": 0.03, + "learning_rate": 4.986473353267042e-05, + "loss": 0.6386, + "step": 33224 + }, + { + "epoch": 0.03, + "learning_rate": 4.986472537264218e-05, + "loss": 0.7838, + "step": 33225 + }, + { + "epoch": 0.03, + "learning_rate": 4.98647172123685e-05, + "loss": 0.8724, + "step": 33226 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864709051849355e-05, + "loss": 0.7571, + "step": 33227 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864700891084756e-05, + "loss": 0.9087, + "step": 33228 + }, + { + "epoch": 0.03, + "learning_rate": 4.98646927300747e-05, + "loss": 0.8928, + "step": 33229 + }, + { + "epoch": 0.03, + "learning_rate": 4.98646845688192e-05, + "loss": 0.9023, + "step": 33230 + }, + { + "epoch": 0.03, + "learning_rate": 4.986467640731825e-05, + "loss": 1.2251, + "step": 33231 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864668245571836e-05, + "loss": 0.9686, + "step": 33232 + }, + { + "epoch": 0.03, + "learning_rate": 4.986466008357997e-05, + "loss": 1.1483, + "step": 33233 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864651921342656e-05, + "loss": 0.4775, + "step": 33234 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864643758859875e-05, + "loss": 0.7181, + "step": 33235 + }, + { + "epoch": 0.03, + "learning_rate": 4.986463559613165e-05, + "loss": 0.9432, + "step": 33236 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864627433157976e-05, + "loss": 1.0283, + "step": 33237 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864619269938845e-05, + "loss": 0.9469, + "step": 33238 + }, + { + "epoch": 0.03, + "learning_rate": 4.986461110647426e-05, + "loss": 0.9334, + "step": 33239 + }, + { + "epoch": 0.03, + "learning_rate": 4.986460294276422e-05, + "loss": 0.9711, + "step": 33240 + }, + { + "epoch": 0.03, + "learning_rate": 4.986459477880873e-05, + "loss": 0.9848, + "step": 33241 + }, + { + "epoch": 0.03, + "learning_rate": 4.986458661460779e-05, + "loss": 0.908, + "step": 33242 + }, + { + "epoch": 0.03, + "learning_rate": 4.986457845016139e-05, + "loss": 1.0281, + "step": 33243 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864570285469546e-05, + "loss": 0.7234, + "step": 33244 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864562120532246e-05, + "loss": 0.9693, + "step": 33245 + }, + { + "epoch": 0.03, + "learning_rate": 4.986455395534949e-05, + "loss": 0.9946, + "step": 33246 + }, + { + "epoch": 0.03, + "learning_rate": 4.986454578992128e-05, + "loss": 1.405, + "step": 33247 + }, + { + "epoch": 0.03, + "learning_rate": 4.986453762424763e-05, + "loss": 1.0591, + "step": 33248 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864529458328514e-05, + "loss": 1.2111, + "step": 33249 + }, + { + "epoch": 0.03, + "learning_rate": 4.986452129216396e-05, + "loss": 0.5647, + "step": 33250 + }, + { + "epoch": 0.03, + "learning_rate": 4.986451312575394e-05, + "loss": 0.4444, + "step": 33251 + }, + { + "epoch": 0.03, + "learning_rate": 4.986450495909848e-05, + "loss": 1.0604, + "step": 33252 + }, + { + "epoch": 0.03, + "learning_rate": 4.986449679219756e-05, + "loss": 1.2643, + "step": 33253 + }, + { + "epoch": 0.03, + "learning_rate": 4.986448862505119e-05, + "loss": 0.5515, + "step": 33254 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864480457659364e-05, + "loss": 1.1929, + "step": 33255 + }, + { + "epoch": 0.03, + "learning_rate": 4.986447229002209e-05, + "loss": 1.2093, + "step": 33256 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864464122139364e-05, + "loss": 0.7555, + "step": 33257 + }, + { + "epoch": 0.03, + "learning_rate": 4.986445595401118e-05, + "loss": 0.3241, + "step": 33258 + }, + { + "epoch": 0.03, + "learning_rate": 4.986444778563756e-05, + "loss": 0.3535, + "step": 33259 + }, + { + "epoch": 0.03, + "learning_rate": 4.986443961701848e-05, + "loss": 0.2637, + "step": 33260 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864431448153946e-05, + "loss": 0.2261, + "step": 33261 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864423279043965e-05, + "loss": 0.2062, + "step": 33262 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864415109688535e-05, + "loss": 0.2562, + "step": 33263 + }, + { + "epoch": 0.03, + "learning_rate": 4.986440694008765e-05, + "loss": 0.1917, + "step": 33264 + }, + { + "epoch": 0.03, + "learning_rate": 4.986439877024132e-05, + "loss": 0.2352, + "step": 33265 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864390600149524e-05, + "loss": 0.8907, + "step": 33266 + }, + { + "epoch": 0.03, + "learning_rate": 4.986438242981229e-05, + "loss": 0.7324, + "step": 33267 + }, + { + "epoch": 0.03, + "learning_rate": 4.98643742592296e-05, + "loss": 0.994, + "step": 33268 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864366088401464e-05, + "loss": 1.0016, + "step": 33269 + }, + { + "epoch": 0.03, + "learning_rate": 4.986435791732788e-05, + "loss": 0.8314, + "step": 33270 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864349746008836e-05, + "loss": 0.7881, + "step": 33271 + }, + { + "epoch": 0.03, + "learning_rate": 4.986434157444434e-05, + "loss": 1.4851, + "step": 33272 + }, + { + "epoch": 0.03, + "learning_rate": 4.986433340263441e-05, + "loss": 1.4032, + "step": 33273 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864325230579015e-05, + "loss": 1.0758, + "step": 33274 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864317058278174e-05, + "loss": 0.9204, + "step": 33275 + }, + { + "epoch": 0.03, + "learning_rate": 4.986430888573189e-05, + "loss": 0.9876, + "step": 33276 + }, + { + "epoch": 0.03, + "learning_rate": 4.986430071294015e-05, + "loss": 1.2784, + "step": 33277 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864292539902956e-05, + "loss": 1.1692, + "step": 33278 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864284366620314e-05, + "loss": 1.1482, + "step": 33279 + }, + { + "epoch": 0.03, + "learning_rate": 4.986427619309223e-05, + "loss": 1.2649, + "step": 33280 + }, + { + "epoch": 0.03, + "learning_rate": 4.986426801931868e-05, + "loss": 1.1967, + "step": 33281 + }, + { + "epoch": 0.03, + "learning_rate": 4.98642598452997e-05, + "loss": 1.1917, + "step": 33282 + }, + { + "epoch": 0.03, + "learning_rate": 4.986425167103526e-05, + "loss": 1.0152, + "step": 33283 + }, + { + "epoch": 0.03, + "learning_rate": 4.986424349652537e-05, + "loss": 1.2074, + "step": 33284 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864235321770034e-05, + "loss": 1.0559, + "step": 33285 + }, + { + "epoch": 0.03, + "learning_rate": 4.986422714676925e-05, + "loss": 0.9479, + "step": 33286 + }, + { + "epoch": 0.03, + "learning_rate": 4.986421897152301e-05, + "loss": 1.3159, + "step": 33287 + }, + { + "epoch": 0.03, + "learning_rate": 4.986421079603133e-05, + "loss": 1.0437, + "step": 33288 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864202620294196e-05, + "loss": 0.899, + "step": 33289 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864194444311606e-05, + "loss": 1.1748, + "step": 33290 + }, + { + "epoch": 0.03, + "learning_rate": 4.986418626808358e-05, + "loss": 1.0876, + "step": 33291 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864178091610106e-05, + "loss": 1.0943, + "step": 33292 + }, + { + "epoch": 0.03, + "learning_rate": 4.986416991489117e-05, + "loss": 1.0308, + "step": 33293 + }, + { + "epoch": 0.03, + "learning_rate": 4.98641617379268e-05, + "loss": 1.0171, + "step": 33294 + }, + { + "epoch": 0.03, + "learning_rate": 4.986415356071697e-05, + "loss": 1.0481, + "step": 33295 + }, + { + "epoch": 0.03, + "learning_rate": 4.986414538326169e-05, + "loss": 1.0538, + "step": 33296 + }, + { + "epoch": 0.03, + "learning_rate": 4.986413720556097e-05, + "loss": 0.9384, + "step": 33297 + }, + { + "epoch": 0.03, + "learning_rate": 4.98641290276148e-05, + "loss": 0.5404, + "step": 33298 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864120849423177e-05, + "loss": 0.7994, + "step": 33299 + }, + { + "epoch": 0.03, + "learning_rate": 4.986411267098611e-05, + "loss": 0.7401, + "step": 33300 + }, + { + "epoch": 0.03, + "learning_rate": 4.98641044923036e-05, + "loss": 1.0383, + "step": 33301 + }, + { + "epoch": 0.03, + "learning_rate": 4.986409631337563e-05, + "loss": 0.531, + "step": 33302 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864088134202216e-05, + "loss": 0.9411, + "step": 33303 + }, + { + "epoch": 0.03, + "learning_rate": 4.986407995478336e-05, + "loss": 1.3836, + "step": 33304 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864071775119054e-05, + "loss": 1.0159, + "step": 33305 + }, + { + "epoch": 0.03, + "learning_rate": 4.98640635952093e-05, + "loss": 1.0094, + "step": 33306 + }, + { + "epoch": 0.03, + "learning_rate": 4.98640554150541e-05, + "loss": 0.8566, + "step": 33307 + }, + { + "epoch": 0.03, + "learning_rate": 4.986404723465345e-05, + "loss": 0.7517, + "step": 33308 + }, + { + "epoch": 0.03, + "learning_rate": 4.986403905400735e-05, + "loss": 0.688, + "step": 33309 + }, + { + "epoch": 0.03, + "learning_rate": 4.986403087311581e-05, + "loss": 0.4377, + "step": 33310 + }, + { + "epoch": 0.03, + "learning_rate": 4.986402269197882e-05, + "loss": 0.4737, + "step": 33311 + }, + { + "epoch": 0.03, + "learning_rate": 4.986401451059638e-05, + "loss": 0.675, + "step": 33312 + }, + { + "epoch": 0.03, + "learning_rate": 4.986400632896849e-05, + "loss": 1.0879, + "step": 33313 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863998147095156e-05, + "loss": 1.1017, + "step": 33314 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863989964976375e-05, + "loss": 1.2489, + "step": 33315 + }, + { + "epoch": 0.03, + "learning_rate": 4.986398178261215e-05, + "loss": 1.0957, + "step": 33316 + }, + { + "epoch": 0.03, + "learning_rate": 4.986397360000248e-05, + "loss": 0.6815, + "step": 33317 + }, + { + "epoch": 0.03, + "learning_rate": 4.986396541714736e-05, + "loss": 0.751, + "step": 33318 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863957234046795e-05, + "loss": 0.2276, + "step": 33319 + }, + { + "epoch": 0.03, + "learning_rate": 4.986394905070077e-05, + "loss": 0.2688, + "step": 33320 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863940867109314e-05, + "loss": 0.1528, + "step": 33321 + }, + { + "epoch": 0.03, + "learning_rate": 4.986393268327241e-05, + "loss": 0.2781, + "step": 33322 + }, + { + "epoch": 0.03, + "learning_rate": 4.986392449919006e-05, + "loss": 0.2825, + "step": 33323 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863916314862264e-05, + "loss": 0.2995, + "step": 33324 + }, + { + "epoch": 0.03, + "learning_rate": 4.986390813028902e-05, + "loss": 0.5413, + "step": 33325 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863899945470325e-05, + "loss": 1.2495, + "step": 33326 + }, + { + "epoch": 0.03, + "learning_rate": 4.986389176040619e-05, + "loss": 1.3056, + "step": 33327 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863883575096606e-05, + "loss": 1.4484, + "step": 33328 + }, + { + "epoch": 0.03, + "learning_rate": 4.986387538954158e-05, + "loss": 0.6052, + "step": 33329 + }, + { + "epoch": 0.03, + "learning_rate": 4.986386720374111e-05, + "loss": 0.7994, + "step": 33330 + }, + { + "epoch": 0.03, + "learning_rate": 4.986385901769519e-05, + "loss": 0.437, + "step": 33331 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863850831403826e-05, + "loss": 0.4779, + "step": 33332 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863842644867016e-05, + "loss": 0.6643, + "step": 33333 + }, + { + "epoch": 0.03, + "learning_rate": 4.986383445808476e-05, + "loss": 1.0035, + "step": 33334 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863826271057055e-05, + "loss": 0.9534, + "step": 33335 + }, + { + "epoch": 0.03, + "learning_rate": 4.986381808378391e-05, + "loss": 0.8191, + "step": 33336 + }, + { + "epoch": 0.03, + "learning_rate": 4.986380989626532e-05, + "loss": 0.6711, + "step": 33337 + }, + { + "epoch": 0.03, + "learning_rate": 4.986380170850128e-05, + "loss": 0.4215, + "step": 33338 + }, + { + "epoch": 0.03, + "learning_rate": 4.98637935204918e-05, + "loss": 1.0389, + "step": 33339 + }, + { + "epoch": 0.03, + "learning_rate": 4.986378533223688e-05, + "loss": 1.3317, + "step": 33340 + }, + { + "epoch": 0.03, + "learning_rate": 4.98637771437365e-05, + "loss": 1.2977, + "step": 33341 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863768954990686e-05, + "loss": 1.4446, + "step": 33342 + }, + { + "epoch": 0.03, + "learning_rate": 4.986376076599943e-05, + "loss": 1.2198, + "step": 33343 + }, + { + "epoch": 0.03, + "learning_rate": 4.986375257676272e-05, + "loss": 0.352, + "step": 33344 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863744387280576e-05, + "loss": 0.5044, + "step": 33345 + }, + { + "epoch": 0.03, + "learning_rate": 4.986373619755298e-05, + "loss": 0.3465, + "step": 33346 + }, + { + "epoch": 0.03, + "learning_rate": 4.986372800757994e-05, + "loss": 0.424, + "step": 33347 + }, + { + "epoch": 0.03, + "learning_rate": 4.986371981736146e-05, + "loss": 0.4246, + "step": 33348 + }, + { + "epoch": 0.03, + "learning_rate": 4.986371162689753e-05, + "loss": 1.3911, + "step": 33349 + }, + { + "epoch": 0.03, + "learning_rate": 4.986370343618816e-05, + "loss": 0.9933, + "step": 33350 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863695245233346e-05, + "loss": 1.2561, + "step": 33351 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863687054033085e-05, + "loss": 1.129, + "step": 33352 + }, + { + "epoch": 0.03, + "learning_rate": 4.986367886258738e-05, + "loss": 1.0901, + "step": 33353 + }, + { + "epoch": 0.03, + "learning_rate": 4.986367067089624e-05, + "loss": 0.9852, + "step": 33354 + }, + { + "epoch": 0.03, + "learning_rate": 4.986366247895965e-05, + "loss": 0.8538, + "step": 33355 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863654286777616e-05, + "loss": 1.0462, + "step": 33356 + }, + { + "epoch": 0.03, + "learning_rate": 4.986364609435013e-05, + "loss": 0.993, + "step": 33357 + }, + { + "epoch": 0.03, + "learning_rate": 4.986363790167722e-05, + "loss": 1.0222, + "step": 33358 + }, + { + "epoch": 0.03, + "learning_rate": 4.986362970875885e-05, + "loss": 1.2745, + "step": 33359 + }, + { + "epoch": 0.03, + "learning_rate": 4.986362151559505e-05, + "loss": 1.2363, + "step": 33360 + }, + { + "epoch": 0.03, + "learning_rate": 4.98636133221858e-05, + "loss": 0.9082, + "step": 33361 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863605128531105e-05, + "loss": 0.9747, + "step": 33362 + }, + { + "epoch": 0.03, + "learning_rate": 4.986359693463097e-05, + "loss": 0.659, + "step": 33363 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863588740485386e-05, + "loss": 0.4724, + "step": 33364 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863580546094366e-05, + "loss": 0.4628, + "step": 33365 + }, + { + "epoch": 0.03, + "learning_rate": 4.98635723514579e-05, + "loss": 0.8361, + "step": 33366 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863564156575995e-05, + "loss": 0.9667, + "step": 33367 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863555961448645e-05, + "loss": 1.0612, + "step": 33368 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863547766075845e-05, + "loss": 0.8664, + "step": 33369 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863539570457616e-05, + "loss": 1.1217, + "step": 33370 + }, + { + "epoch": 0.03, + "learning_rate": 4.986353137459394e-05, + "loss": 0.9522, + "step": 33371 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863523178484815e-05, + "loss": 1.0418, + "step": 33372 + }, + { + "epoch": 0.03, + "learning_rate": 4.986351498213026e-05, + "loss": 0.6988, + "step": 33373 + }, + { + "epoch": 0.03, + "learning_rate": 4.986350678553025e-05, + "loss": 0.8279, + "step": 33374 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863498588684804e-05, + "loss": 1.2838, + "step": 33375 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863490391593917e-05, + "loss": 0.5169, + "step": 33376 + }, + { + "epoch": 0.03, + "learning_rate": 4.986348219425758e-05, + "loss": 1.1723, + "step": 33377 + }, + { + "epoch": 0.03, + "learning_rate": 4.986347399667581e-05, + "loss": 0.9671, + "step": 33378 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863465798848596e-05, + "loss": 1.2153, + "step": 33379 + }, + { + "epoch": 0.03, + "learning_rate": 4.986345760077594e-05, + "loss": 1.1858, + "step": 33380 + }, + { + "epoch": 0.03, + "learning_rate": 4.986344940245785e-05, + "loss": 0.818, + "step": 33381 + }, + { + "epoch": 0.03, + "learning_rate": 4.986344120389431e-05, + "loss": 0.3974, + "step": 33382 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863433005085326e-05, + "loss": 1.079, + "step": 33383 + }, + { + "epoch": 0.03, + "learning_rate": 4.98634248060309e-05, + "loss": 1.0841, + "step": 33384 + }, + { + "epoch": 0.03, + "learning_rate": 4.986341660673104e-05, + "loss": 0.9409, + "step": 33385 + }, + { + "epoch": 0.03, + "learning_rate": 4.986340840718574e-05, + "loss": 0.7624, + "step": 33386 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863400207394995e-05, + "loss": 0.6385, + "step": 33387 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863392007358805e-05, + "loss": 0.9201, + "step": 33388 + }, + { + "epoch": 0.03, + "learning_rate": 4.986338380707718e-05, + "loss": 0.8601, + "step": 33389 + }, + { + "epoch": 0.03, + "learning_rate": 4.986337560655011e-05, + "loss": 0.8799, + "step": 33390 + }, + { + "epoch": 0.03, + "learning_rate": 4.986336740577761e-05, + "loss": 1.323, + "step": 33391 + }, + { + "epoch": 0.03, + "learning_rate": 4.986335920475965e-05, + "loss": 0.9182, + "step": 33392 + }, + { + "epoch": 0.03, + "learning_rate": 4.986335100349626e-05, + "loss": 0.9512, + "step": 33393 + }, + { + "epoch": 0.03, + "learning_rate": 4.986334280198743e-05, + "loss": 1.08, + "step": 33394 + }, + { + "epoch": 0.03, + "learning_rate": 4.986333460023316e-05, + "loss": 0.8034, + "step": 33395 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863326398233454e-05, + "loss": 0.8425, + "step": 33396 + }, + { + "epoch": 0.03, + "learning_rate": 4.98633181959883e-05, + "loss": 0.6959, + "step": 33397 + }, + { + "epoch": 0.03, + "learning_rate": 4.986330999349771e-05, + "loss": 0.8993, + "step": 33398 + }, + { + "epoch": 0.03, + "learning_rate": 4.986330179076167e-05, + "loss": 0.8056, + "step": 33399 + }, + { + "epoch": 0.03, + "learning_rate": 4.98632935877802e-05, + "loss": 1.0296, + "step": 33400 + }, + { + "epoch": 0.03, + "learning_rate": 4.986328538455329e-05, + "loss": 1.1833, + "step": 33401 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863277181080944e-05, + "loss": 1.1059, + "step": 33402 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863268977363156e-05, + "loss": 1.2165, + "step": 33403 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863260773399924e-05, + "loss": 0.7888, + "step": 33404 + }, + { + "epoch": 0.03, + "learning_rate": 4.986325256919125e-05, + "loss": 1.0692, + "step": 33405 + }, + { + "epoch": 0.03, + "learning_rate": 4.986324436473714e-05, + "loss": 1.0684, + "step": 33406 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863236160037584e-05, + "loss": 1.6892, + "step": 33407 + }, + { + "epoch": 0.03, + "learning_rate": 4.98632279550926e-05, + "loss": 0.9303, + "step": 33408 + }, + { + "epoch": 0.03, + "learning_rate": 4.986321974990217e-05, + "loss": 0.8978, + "step": 33409 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863211544466304e-05, + "loss": 0.8035, + "step": 33410 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863203338785e-05, + "loss": 1.0848, + "step": 33411 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863195132858256e-05, + "loss": 1.2578, + "step": 33412 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863186926686064e-05, + "loss": 0.8859, + "step": 33413 + }, + { + "epoch": 0.03, + "learning_rate": 4.986317872026844e-05, + "loss": 0.993, + "step": 33414 + }, + { + "epoch": 0.03, + "learning_rate": 4.986317051360538e-05, + "loss": 0.874, + "step": 33415 + }, + { + "epoch": 0.03, + "learning_rate": 4.986316230669688e-05, + "loss": 0.9763, + "step": 33416 + }, + { + "epoch": 0.03, + "learning_rate": 4.986315409954294e-05, + "loss": 1.3388, + "step": 33417 + }, + { + "epoch": 0.03, + "learning_rate": 4.986314589214356e-05, + "loss": 1.3318, + "step": 33418 + }, + { + "epoch": 0.03, + "learning_rate": 4.986313768449874e-05, + "loss": 0.8742, + "step": 33419 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863129476608485e-05, + "loss": 0.7141, + "step": 33420 + }, + { + "epoch": 0.03, + "learning_rate": 4.986312126847279e-05, + "loss": 1.1144, + "step": 33421 + }, + { + "epoch": 0.03, + "learning_rate": 4.986311306009166e-05, + "loss": 1.2839, + "step": 33422 + }, + { + "epoch": 0.03, + "learning_rate": 4.986310485146509e-05, + "loss": 1.4322, + "step": 33423 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863096642593085e-05, + "loss": 1.0171, + "step": 33424 + }, + { + "epoch": 0.03, + "learning_rate": 4.986308843347564e-05, + "loss": 0.9977, + "step": 33425 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863080224112753e-05, + "loss": 1.0725, + "step": 33426 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863072014504433e-05, + "loss": 1.6619, + "step": 33427 + }, + { + "epoch": 0.03, + "learning_rate": 4.986306380465067e-05, + "loss": 0.915, + "step": 33428 + }, + { + "epoch": 0.03, + "learning_rate": 4.986305559455147e-05, + "loss": 1.1427, + "step": 33429 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863047384206835e-05, + "loss": 0.8539, + "step": 33430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863039173616763e-05, + "loss": 1.135, + "step": 33431 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863030962781255e-05, + "loss": 0.8316, + "step": 33432 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863022751700304e-05, + "loss": 0.7879, + "step": 33433 + }, + { + "epoch": 0.03, + "learning_rate": 4.9863014540373923e-05, + "loss": 0.5543, + "step": 33434 + }, + { + "epoch": 0.03, + "learning_rate": 4.98630063288021e-05, + "loss": 0.9223, + "step": 33435 + }, + { + "epoch": 0.03, + "learning_rate": 4.986299811698484e-05, + "loss": 0.9571, + "step": 33436 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862989904922144e-05, + "loss": 0.4349, + "step": 33437 + }, + { + "epoch": 0.03, + "learning_rate": 4.986298169261401e-05, + "loss": 0.502, + "step": 33438 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862973480060435e-05, + "loss": 0.8122, + "step": 33439 + }, + { + "epoch": 0.03, + "learning_rate": 4.986296526726143e-05, + "loss": 1.1038, + "step": 33440 + }, + { + "epoch": 0.03, + "learning_rate": 4.986295705421698e-05, + "loss": 1.0493, + "step": 33441 + }, + { + "epoch": 0.03, + "learning_rate": 4.986294884092711e-05, + "loss": 0.8828, + "step": 33442 + }, + { + "epoch": 0.03, + "learning_rate": 4.986294062739179e-05, + "loss": 1.1571, + "step": 33443 + }, + { + "epoch": 0.03, + "learning_rate": 4.986293241361104e-05, + "loss": 1.1052, + "step": 33444 + }, + { + "epoch": 0.03, + "learning_rate": 4.986292419958485e-05, + "loss": 0.9533, + "step": 33445 + }, + { + "epoch": 0.03, + "learning_rate": 4.986291598531322e-05, + "loss": 0.7456, + "step": 33446 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862907770796166e-05, + "loss": 0.621, + "step": 33447 + }, + { + "epoch": 0.03, + "learning_rate": 4.986289955603366e-05, + "loss": 0.9741, + "step": 33448 + }, + { + "epoch": 0.03, + "learning_rate": 4.986289134102573e-05, + "loss": 1.5571, + "step": 33449 + }, + { + "epoch": 0.03, + "learning_rate": 4.986288312577236e-05, + "loss": 1.0426, + "step": 33450 + }, + { + "epoch": 0.03, + "learning_rate": 4.986287491027355e-05, + "loss": 0.7291, + "step": 33451 + }, + { + "epoch": 0.03, + "learning_rate": 4.986286669452931e-05, + "loss": 0.7901, + "step": 33452 + }, + { + "epoch": 0.03, + "learning_rate": 4.986285847853963e-05, + "loss": 1.2969, + "step": 33453 + }, + { + "epoch": 0.03, + "learning_rate": 4.986285026230452e-05, + "loss": 1.3182, + "step": 33454 + }, + { + "epoch": 0.03, + "learning_rate": 4.986284204582396e-05, + "loss": 1.265, + "step": 33455 + }, + { + "epoch": 0.03, + "learning_rate": 4.986283382909799e-05, + "loss": 1.2099, + "step": 33456 + }, + { + "epoch": 0.03, + "learning_rate": 4.986282561212656e-05, + "loss": 1.2938, + "step": 33457 + }, + { + "epoch": 0.03, + "learning_rate": 4.986281739490971e-05, + "loss": 1.2396, + "step": 33458 + }, + { + "epoch": 0.03, + "learning_rate": 4.986280917744742e-05, + "loss": 0.5777, + "step": 33459 + }, + { + "epoch": 0.03, + "learning_rate": 4.986280095973969e-05, + "loss": 1.2069, + "step": 33460 + }, + { + "epoch": 0.03, + "learning_rate": 4.986279274178653e-05, + "loss": 1.4246, + "step": 33461 + }, + { + "epoch": 0.03, + "learning_rate": 4.986278452358794e-05, + "loss": 1.3444, + "step": 33462 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862776305143905e-05, + "loss": 1.1258, + "step": 33463 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862768086454436e-05, + "loss": 1.3033, + "step": 33464 + }, + { + "epoch": 0.03, + "learning_rate": 4.986275986751954e-05, + "loss": 0.9861, + "step": 33465 + }, + { + "epoch": 0.03, + "learning_rate": 4.98627516483392e-05, + "loss": 0.9587, + "step": 33466 + }, + { + "epoch": 0.03, + "learning_rate": 4.986274342891343e-05, + "loss": 1.0021, + "step": 33467 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862735209242226e-05, + "loss": 0.9748, + "step": 33468 + }, + { + "epoch": 0.03, + "learning_rate": 4.986272698932559e-05, + "loss": 0.9272, + "step": 33469 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862718769163516e-05, + "loss": 0.736, + "step": 33470 + }, + { + "epoch": 0.03, + "learning_rate": 4.986271054875601e-05, + "loss": 1.1809, + "step": 33471 + }, + { + "epoch": 0.03, + "learning_rate": 4.986270232810307e-05, + "loss": 1.337, + "step": 33472 + }, + { + "epoch": 0.03, + "learning_rate": 4.986269410720469e-05, + "loss": 0.9071, + "step": 33473 + }, + { + "epoch": 0.03, + "learning_rate": 4.986268588606088e-05, + "loss": 1.0152, + "step": 33474 + }, + { + "epoch": 0.03, + "learning_rate": 4.986267766467164e-05, + "loss": 1.0829, + "step": 33475 + }, + { + "epoch": 0.03, + "learning_rate": 4.986266944303696e-05, + "loss": 0.8169, + "step": 33476 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862661221156846e-05, + "loss": 1.0335, + "step": 33477 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862652999031304e-05, + "loss": 1.1123, + "step": 33478 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862644776660325e-05, + "loss": 0.6938, + "step": 33479 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862636554043916e-05, + "loss": 1.1105, + "step": 33480 + }, + { + "epoch": 0.03, + "learning_rate": 4.986262833118207e-05, + "loss": 0.906, + "step": 33481 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862620108074784e-05, + "loss": 1.0405, + "step": 33482 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862611884722074e-05, + "loss": 1.1375, + "step": 33483 + }, + { + "epoch": 0.03, + "learning_rate": 4.986260366112393e-05, + "loss": 0.6894, + "step": 33484 + }, + { + "epoch": 0.03, + "learning_rate": 4.986259543728035e-05, + "loss": 0.974, + "step": 33485 + }, + { + "epoch": 0.03, + "learning_rate": 4.986258721319134e-05, + "loss": 1.0184, + "step": 33486 + }, + { + "epoch": 0.03, + "learning_rate": 4.98625789888569e-05, + "loss": 1.1276, + "step": 33487 + }, + { + "epoch": 0.03, + "learning_rate": 4.986257076427702e-05, + "loss": 0.9802, + "step": 33488 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862562539451705e-05, + "loss": 0.9898, + "step": 33489 + }, + { + "epoch": 0.03, + "learning_rate": 4.986255431438096e-05, + "loss": 1.0122, + "step": 33490 + }, + { + "epoch": 0.03, + "learning_rate": 4.986254608906479e-05, + "loss": 1.4325, + "step": 33491 + }, + { + "epoch": 0.03, + "learning_rate": 4.986253786350318e-05, + "loss": 1.2853, + "step": 33492 + }, + { + "epoch": 0.03, + "learning_rate": 4.986252963769614e-05, + "loss": 1.1527, + "step": 33493 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862521411643663e-05, + "loss": 1.145, + "step": 33494 + }, + { + "epoch": 0.03, + "learning_rate": 4.986251318534576e-05, + "loss": 1.0031, + "step": 33495 + }, + { + "epoch": 0.03, + "learning_rate": 4.986250495880242e-05, + "loss": 0.7876, + "step": 33496 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862496732013654e-05, + "loss": 1.0562, + "step": 33497 + }, + { + "epoch": 0.03, + "learning_rate": 4.986248850497945e-05, + "loss": 1.0032, + "step": 33498 + }, + { + "epoch": 0.03, + "learning_rate": 4.986248027769981e-05, + "loss": 0.8443, + "step": 33499 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862472050174745e-05, + "loss": 0.3432, + "step": 33500 + }, + { + "epoch": 0.03, + "eval_loss": 1.0758426189422607, + "eval_runtime": 125.6818, + "eval_samples_per_second": 11.02, + "eval_steps_per_second": 5.514, + "step": 33500 + }, + { + "epoch": 0.03, + "learning_rate": 4.986246382240425e-05, + "loss": 0.7522, + "step": 33501 + }, + { + "epoch": 0.03, + "learning_rate": 4.986245559438832e-05, + "loss": 1.1444, + "step": 33502 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862447366126956e-05, + "loss": 1.1291, + "step": 33503 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862439137620166e-05, + "loss": 1.1523, + "step": 33504 + }, + { + "epoch": 0.03, + "learning_rate": 4.986243090886794e-05, + "loss": 1.1179, + "step": 33505 + }, + { + "epoch": 0.03, + "learning_rate": 4.986242267987028e-05, + "loss": 1.1741, + "step": 33506 + }, + { + "epoch": 0.03, + "learning_rate": 4.986241445062719e-05, + "loss": 1.1039, + "step": 33507 + }, + { + "epoch": 0.03, + "learning_rate": 4.986240622113868e-05, + "loss": 1.1525, + "step": 33508 + }, + { + "epoch": 0.03, + "learning_rate": 4.986239799140473e-05, + "loss": 1.0584, + "step": 33509 + }, + { + "epoch": 0.03, + "learning_rate": 4.986238976142534e-05, + "loss": 1.0825, + "step": 33510 + }, + { + "epoch": 0.03, + "learning_rate": 4.986238153120053e-05, + "loss": 1.3499, + "step": 33511 + }, + { + "epoch": 0.03, + "learning_rate": 4.986237330073029e-05, + "loss": 1.0896, + "step": 33512 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862365070014615e-05, + "loss": 1.0143, + "step": 33513 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862356839053516e-05, + "loss": 0.9727, + "step": 33514 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862348607846974e-05, + "loss": 0.8683, + "step": 33515 + }, + { + "epoch": 0.03, + "learning_rate": 4.986234037639501e-05, + "loss": 1.1061, + "step": 33516 + }, + { + "epoch": 0.03, + "learning_rate": 4.986233214469761e-05, + "loss": 1.0572, + "step": 33517 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862323912754784e-05, + "loss": 0.8339, + "step": 33518 + }, + { + "epoch": 0.03, + "learning_rate": 4.986231568056653e-05, + "loss": 0.9006, + "step": 33519 + }, + { + "epoch": 0.03, + "learning_rate": 4.986230744813284e-05, + "loss": 0.9088, + "step": 33520 + }, + { + "epoch": 0.03, + "learning_rate": 4.986229921545372e-05, + "loss": 1.1302, + "step": 33521 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862290982529174e-05, + "loss": 1.5188, + "step": 33522 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862282749359196e-05, + "loss": 0.9108, + "step": 33523 + }, + { + "epoch": 0.03, + "learning_rate": 4.986227451594379e-05, + "loss": 1.0158, + "step": 33524 + }, + { + "epoch": 0.03, + "learning_rate": 4.986226628228295e-05, + "loss": 1.0775, + "step": 33525 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862258048376685e-05, + "loss": 1.3584, + "step": 33526 + }, + { + "epoch": 0.03, + "learning_rate": 4.986224981422498e-05, + "loss": 1.2812, + "step": 33527 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862241579827856e-05, + "loss": 0.9884, + "step": 33528 + }, + { + "epoch": 0.03, + "learning_rate": 4.98622333451853e-05, + "loss": 0.7482, + "step": 33529 + }, + { + "epoch": 0.03, + "learning_rate": 4.986222511029731e-05, + "loss": 0.8926, + "step": 33530 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862216875163896e-05, + "loss": 1.2694, + "step": 33531 + }, + { + "epoch": 0.03, + "learning_rate": 4.986220863978506e-05, + "loss": 1.1749, + "step": 33532 + }, + { + "epoch": 0.03, + "learning_rate": 4.986220040416078e-05, + "loss": 1.0435, + "step": 33533 + }, + { + "epoch": 0.03, + "learning_rate": 4.986219216829108e-05, + "loss": 0.8246, + "step": 33534 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862183932175946e-05, + "loss": 1.0531, + "step": 33535 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862175695815384e-05, + "loss": 0.8352, + "step": 33536 + }, + { + "epoch": 0.03, + "learning_rate": 4.986216745920939e-05, + "loss": 1.0182, + "step": 33537 + }, + { + "epoch": 0.03, + "learning_rate": 4.986215922235797e-05, + "loss": 1.0589, + "step": 33538 + }, + { + "epoch": 0.03, + "learning_rate": 4.986215098526112e-05, + "loss": 1.0467, + "step": 33539 + }, + { + "epoch": 0.03, + "learning_rate": 4.986214274791885e-05, + "loss": 1.1889, + "step": 33540 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862134510331135e-05, + "loss": 1.0346, + "step": 33541 + }, + { + "epoch": 0.03, + "learning_rate": 4.986212627249801e-05, + "loss": 1.0149, + "step": 33542 + }, + { + "epoch": 0.03, + "learning_rate": 4.986211803441945e-05, + "loss": 0.8937, + "step": 33543 + }, + { + "epoch": 0.03, + "learning_rate": 4.986210979609546e-05, + "loss": 1.255, + "step": 33544 + }, + { + "epoch": 0.03, + "learning_rate": 4.986210155752604e-05, + "loss": 1.2988, + "step": 33545 + }, + { + "epoch": 0.03, + "learning_rate": 4.986209331871119e-05, + "loss": 1.1519, + "step": 33546 + }, + { + "epoch": 0.03, + "learning_rate": 4.986208507965091e-05, + "loss": 0.9646, + "step": 33547 + }, + { + "epoch": 0.03, + "learning_rate": 4.986207684034521e-05, + "loss": 0.9045, + "step": 33548 + }, + { + "epoch": 0.03, + "learning_rate": 4.986206860079408e-05, + "loss": 1.1882, + "step": 33549 + }, + { + "epoch": 0.03, + "learning_rate": 4.986206036099752e-05, + "loss": 0.7448, + "step": 33550 + }, + { + "epoch": 0.03, + "learning_rate": 4.986205212095553e-05, + "loss": 0.9056, + "step": 33551 + }, + { + "epoch": 0.03, + "learning_rate": 4.986204388066812e-05, + "loss": 0.9749, + "step": 33552 + }, + { + "epoch": 0.03, + "learning_rate": 4.986203564013528e-05, + "loss": 0.8442, + "step": 33553 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862027399357005e-05, + "loss": 0.8127, + "step": 33554 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862019158333304e-05, + "loss": 0.9743, + "step": 33555 + }, + { + "epoch": 0.03, + "learning_rate": 4.986201091706419e-05, + "loss": 0.8844, + "step": 33556 + }, + { + "epoch": 0.03, + "learning_rate": 4.986200267554963e-05, + "loss": 1.0277, + "step": 33557 + }, + { + "epoch": 0.03, + "learning_rate": 4.986199443378965e-05, + "loss": 1.0833, + "step": 33558 + }, + { + "epoch": 0.03, + "learning_rate": 4.986198619178425e-05, + "loss": 0.9822, + "step": 33559 + }, + { + "epoch": 0.03, + "learning_rate": 4.986197794953341e-05, + "loss": 0.6217, + "step": 33560 + }, + { + "epoch": 0.03, + "learning_rate": 4.986196970703715e-05, + "loss": 1.5138, + "step": 33561 + }, + { + "epoch": 0.03, + "learning_rate": 4.986196146429546e-05, + "loss": 0.9536, + "step": 33562 + }, + { + "epoch": 0.03, + "learning_rate": 4.986195322130835e-05, + "loss": 0.7199, + "step": 33563 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861944978075813e-05, + "loss": 0.9798, + "step": 33564 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861936734597844e-05, + "loss": 1.1139, + "step": 33565 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861928490874446e-05, + "loss": 1.0247, + "step": 33566 + }, + { + "epoch": 0.03, + "learning_rate": 4.986192024690563e-05, + "loss": 0.9689, + "step": 33567 + }, + { + "epoch": 0.03, + "learning_rate": 4.986191200269138e-05, + "loss": 0.8621, + "step": 33568 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861903758231706e-05, + "loss": 0.9795, + "step": 33569 + }, + { + "epoch": 0.03, + "learning_rate": 4.986189551352661e-05, + "loss": 1.3599, + "step": 33570 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861887268576084e-05, + "loss": 1.2815, + "step": 33571 + }, + { + "epoch": 0.03, + "learning_rate": 4.986187902338013e-05, + "loss": 1.0573, + "step": 33572 + }, + { + "epoch": 0.03, + "learning_rate": 4.986187077793876e-05, + "loss": 1.1127, + "step": 33573 + }, + { + "epoch": 0.03, + "learning_rate": 4.986186253225195e-05, + "loss": 0.8499, + "step": 33574 + }, + { + "epoch": 0.03, + "learning_rate": 4.986185428631972e-05, + "loss": 0.946, + "step": 33575 + }, + { + "epoch": 0.03, + "learning_rate": 4.986184604014207e-05, + "loss": 0.6648, + "step": 33576 + }, + { + "epoch": 0.03, + "learning_rate": 4.986183779371899e-05, + "loss": 0.6509, + "step": 33577 + }, + { + "epoch": 0.03, + "learning_rate": 4.986182954705048e-05, + "loss": 0.8923, + "step": 33578 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861821300136545e-05, + "loss": 0.8871, + "step": 33579 + }, + { + "epoch": 0.03, + "learning_rate": 4.986181305297719e-05, + "loss": 1.0712, + "step": 33580 + }, + { + "epoch": 0.03, + "learning_rate": 4.986180480557241e-05, + "loss": 1.0254, + "step": 33581 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861796557922205e-05, + "loss": 1.2745, + "step": 33582 + }, + { + "epoch": 0.03, + "learning_rate": 4.986178831002657e-05, + "loss": 1.1973, + "step": 33583 + }, + { + "epoch": 0.03, + "learning_rate": 4.986178006188551e-05, + "loss": 1.1043, + "step": 33584 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861771813499026e-05, + "loss": 0.8993, + "step": 33585 + }, + { + "epoch": 0.03, + "learning_rate": 4.986176356486712e-05, + "loss": 0.7855, + "step": 33586 + }, + { + "epoch": 0.03, + "learning_rate": 4.986175531598979e-05, + "loss": 0.9223, + "step": 33587 + }, + { + "epoch": 0.03, + "learning_rate": 4.986174706686702e-05, + "loss": 0.8804, + "step": 33588 + }, + { + "epoch": 0.03, + "learning_rate": 4.986173881749885e-05, + "loss": 0.9589, + "step": 33589 + }, + { + "epoch": 0.03, + "learning_rate": 4.986173056788524e-05, + "loss": 1.0757, + "step": 33590 + }, + { + "epoch": 0.03, + "learning_rate": 4.98617223180262e-05, + "loss": 1.0918, + "step": 33591 + }, + { + "epoch": 0.03, + "learning_rate": 4.986171406792175e-05, + "loss": 1.1094, + "step": 33592 + }, + { + "epoch": 0.03, + "learning_rate": 4.986170581757187e-05, + "loss": 0.906, + "step": 33593 + }, + { + "epoch": 0.03, + "learning_rate": 4.986169756697657e-05, + "loss": 0.907, + "step": 33594 + }, + { + "epoch": 0.03, + "learning_rate": 4.986168931613584e-05, + "loss": 1.0866, + "step": 33595 + }, + { + "epoch": 0.03, + "learning_rate": 4.986168106504968e-05, + "loss": 0.9455, + "step": 33596 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861672813718105e-05, + "loss": 1.0826, + "step": 33597 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861664562141105e-05, + "loss": 0.7657, + "step": 33598 + }, + { + "epoch": 0.03, + "learning_rate": 4.986165631031868e-05, + "loss": 0.7774, + "step": 33599 + }, + { + "epoch": 0.03, + "learning_rate": 4.986164805825083e-05, + "loss": 0.777, + "step": 33600 + }, + { + "epoch": 0.03, + "learning_rate": 4.986163980593756e-05, + "loss": 1.0227, + "step": 33601 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861631553378864e-05, + "loss": 1.199, + "step": 33602 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861623300574744e-05, + "loss": 0.964, + "step": 33603 + }, + { + "epoch": 0.03, + "learning_rate": 4.98616150475252e-05, + "loss": 1.1142, + "step": 33604 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861606794230235e-05, + "loss": 0.9025, + "step": 33605 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861598540689846e-05, + "loss": 0.9811, + "step": 33606 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861590286904035e-05, + "loss": 0.9459, + "step": 33607 + }, + { + "epoch": 0.03, + "learning_rate": 4.98615820328728e-05, + "loss": 0.9288, + "step": 33608 + }, + { + "epoch": 0.03, + "learning_rate": 4.986157377859614e-05, + "loss": 0.514, + "step": 33609 + }, + { + "epoch": 0.03, + "learning_rate": 4.986156552407406e-05, + "loss": 0.3054, + "step": 33610 + }, + { + "epoch": 0.03, + "learning_rate": 4.986155726930655e-05, + "loss": 0.3666, + "step": 33611 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861549014293624e-05, + "loss": 0.397, + "step": 33612 + }, + { + "epoch": 0.03, + "learning_rate": 4.986154075903528e-05, + "loss": 0.3303, + "step": 33613 + }, + { + "epoch": 0.03, + "learning_rate": 4.98615325035315e-05, + "loss": 0.3109, + "step": 33614 + }, + { + "epoch": 0.03, + "learning_rate": 4.986152424778231e-05, + "loss": 0.3327, + "step": 33615 + }, + { + "epoch": 0.03, + "learning_rate": 4.986151599178769e-05, + "loss": 0.8334, + "step": 33616 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861507735547646e-05, + "loss": 0.6946, + "step": 33617 + }, + { + "epoch": 0.03, + "learning_rate": 4.986149947906218e-05, + "loss": 0.9979, + "step": 33618 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861491222331294e-05, + "loss": 0.794, + "step": 33619 + }, + { + "epoch": 0.03, + "learning_rate": 4.986148296535499e-05, + "loss": 0.8992, + "step": 33620 + }, + { + "epoch": 0.03, + "learning_rate": 4.986147470813326e-05, + "loss": 2.0438, + "step": 33621 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861466450666106e-05, + "loss": 0.7005, + "step": 33622 + }, + { + "epoch": 0.03, + "learning_rate": 4.986145819295354e-05, + "loss": 0.9095, + "step": 33623 + }, + { + "epoch": 0.03, + "learning_rate": 4.986144993499554e-05, + "loss": 1.2311, + "step": 33624 + }, + { + "epoch": 0.03, + "learning_rate": 4.986144167679212e-05, + "loss": 1.2647, + "step": 33625 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861433418343285e-05, + "loss": 0.9682, + "step": 33626 + }, + { + "epoch": 0.03, + "learning_rate": 4.986142515964902e-05, + "loss": 0.7254, + "step": 33627 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861416900709344e-05, + "loss": 1.2507, + "step": 33628 + }, + { + "epoch": 0.03, + "learning_rate": 4.986140864152424e-05, + "loss": 1.2191, + "step": 33629 + }, + { + "epoch": 0.03, + "learning_rate": 4.986140038209371e-05, + "loss": 1.0886, + "step": 33630 + }, + { + "epoch": 0.03, + "learning_rate": 4.986139212241777e-05, + "loss": 1.0491, + "step": 33631 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861383862496405e-05, + "loss": 0.8773, + "step": 33632 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861375602329616e-05, + "loss": 0.368, + "step": 33633 + }, + { + "epoch": 0.03, + "learning_rate": 4.986136734191741e-05, + "loss": 0.3699, + "step": 33634 + }, + { + "epoch": 0.03, + "learning_rate": 4.986135908125977e-05, + "loss": 0.224, + "step": 33635 + }, + { + "epoch": 0.03, + "learning_rate": 4.986135082035672e-05, + "loss": 0.3676, + "step": 33636 + }, + { + "epoch": 0.03, + "learning_rate": 4.986134255920825e-05, + "loss": 0.1872, + "step": 33637 + }, + { + "epoch": 0.03, + "learning_rate": 4.986133429781436e-05, + "loss": 0.1621, + "step": 33638 + }, + { + "epoch": 0.03, + "learning_rate": 4.986132603617505e-05, + "loss": 0.1353, + "step": 33639 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861317774290316e-05, + "loss": 0.1106, + "step": 33640 + }, + { + "epoch": 0.03, + "learning_rate": 4.986130951216016e-05, + "loss": 0.1834, + "step": 33641 + }, + { + "epoch": 0.03, + "learning_rate": 4.986130124978459e-05, + "loss": 0.1416, + "step": 33642 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861292987163586e-05, + "loss": 0.121, + "step": 33643 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861284724297175e-05, + "loss": 0.0829, + "step": 33644 + }, + { + "epoch": 0.03, + "learning_rate": 4.986127646118534e-05, + "loss": 0.0879, + "step": 33645 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861268197828084e-05, + "loss": 0.0852, + "step": 33646 + }, + { + "epoch": 0.03, + "learning_rate": 4.986125993422541e-05, + "loss": 0.0658, + "step": 33647 + }, + { + "epoch": 0.03, + "learning_rate": 4.986125167037732e-05, + "loss": 0.2018, + "step": 33648 + }, + { + "epoch": 0.03, + "learning_rate": 4.98612434062838e-05, + "loss": 0.4377, + "step": 33649 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861235141944865e-05, + "loss": 0.4105, + "step": 33650 + }, + { + "epoch": 0.03, + "learning_rate": 4.986122687736051e-05, + "loss": 0.3639, + "step": 33651 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861218612530736e-05, + "loss": 0.4641, + "step": 33652 + }, + { + "epoch": 0.03, + "learning_rate": 4.986121034745555e-05, + "loss": 0.6518, + "step": 33653 + }, + { + "epoch": 0.03, + "learning_rate": 4.986120208213494e-05, + "loss": 0.6357, + "step": 33654 + }, + { + "epoch": 0.03, + "learning_rate": 4.98611938165689e-05, + "loss": 0.8183, + "step": 33655 + }, + { + "epoch": 0.03, + "learning_rate": 4.986118555075745e-05, + "loss": 0.8038, + "step": 33656 + }, + { + "epoch": 0.03, + "learning_rate": 4.986117728470058e-05, + "loss": 0.9412, + "step": 33657 + }, + { + "epoch": 0.03, + "learning_rate": 4.986116901839829e-05, + "loss": 0.7992, + "step": 33658 + }, + { + "epoch": 0.03, + "learning_rate": 4.986116075185058e-05, + "loss": 0.9032, + "step": 33659 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861152485057454e-05, + "loss": 1.1367, + "step": 33660 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861144218018904e-05, + "loss": 0.8386, + "step": 33661 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861135950734945e-05, + "loss": 1.0348, + "step": 33662 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861127683205564e-05, + "loss": 1.2024, + "step": 33663 + }, + { + "epoch": 0.03, + "learning_rate": 4.986111941543076e-05, + "loss": 1.1814, + "step": 33664 + }, + { + "epoch": 0.03, + "learning_rate": 4.986111114741054e-05, + "loss": 0.8639, + "step": 33665 + }, + { + "epoch": 0.03, + "learning_rate": 4.98611028791449e-05, + "loss": 0.952, + "step": 33666 + }, + { + "epoch": 0.03, + "learning_rate": 4.986109461063384e-05, + "loss": 1.3304, + "step": 33667 + }, + { + "epoch": 0.03, + "learning_rate": 4.986108634187737e-05, + "loss": 1.0978, + "step": 33668 + }, + { + "epoch": 0.03, + "learning_rate": 4.986107807287548e-05, + "loss": 1.1427, + "step": 33669 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861069803628165e-05, + "loss": 0.9461, + "step": 33670 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861061534135435e-05, + "loss": 0.904, + "step": 33671 + }, + { + "epoch": 0.03, + "learning_rate": 4.986105326439728e-05, + "loss": 0.7785, + "step": 33672 + }, + { + "epoch": 0.03, + "learning_rate": 4.986104499441372e-05, + "loss": 0.8659, + "step": 33673 + }, + { + "epoch": 0.03, + "learning_rate": 4.986103672418474e-05, + "loss": 1.2259, + "step": 33674 + }, + { + "epoch": 0.03, + "learning_rate": 4.986102845371034e-05, + "loss": 0.9233, + "step": 33675 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861020182990515e-05, + "loss": 0.9296, + "step": 33676 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861011912025276e-05, + "loss": 0.9835, + "step": 33677 + }, + { + "epoch": 0.03, + "learning_rate": 4.986100364081463e-05, + "loss": 1.0317, + "step": 33678 + }, + { + "epoch": 0.03, + "learning_rate": 4.986099536935855e-05, + "loss": 1.1543, + "step": 33679 + }, + { + "epoch": 0.03, + "learning_rate": 4.986098709765707e-05, + "loss": 1.0584, + "step": 33680 + }, + { + "epoch": 0.03, + "learning_rate": 4.986097882571016e-05, + "loss": 0.968, + "step": 33681 + }, + { + "epoch": 0.03, + "learning_rate": 4.986097055351784e-05, + "loss": 1.0692, + "step": 33682 + }, + { + "epoch": 0.03, + "learning_rate": 4.98609622810801e-05, + "loss": 1.1004, + "step": 33683 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860954008396946e-05, + "loss": 0.9018, + "step": 33684 + }, + { + "epoch": 0.03, + "learning_rate": 4.986094573546837e-05, + "loss": 1.028, + "step": 33685 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860937462294384e-05, + "loss": 0.709, + "step": 33686 + }, + { + "epoch": 0.03, + "learning_rate": 4.986092918887497e-05, + "loss": 1.0071, + "step": 33687 + }, + { + "epoch": 0.03, + "learning_rate": 4.986092091521015e-05, + "loss": 0.9479, + "step": 33688 + }, + { + "epoch": 0.03, + "learning_rate": 4.986091264129991e-05, + "loss": 1.0955, + "step": 33689 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860904367144256e-05, + "loss": 1.2036, + "step": 33690 + }, + { + "epoch": 0.03, + "learning_rate": 4.986089609274318e-05, + "loss": 1.0052, + "step": 33691 + }, + { + "epoch": 0.03, + "learning_rate": 4.986088781809669e-05, + "loss": 1.0163, + "step": 33692 + }, + { + "epoch": 0.03, + "learning_rate": 4.986087954320479e-05, + "loss": 1.4665, + "step": 33693 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860871268067465e-05, + "loss": 0.6604, + "step": 33694 + }, + { + "epoch": 0.03, + "learning_rate": 4.986086299268473e-05, + "loss": 0.4471, + "step": 33695 + }, + { + "epoch": 0.03, + "learning_rate": 4.986085471705658e-05, + "loss": 0.5022, + "step": 33696 + }, + { + "epoch": 0.03, + "learning_rate": 4.986084644118301e-05, + "loss": 0.557, + "step": 33697 + }, + { + "epoch": 0.03, + "learning_rate": 4.986083816506403e-05, + "loss": 1.0043, + "step": 33698 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860829888699624e-05, + "loss": 1.0392, + "step": 33699 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860821612089805e-05, + "loss": 0.6509, + "step": 33700 + }, + { + "epoch": 0.03, + "learning_rate": 4.986081333523457e-05, + "loss": 1.024, + "step": 33701 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860805058133934e-05, + "loss": 0.8192, + "step": 33702 + }, + { + "epoch": 0.03, + "learning_rate": 4.986079678078787e-05, + "loss": 1.1838, + "step": 33703 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860788503196384e-05, + "loss": 1.1246, + "step": 33704 + }, + { + "epoch": 0.03, + "learning_rate": 4.986078022535949e-05, + "loss": 1.246, + "step": 33705 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860771947277185e-05, + "loss": 1.2903, + "step": 33706 + }, + { + "epoch": 0.03, + "learning_rate": 4.986076366894946e-05, + "loss": 0.6125, + "step": 33707 + }, + { + "epoch": 0.03, + "learning_rate": 4.986075539037632e-05, + "loss": 1.0932, + "step": 33708 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860747111557773e-05, + "loss": 0.9859, + "step": 33709 + }, + { + "epoch": 0.03, + "learning_rate": 4.98607388324938e-05, + "loss": 1.0451, + "step": 33710 + }, + { + "epoch": 0.03, + "learning_rate": 4.986073055318442e-05, + "loss": 0.9007, + "step": 33711 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860722273629625e-05, + "loss": 0.9443, + "step": 33712 + }, + { + "epoch": 0.03, + "learning_rate": 4.986071399382941e-05, + "loss": 1.0697, + "step": 33713 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860705713783784e-05, + "loss": 1.0372, + "step": 33714 + }, + { + "epoch": 0.03, + "learning_rate": 4.986069743349274e-05, + "loss": 1.0855, + "step": 33715 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860689152956286e-05, + "loss": 0.661, + "step": 33716 + }, + { + "epoch": 0.03, + "learning_rate": 4.986068087217442e-05, + "loss": 1.1058, + "step": 33717 + }, + { + "epoch": 0.03, + "learning_rate": 4.986067259114713e-05, + "loss": 0.8275, + "step": 33718 + }, + { + "epoch": 0.03, + "learning_rate": 4.986066430987444e-05, + "loss": 0.1493, + "step": 33719 + }, + { + "epoch": 0.03, + "learning_rate": 4.986065602835632e-05, + "loss": 0.0587, + "step": 33720 + }, + { + "epoch": 0.03, + "learning_rate": 4.98606477465928e-05, + "loss": 0.1474, + "step": 33721 + }, + { + "epoch": 0.03, + "learning_rate": 4.986063946458386e-05, + "loss": 0.992, + "step": 33722 + }, + { + "epoch": 0.03, + "learning_rate": 4.986063118232951e-05, + "loss": 1.3843, + "step": 33723 + }, + { + "epoch": 0.03, + "learning_rate": 4.986062289982973e-05, + "loss": 1.6819, + "step": 33724 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860614617084556e-05, + "loss": 1.3809, + "step": 33725 + }, + { + "epoch": 0.03, + "learning_rate": 4.986060633409396e-05, + "loss": 1.0346, + "step": 33726 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860598050857955e-05, + "loss": 0.8585, + "step": 33727 + }, + { + "epoch": 0.03, + "learning_rate": 4.986058976737653e-05, + "loss": 0.9606, + "step": 33728 + }, + { + "epoch": 0.03, + "learning_rate": 4.98605814836497e-05, + "loss": 0.5911, + "step": 33729 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860573199677454e-05, + "loss": 0.9488, + "step": 33730 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860564915459796e-05, + "loss": 0.7271, + "step": 33731 + }, + { + "epoch": 0.03, + "learning_rate": 4.986055663099672e-05, + "loss": 0.5435, + "step": 33732 + }, + { + "epoch": 0.03, + "learning_rate": 4.986054834628823e-05, + "loss": 0.7804, + "step": 33733 + }, + { + "epoch": 0.03, + "learning_rate": 4.986054006133433e-05, + "loss": 0.4701, + "step": 33734 + }, + { + "epoch": 0.03, + "learning_rate": 4.986053177613502e-05, + "loss": 1.1293, + "step": 33735 + }, + { + "epoch": 0.03, + "learning_rate": 4.986052349069029e-05, + "loss": 1.0277, + "step": 33736 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860515205000156e-05, + "loss": 0.9461, + "step": 33737 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860506919064607e-05, + "loss": 1.0253, + "step": 33738 + }, + { + "epoch": 0.03, + "learning_rate": 4.986049863288365e-05, + "loss": 0.9401, + "step": 33739 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860490346457266e-05, + "loss": 1.2282, + "step": 33740 + }, + { + "epoch": 0.03, + "learning_rate": 4.986048205978548e-05, + "loss": 1.3111, + "step": 33741 + }, + { + "epoch": 0.03, + "learning_rate": 4.986047377286829e-05, + "loss": 0.8451, + "step": 33742 + }, + { + "epoch": 0.03, + "learning_rate": 4.986046548570568e-05, + "loss": 0.959, + "step": 33743 + }, + { + "epoch": 0.03, + "learning_rate": 4.986045719829765e-05, + "loss": 0.8281, + "step": 33744 + }, + { + "epoch": 0.03, + "learning_rate": 4.986044891064421e-05, + "loss": 0.7696, + "step": 33745 + }, + { + "epoch": 0.03, + "learning_rate": 4.986044062274536e-05, + "loss": 0.7486, + "step": 33746 + }, + { + "epoch": 0.03, + "learning_rate": 4.98604323346011e-05, + "loss": 0.5401, + "step": 33747 + }, + { + "epoch": 0.03, + "learning_rate": 4.986042404621144e-05, + "loss": 0.9663, + "step": 33748 + }, + { + "epoch": 0.03, + "learning_rate": 4.986041575757635e-05, + "loss": 0.987, + "step": 33749 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860407468695856e-05, + "loss": 0.9324, + "step": 33750 + }, + { + "epoch": 0.03, + "learning_rate": 4.986039917956995e-05, + "loss": 0.9915, + "step": 33751 + }, + { + "epoch": 0.03, + "learning_rate": 4.986039089019864e-05, + "loss": 1.1588, + "step": 33752 + }, + { + "epoch": 0.03, + "learning_rate": 4.986038260058191e-05, + "loss": 0.8544, + "step": 33753 + }, + { + "epoch": 0.03, + "learning_rate": 4.986037431071977e-05, + "loss": 0.9916, + "step": 33754 + }, + { + "epoch": 0.03, + "learning_rate": 4.986036602061222e-05, + "loss": 0.784, + "step": 33755 + }, + { + "epoch": 0.03, + "learning_rate": 4.986035773025926e-05, + "loss": 0.4788, + "step": 33756 + }, + { + "epoch": 0.03, + "learning_rate": 4.986034943966089e-05, + "loss": 0.4185, + "step": 33757 + }, + { + "epoch": 0.03, + "learning_rate": 4.98603411488171e-05, + "loss": 0.5968, + "step": 33758 + }, + { + "epoch": 0.03, + "learning_rate": 4.986033285772791e-05, + "loss": 0.7569, + "step": 33759 + }, + { + "epoch": 0.03, + "learning_rate": 4.98603245663933e-05, + "loss": 0.8722, + "step": 33760 + }, + { + "epoch": 0.03, + "learning_rate": 4.986031627481329e-05, + "loss": 0.91, + "step": 33761 + }, + { + "epoch": 0.03, + "learning_rate": 4.986030798298786e-05, + "loss": 1.1148, + "step": 33762 + }, + { + "epoch": 0.03, + "learning_rate": 4.986029969091702e-05, + "loss": 1.3528, + "step": 33763 + }, + { + "epoch": 0.03, + "learning_rate": 4.986029139860078e-05, + "loss": 1.0234, + "step": 33764 + }, + { + "epoch": 0.03, + "learning_rate": 4.986028310603912e-05, + "loss": 0.3014, + "step": 33765 + }, + { + "epoch": 0.03, + "learning_rate": 4.986027481323205e-05, + "loss": 0.0812, + "step": 33766 + }, + { + "epoch": 0.03, + "learning_rate": 4.986026652017958e-05, + "loss": 0.1336, + "step": 33767 + }, + { + "epoch": 0.03, + "learning_rate": 4.986025822688169e-05, + "loss": 0.128, + "step": 33768 + }, + { + "epoch": 0.03, + "learning_rate": 4.986024993333839e-05, + "loss": 0.0835, + "step": 33769 + }, + { + "epoch": 0.03, + "learning_rate": 4.986024163954968e-05, + "loss": 0.0627, + "step": 33770 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860233345515565e-05, + "loss": 0.0518, + "step": 33771 + }, + { + "epoch": 0.03, + "learning_rate": 4.986022505123604e-05, + "loss": 1.0668, + "step": 33772 + }, + { + "epoch": 0.03, + "learning_rate": 4.98602167567111e-05, + "loss": 0.8585, + "step": 33773 + }, + { + "epoch": 0.03, + "learning_rate": 4.986020846194076e-05, + "loss": 0.9693, + "step": 33774 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860200166925e-05, + "loss": 0.7862, + "step": 33775 + }, + { + "epoch": 0.03, + "learning_rate": 4.986019187166383e-05, + "loss": 0.9602, + "step": 33776 + }, + { + "epoch": 0.03, + "learning_rate": 4.986018357615726e-05, + "loss": 0.9935, + "step": 33777 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860175280405276e-05, + "loss": 1.0174, + "step": 33778 + }, + { + "epoch": 0.03, + "learning_rate": 4.986016698440788e-05, + "loss": 1.1619, + "step": 33779 + }, + { + "epoch": 0.03, + "learning_rate": 4.986015868816508e-05, + "loss": 1.1411, + "step": 33780 + }, + { + "epoch": 0.03, + "learning_rate": 4.986015039167687e-05, + "loss": 1.172, + "step": 33781 + }, + { + "epoch": 0.03, + "learning_rate": 4.986014209494325e-05, + "loss": 0.9112, + "step": 33782 + }, + { + "epoch": 0.03, + "learning_rate": 4.986013379796422e-05, + "loss": 1.3467, + "step": 33783 + }, + { + "epoch": 0.03, + "learning_rate": 4.986012550073978e-05, + "loss": 1.183, + "step": 33784 + }, + { + "epoch": 0.03, + "learning_rate": 4.986011720326994e-05, + "loss": 1.4017, + "step": 33785 + }, + { + "epoch": 0.03, + "learning_rate": 4.986010890555468e-05, + "loss": 0.2349, + "step": 33786 + }, + { + "epoch": 0.03, + "learning_rate": 4.986010060759402e-05, + "loss": 0.9046, + "step": 33787 + }, + { + "epoch": 0.03, + "learning_rate": 4.986009230938794e-05, + "loss": 1.0847, + "step": 33788 + }, + { + "epoch": 0.03, + "learning_rate": 4.986008401093646e-05, + "loss": 1.1162, + "step": 33789 + }, + { + "epoch": 0.03, + "learning_rate": 4.986007571223957e-05, + "loss": 1.1838, + "step": 33790 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860067413297276e-05, + "loss": 1.1451, + "step": 33791 + }, + { + "epoch": 0.03, + "learning_rate": 4.986005911410957e-05, + "loss": 1.0351, + "step": 33792 + }, + { + "epoch": 0.03, + "learning_rate": 4.986005081467646e-05, + "loss": 0.982, + "step": 33793 + }, + { + "epoch": 0.03, + "learning_rate": 4.986004251499794e-05, + "loss": 0.6071, + "step": 33794 + }, + { + "epoch": 0.03, + "learning_rate": 4.986003421507401e-05, + "loss": 1.2957, + "step": 33795 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860025914904676e-05, + "loss": 1.2825, + "step": 33796 + }, + { + "epoch": 0.03, + "learning_rate": 4.986001761448993e-05, + "loss": 0.8359, + "step": 33797 + }, + { + "epoch": 0.03, + "learning_rate": 4.986000931382978e-05, + "loss": 1.0188, + "step": 33798 + }, + { + "epoch": 0.03, + "learning_rate": 4.9860001012924215e-05, + "loss": 0.7496, + "step": 33799 + }, + { + "epoch": 0.03, + "learning_rate": 4.985999271177325e-05, + "loss": 0.8832, + "step": 33800 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859984410376875e-05, + "loss": 1.0659, + "step": 33801 + }, + { + "epoch": 0.03, + "learning_rate": 4.98599761087351e-05, + "loss": 1.2253, + "step": 33802 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859967806847905e-05, + "loss": 1.702, + "step": 33803 + }, + { + "epoch": 0.03, + "learning_rate": 4.985995950471531e-05, + "loss": 1.4522, + "step": 33804 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859951202337306e-05, + "loss": 1.1522, + "step": 33805 + }, + { + "epoch": 0.03, + "learning_rate": 4.98599428997139e-05, + "loss": 1.34, + "step": 33806 + }, + { + "epoch": 0.03, + "learning_rate": 4.985993459684508e-05, + "loss": 1.2799, + "step": 33807 + }, + { + "epoch": 0.03, + "learning_rate": 4.985992629373085e-05, + "loss": 0.8788, + "step": 33808 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859917990371224e-05, + "loss": 0.7812, + "step": 33809 + }, + { + "epoch": 0.03, + "learning_rate": 4.985990968676619e-05, + "loss": 0.8997, + "step": 33810 + }, + { + "epoch": 0.03, + "learning_rate": 4.985990138291574e-05, + "loss": 0.9491, + "step": 33811 + }, + { + "epoch": 0.03, + "learning_rate": 4.98598930788199e-05, + "loss": 0.8777, + "step": 33812 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859884774478636e-05, + "loss": 1.0163, + "step": 33813 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859876469891976e-05, + "loss": 1.0322, + "step": 33814 + }, + { + "epoch": 0.03, + "learning_rate": 4.985986816505991e-05, + "loss": 0.8996, + "step": 33815 + }, + { + "epoch": 0.03, + "learning_rate": 4.985985985998243e-05, + "loss": 1.084, + "step": 33816 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859851554659555e-05, + "loss": 1.078, + "step": 33817 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859843249091266e-05, + "loss": 1.1194, + "step": 33818 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859834943277574e-05, + "loss": 0.9613, + "step": 33819 + }, + { + "epoch": 0.03, + "learning_rate": 4.985982663721847e-05, + "loss": 0.9705, + "step": 33820 + }, + { + "epoch": 0.03, + "learning_rate": 4.985981833091396e-05, + "loss": 1.2384, + "step": 33821 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859810024364056e-05, + "loss": 1.1025, + "step": 33822 + }, + { + "epoch": 0.03, + "learning_rate": 4.985980171756874e-05, + "loss": 1.1178, + "step": 33823 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859793410528024e-05, + "loss": 1.2171, + "step": 33824 + }, + { + "epoch": 0.03, + "learning_rate": 4.985978510324189e-05, + "loss": 1.4061, + "step": 33825 + }, + { + "epoch": 0.03, + "learning_rate": 4.985977679571036e-05, + "loss": 1.3367, + "step": 33826 + }, + { + "epoch": 0.03, + "learning_rate": 4.985976848793342e-05, + "loss": 1.0902, + "step": 33827 + }, + { + "epoch": 0.03, + "learning_rate": 4.985976017991108e-05, + "loss": 1.1648, + "step": 33828 + }, + { + "epoch": 0.03, + "learning_rate": 4.985975187164333e-05, + "loss": 0.9861, + "step": 33829 + }, + { + "epoch": 0.03, + "learning_rate": 4.985974356313018e-05, + "loss": 1.1153, + "step": 33830 + }, + { + "epoch": 0.03, + "learning_rate": 4.985973525437162e-05, + "loss": 1.094, + "step": 33831 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859726945367656e-05, + "loss": 0.9598, + "step": 33832 + }, + { + "epoch": 0.03, + "learning_rate": 4.985971863611829e-05, + "loss": 1.0594, + "step": 33833 + }, + { + "epoch": 0.03, + "learning_rate": 4.985971032662352e-05, + "loss": 0.936, + "step": 33834 + }, + { + "epoch": 0.03, + "learning_rate": 4.985970201688335e-05, + "loss": 0.7627, + "step": 33835 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859693706897764e-05, + "loss": 0.884, + "step": 33836 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859685396666775e-05, + "loss": 0.6189, + "step": 33837 + }, + { + "epoch": 0.03, + "learning_rate": 4.985967708619039e-05, + "loss": 0.7422, + "step": 33838 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859668775468596e-05, + "loss": 0.4087, + "step": 33839 + }, + { + "epoch": 0.03, + "learning_rate": 4.98596604645014e-05, + "loss": 1.3426, + "step": 33840 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859652153288794e-05, + "loss": 1.0961, + "step": 33841 + }, + { + "epoch": 0.03, + "learning_rate": 4.985964384183079e-05, + "loss": 0.9081, + "step": 33842 + }, + { + "epoch": 0.03, + "learning_rate": 4.985963553012738e-05, + "loss": 1.0201, + "step": 33843 + }, + { + "epoch": 0.03, + "learning_rate": 4.985962721817857e-05, + "loss": 1.1648, + "step": 33844 + }, + { + "epoch": 0.03, + "learning_rate": 4.985961890598435e-05, + "loss": 0.7337, + "step": 33845 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859610593544724e-05, + "loss": 0.8561, + "step": 33846 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859602280859705e-05, + "loss": 1.0508, + "step": 33847 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859593967929276e-05, + "loss": 0.8947, + "step": 33848 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859585654753444e-05, + "loss": 1.1932, + "step": 33849 + }, + { + "epoch": 0.03, + "learning_rate": 4.985957734133221e-05, + "loss": 0.8381, + "step": 33850 + }, + { + "epoch": 0.03, + "learning_rate": 4.985956902766557e-05, + "loss": 1.1564, + "step": 33851 + }, + { + "epoch": 0.03, + "learning_rate": 4.985956071375353e-05, + "loss": 1.4844, + "step": 33852 + }, + { + "epoch": 0.03, + "learning_rate": 4.985955239959608e-05, + "loss": 1.0528, + "step": 33853 + }, + { + "epoch": 0.03, + "learning_rate": 4.985954408519323e-05, + "loss": 1.0026, + "step": 33854 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859535770544986e-05, + "loss": 1.2354, + "step": 33855 + }, + { + "epoch": 0.03, + "learning_rate": 4.985952745565133e-05, + "loss": 1.1073, + "step": 33856 + }, + { + "epoch": 0.03, + "learning_rate": 4.985951914051228e-05, + "loss": 0.8309, + "step": 33857 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859510825127815e-05, + "loss": 0.7578, + "step": 33858 + }, + { + "epoch": 0.03, + "learning_rate": 4.985950250949796e-05, + "loss": 0.9832, + "step": 33859 + }, + { + "epoch": 0.03, + "learning_rate": 4.985949419362269e-05, + "loss": 0.721, + "step": 33860 + }, + { + "epoch": 0.03, + "learning_rate": 4.985948587750202e-05, + "loss": 0.7769, + "step": 33861 + }, + { + "epoch": 0.03, + "learning_rate": 4.985947756113595e-05, + "loss": 0.7705, + "step": 33862 + }, + { + "epoch": 0.03, + "learning_rate": 4.985946924452448e-05, + "loss": 0.8211, + "step": 33863 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859460927667614e-05, + "loss": 0.7427, + "step": 33864 + }, + { + "epoch": 0.03, + "learning_rate": 4.985945261056533e-05, + "loss": 1.0492, + "step": 33865 + }, + { + "epoch": 0.03, + "learning_rate": 4.985944429321765e-05, + "loss": 0.5871, + "step": 33866 + }, + { + "epoch": 0.03, + "learning_rate": 4.985943597562458e-05, + "loss": 0.9724, + "step": 33867 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859427657786095e-05, + "loss": 0.9327, + "step": 33868 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859419339702216e-05, + "loss": 1.1056, + "step": 33869 + }, + { + "epoch": 0.03, + "learning_rate": 4.985941102137293e-05, + "loss": 0.9406, + "step": 33870 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859402702798244e-05, + "loss": 0.9978, + "step": 33871 + }, + { + "epoch": 0.03, + "learning_rate": 4.985939438397815e-05, + "loss": 1.2994, + "step": 33872 + }, + { + "epoch": 0.03, + "learning_rate": 4.985938606491266e-05, + "loss": 0.7051, + "step": 33873 + }, + { + "epoch": 0.03, + "learning_rate": 4.985937774560177e-05, + "loss": 0.8009, + "step": 33874 + }, + { + "epoch": 0.03, + "learning_rate": 4.985936942604548e-05, + "loss": 0.9114, + "step": 33875 + }, + { + "epoch": 0.03, + "learning_rate": 4.985936110624378e-05, + "loss": 1.0395, + "step": 33876 + }, + { + "epoch": 0.03, + "learning_rate": 4.985935278619669e-05, + "loss": 1.5542, + "step": 33877 + }, + { + "epoch": 0.03, + "learning_rate": 4.98593444659042e-05, + "loss": 1.1879, + "step": 33878 + }, + { + "epoch": 0.03, + "learning_rate": 4.98593361453663e-05, + "loss": 1.033, + "step": 33879 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859327824583e-05, + "loss": 1.0282, + "step": 33880 + }, + { + "epoch": 0.03, + "learning_rate": 4.98593195035543e-05, + "loss": 1.1073, + "step": 33881 + }, + { + "epoch": 0.03, + "learning_rate": 4.98593111822802e-05, + "loss": 1.0908, + "step": 33882 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859302860760706e-05, + "loss": 1.0893, + "step": 33883 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859294538995805e-05, + "loss": 1.0371, + "step": 33884 + }, + { + "epoch": 0.03, + "learning_rate": 4.98592862169855e-05, + "loss": 0.9711, + "step": 33885 + }, + { + "epoch": 0.03, + "learning_rate": 4.98592778947298e-05, + "loss": 0.9939, + "step": 33886 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859269572228694e-05, + "loss": 0.7634, + "step": 33887 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859261249482196e-05, + "loss": 0.8912, + "step": 33888 + }, + { + "epoch": 0.03, + "learning_rate": 4.98592529264903e-05, + "loss": 0.9574, + "step": 33889 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859244603252995e-05, + "loss": 0.77, + "step": 33890 + }, + { + "epoch": 0.03, + "learning_rate": 4.98592362797703e-05, + "loss": 0.954, + "step": 33891 + }, + { + "epoch": 0.03, + "learning_rate": 4.985922795604219e-05, + "loss": 1.0971, + "step": 33892 + }, + { + "epoch": 0.03, + "learning_rate": 4.985921963206869e-05, + "loss": 0.9333, + "step": 33893 + }, + { + "epoch": 0.03, + "learning_rate": 4.985921130784978e-05, + "loss": 0.8151, + "step": 33894 + }, + { + "epoch": 0.03, + "learning_rate": 4.985920298338548e-05, + "loss": 0.9034, + "step": 33895 + }, + { + "epoch": 0.03, + "learning_rate": 4.985919465867579e-05, + "loss": 1.2069, + "step": 33896 + }, + { + "epoch": 0.03, + "learning_rate": 4.985918633372069e-05, + "loss": 1.0328, + "step": 33897 + }, + { + "epoch": 0.03, + "learning_rate": 4.985917800852019e-05, + "loss": 1.1812, + "step": 33898 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859169683074284e-05, + "loss": 1.1403, + "step": 33899 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859161357382986e-05, + "loss": 0.9704, + "step": 33900 + }, + { + "epoch": 0.03, + "learning_rate": 4.985915303144629e-05, + "loss": 0.6214, + "step": 33901 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859144705264195e-05, + "loss": 0.6818, + "step": 33902 + }, + { + "epoch": 0.03, + "learning_rate": 4.98591363788367e-05, + "loss": 1.3086, + "step": 33903 + }, + { + "epoch": 0.03, + "learning_rate": 4.985912805216381e-05, + "loss": 0.7616, + "step": 33904 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859119725245504e-05, + "loss": 1.4608, + "step": 33905 + }, + { + "epoch": 0.03, + "learning_rate": 4.985911139808182e-05, + "loss": 0.9855, + "step": 33906 + }, + { + "epoch": 0.03, + "learning_rate": 4.985910307067273e-05, + "loss": 0.7321, + "step": 33907 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859094743018245e-05, + "loss": 0.9514, + "step": 33908 + }, + { + "epoch": 0.03, + "learning_rate": 4.985908641511835e-05, + "loss": 0.957, + "step": 33909 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859078086973063e-05, + "loss": 0.9312, + "step": 33910 + }, + { + "epoch": 0.03, + "learning_rate": 4.985906975858238e-05, + "loss": 0.8321, + "step": 33911 + }, + { + "epoch": 0.03, + "learning_rate": 4.98590614299463e-05, + "loss": 0.7319, + "step": 33912 + }, + { + "epoch": 0.03, + "learning_rate": 4.985905310106482e-05, + "loss": 0.9653, + "step": 33913 + }, + { + "epoch": 0.03, + "learning_rate": 4.985904477193793e-05, + "loss": 1.1554, + "step": 33914 + }, + { + "epoch": 0.03, + "learning_rate": 4.985903644256566e-05, + "loss": 0.9241, + "step": 33915 + }, + { + "epoch": 0.03, + "learning_rate": 4.985902811294799e-05, + "loss": 1.1628, + "step": 33916 + }, + { + "epoch": 0.03, + "learning_rate": 4.985901978308492e-05, + "loss": 1.0895, + "step": 33917 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859011452976444e-05, + "loss": 0.9313, + "step": 33918 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859003122622574e-05, + "loss": 1.1098, + "step": 33919 + }, + { + "epoch": 0.03, + "learning_rate": 4.985899479202331e-05, + "loss": 0.9051, + "step": 33920 + }, + { + "epoch": 0.03, + "learning_rate": 4.985898646117865e-05, + "loss": 0.4782, + "step": 33921 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858978130088585e-05, + "loss": 0.8414, + "step": 33922 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858969798753126e-05, + "loss": 1.0064, + "step": 33923 + }, + { + "epoch": 0.03, + "learning_rate": 4.985896146717227e-05, + "loss": 0.7264, + "step": 33924 + }, + { + "epoch": 0.03, + "learning_rate": 4.985895313534602e-05, + "loss": 0.8513, + "step": 33925 + }, + { + "epoch": 0.03, + "learning_rate": 4.985894480327436e-05, + "loss": 0.9792, + "step": 33926 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858936470957315e-05, + "loss": 0.9771, + "step": 33927 + }, + { + "epoch": 0.03, + "learning_rate": 4.985892813839488e-05, + "loss": 0.4654, + "step": 33928 + }, + { + "epoch": 0.03, + "learning_rate": 4.985891980558703e-05, + "loss": 0.6709, + "step": 33929 + }, + { + "epoch": 0.03, + "learning_rate": 4.98589114725338e-05, + "loss": 1.1448, + "step": 33930 + }, + { + "epoch": 0.03, + "learning_rate": 4.985890313923516e-05, + "loss": 1.0911, + "step": 33931 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858894805691134e-05, + "loss": 0.8967, + "step": 33932 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858886471901706e-05, + "loss": 1.1825, + "step": 33933 + }, + { + "epoch": 0.03, + "learning_rate": 4.985887813786688e-05, + "loss": 1.3135, + "step": 33934 + }, + { + "epoch": 0.03, + "learning_rate": 4.985886980358666e-05, + "loss": 0.9031, + "step": 33935 + }, + { + "epoch": 0.03, + "learning_rate": 4.985886146906105e-05, + "loss": 0.9792, + "step": 33936 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858853134290036e-05, + "loss": 1.2157, + "step": 33937 + }, + { + "epoch": 0.03, + "learning_rate": 4.985884479927362e-05, + "loss": 1.0864, + "step": 33938 + }, + { + "epoch": 0.03, + "learning_rate": 4.985883646401182e-05, + "loss": 0.8543, + "step": 33939 + }, + { + "epoch": 0.03, + "learning_rate": 4.985882812850462e-05, + "loss": 1.0935, + "step": 33940 + }, + { + "epoch": 0.03, + "learning_rate": 4.985881979275202e-05, + "loss": 0.9041, + "step": 33941 + }, + { + "epoch": 0.03, + "learning_rate": 4.985881145675403e-05, + "loss": 0.8985, + "step": 33942 + }, + { + "epoch": 0.03, + "learning_rate": 4.985880312051064e-05, + "loss": 1.0251, + "step": 33943 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858794784021856e-05, + "loss": 0.5386, + "step": 33944 + }, + { + "epoch": 0.03, + "learning_rate": 4.985878644728768e-05, + "loss": 1.267, + "step": 33945 + }, + { + "epoch": 0.03, + "learning_rate": 4.98587781103081e-05, + "loss": 1.1633, + "step": 33946 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858769773083135e-05, + "loss": 1.142, + "step": 33947 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858761435612766e-05, + "loss": 0.8097, + "step": 33948 + }, + { + "epoch": 0.03, + "learning_rate": 4.985875309789701e-05, + "loss": 0.661, + "step": 33949 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858744759935854e-05, + "loss": 0.5319, + "step": 33950 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858736421729305e-05, + "loss": 0.388, + "step": 33951 + }, + { + "epoch": 0.03, + "learning_rate": 4.985872808327735e-05, + "loss": 0.409, + "step": 33952 + }, + { + "epoch": 0.03, + "learning_rate": 4.985871974458002e-05, + "loss": 0.3321, + "step": 33953 + }, + { + "epoch": 0.03, + "learning_rate": 4.985871140563728e-05, + "loss": 0.2946, + "step": 33954 + }, + { + "epoch": 0.03, + "learning_rate": 4.985870306644915e-05, + "loss": 0.2411, + "step": 33955 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858694727015624e-05, + "loss": 0.2682, + "step": 33956 + }, + { + "epoch": 0.03, + "learning_rate": 4.985868638733671e-05, + "loss": 0.2444, + "step": 33957 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858678047412395e-05, + "loss": 0.1786, + "step": 33958 + }, + { + "epoch": 0.03, + "learning_rate": 4.985866970724268e-05, + "loss": 0.2111, + "step": 33959 + }, + { + "epoch": 0.03, + "learning_rate": 4.985866136682758e-05, + "loss": 0.9598, + "step": 33960 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858653026167086e-05, + "loss": 1.1848, + "step": 33961 + }, + { + "epoch": 0.03, + "learning_rate": 4.985864468526119e-05, + "loss": 1.0258, + "step": 33962 + }, + { + "epoch": 0.03, + "learning_rate": 4.985863634410991e-05, + "loss": 1.3284, + "step": 33963 + }, + { + "epoch": 0.03, + "learning_rate": 4.985862800271322e-05, + "loss": 1.5762, + "step": 33964 + }, + { + "epoch": 0.03, + "learning_rate": 4.985861966107116e-05, + "loss": 0.866, + "step": 33965 + }, + { + "epoch": 0.03, + "learning_rate": 4.985861131918369e-05, + "loss": 1.4419, + "step": 33966 + }, + { + "epoch": 0.03, + "learning_rate": 4.985860297705083e-05, + "loss": 1.0207, + "step": 33967 + }, + { + "epoch": 0.03, + "learning_rate": 4.985859463467257e-05, + "loss": 0.9778, + "step": 33968 + }, + { + "epoch": 0.03, + "learning_rate": 4.985858629204892e-05, + "loss": 0.5675, + "step": 33969 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858577949179884e-05, + "loss": 1.0873, + "step": 33970 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858569606065456e-05, + "loss": 0.9447, + "step": 33971 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858561262705626e-05, + "loss": 1.1241, + "step": 33972 + }, + { + "epoch": 0.03, + "learning_rate": 4.985855291910041e-05, + "loss": 1.2523, + "step": 33973 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858544575249786e-05, + "loss": 1.0991, + "step": 33974 + }, + { + "epoch": 0.03, + "learning_rate": 4.985853623115378e-05, + "loss": 1.0947, + "step": 33975 + }, + { + "epoch": 0.03, + "learning_rate": 4.985852788681238e-05, + "loss": 1.1238, + "step": 33976 + }, + { + "epoch": 0.03, + "learning_rate": 4.985851954222559e-05, + "loss": 0.7725, + "step": 33977 + }, + { + "epoch": 0.03, + "learning_rate": 4.98585111973934e-05, + "loss": 0.9548, + "step": 33978 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858502852315827e-05, + "loss": 0.8732, + "step": 33979 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858494506992845e-05, + "loss": 1.0026, + "step": 33980 + }, + { + "epoch": 0.03, + "learning_rate": 4.985848616142449e-05, + "loss": 1.1166, + "step": 33981 + }, + { + "epoch": 0.03, + "learning_rate": 4.985847781561073e-05, + "loss": 1.0227, + "step": 33982 + }, + { + "epoch": 0.03, + "learning_rate": 4.985846946955158e-05, + "loss": 1.0714, + "step": 33983 + }, + { + "epoch": 0.03, + "learning_rate": 4.985846112324704e-05, + "loss": 0.9952, + "step": 33984 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858452776697106e-05, + "loss": 0.9611, + "step": 33985 + }, + { + "epoch": 0.03, + "learning_rate": 4.985844442990178e-05, + "loss": 1.0078, + "step": 33986 + }, + { + "epoch": 0.03, + "learning_rate": 4.985843608286106e-05, + "loss": 0.9117, + "step": 33987 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858427735574956e-05, + "loss": 1.0656, + "step": 33988 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858419388043454e-05, + "loss": 0.5659, + "step": 33989 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858411040266564e-05, + "loss": 0.643, + "step": 33990 + }, + { + "epoch": 0.03, + "learning_rate": 4.985840269224427e-05, + "loss": 0.6672, + "step": 33991 + }, + { + "epoch": 0.03, + "learning_rate": 4.98583943439766e-05, + "loss": 0.9634, + "step": 33992 + }, + { + "epoch": 0.03, + "learning_rate": 4.985838599546353e-05, + "loss": 0.949, + "step": 33993 + }, + { + "epoch": 0.03, + "learning_rate": 4.985837764670507e-05, + "loss": 0.9597, + "step": 33994 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858369297701214e-05, + "loss": 0.8844, + "step": 33995 + }, + { + "epoch": 0.03, + "learning_rate": 4.985836094845197e-05, + "loss": 1.3336, + "step": 33996 + }, + { + "epoch": 0.03, + "learning_rate": 4.985835259895734e-05, + "loss": 1.492, + "step": 33997 + }, + { + "epoch": 0.03, + "learning_rate": 4.985834424921732e-05, + "loss": 1.0724, + "step": 33998 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858335899231894e-05, + "loss": 0.9564, + "step": 33999 + }, + { + "epoch": 0.03, + "learning_rate": 4.985832754900109e-05, + "loss": 0.8507, + "step": 34000 + }, + { + "epoch": 0.03, + "eval_loss": 1.0717129707336426, + "eval_runtime": 126.1661, + "eval_samples_per_second": 10.978, + "eval_steps_per_second": 5.493, + "step": 34000 + }, + { + "epoch": 0.03, + "learning_rate": 4.985831919852489e-05, + "loss": 1.2421, + "step": 34001 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858310847803304e-05, + "loss": 1.1952, + "step": 34002 + }, + { + "epoch": 0.03, + "learning_rate": 4.985830249683632e-05, + "loss": 0.5941, + "step": 34003 + }, + { + "epoch": 0.03, + "learning_rate": 4.985829414562395e-05, + "loss": 1.0543, + "step": 34004 + }, + { + "epoch": 0.03, + "learning_rate": 4.985828579416619e-05, + "loss": 1.0804, + "step": 34005 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858277442463036e-05, + "loss": 1.046, + "step": 34006 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858269090514495e-05, + "loss": 1.0272, + "step": 34007 + }, + { + "epoch": 0.03, + "learning_rate": 4.985826073832056e-05, + "loss": 1.2933, + "step": 34008 + }, + { + "epoch": 0.03, + "learning_rate": 4.985825238588124e-05, + "loss": 0.9753, + "step": 34009 + }, + { + "epoch": 0.03, + "learning_rate": 4.985824403319652e-05, + "loss": 1.1986, + "step": 34010 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858235680266416e-05, + "loss": 1.0474, + "step": 34011 + }, + { + "epoch": 0.03, + "learning_rate": 4.985822732709093e-05, + "loss": 1.0874, + "step": 34012 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858218973670044e-05, + "loss": 0.8016, + "step": 34013 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858210620003774e-05, + "loss": 1.2613, + "step": 34014 + }, + { + "epoch": 0.03, + "learning_rate": 4.985820226609211e-05, + "loss": 0.925, + "step": 34015 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858193911935055e-05, + "loss": 1.1572, + "step": 34016 + }, + { + "epoch": 0.03, + "learning_rate": 4.985818555753261e-05, + "loss": 1.245, + "step": 34017 + }, + { + "epoch": 0.03, + "learning_rate": 4.985817720288478e-05, + "loss": 1.166, + "step": 34018 + }, + { + "epoch": 0.03, + "learning_rate": 4.985816884799156e-05, + "loss": 0.9613, + "step": 34019 + }, + { + "epoch": 0.03, + "learning_rate": 4.985816049285295e-05, + "loss": 1.0077, + "step": 34020 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858152137468946e-05, + "loss": 0.9783, + "step": 34021 + }, + { + "epoch": 0.03, + "learning_rate": 4.985814378183956e-05, + "loss": 1.0221, + "step": 34022 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858135425964784e-05, + "loss": 1.2304, + "step": 34023 + }, + { + "epoch": 0.03, + "learning_rate": 4.985812706984462e-05, + "loss": 0.8828, + "step": 34024 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858118713479066e-05, + "loss": 0.9252, + "step": 34025 + }, + { + "epoch": 0.03, + "learning_rate": 4.985811035686812e-05, + "loss": 1.2788, + "step": 34026 + }, + { + "epoch": 0.03, + "learning_rate": 4.985810200001178e-05, + "loss": 0.8731, + "step": 34027 + }, + { + "epoch": 0.03, + "learning_rate": 4.985809364291006e-05, + "loss": 1.0749, + "step": 34028 + }, + { + "epoch": 0.03, + "learning_rate": 4.985808528556295e-05, + "loss": 1.1176, + "step": 34029 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858076927970446e-05, + "loss": 1.0732, + "step": 34030 + }, + { + "epoch": 0.03, + "learning_rate": 4.985806857013256e-05, + "loss": 0.8841, + "step": 34031 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858060212049284e-05, + "loss": 1.068, + "step": 34032 + }, + { + "epoch": 0.03, + "learning_rate": 4.985805185372062e-05, + "loss": 0.9875, + "step": 34033 + }, + { + "epoch": 0.03, + "learning_rate": 4.985804349514657e-05, + "loss": 0.8731, + "step": 34034 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858035136327126e-05, + "loss": 0.578, + "step": 34035 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858026777262295e-05, + "loss": 0.7443, + "step": 34036 + }, + { + "epoch": 0.03, + "learning_rate": 4.985801841795208e-05, + "loss": 0.8692, + "step": 34037 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858010058396474e-05, + "loss": 0.9967, + "step": 34038 + }, + { + "epoch": 0.03, + "learning_rate": 4.985800169859548e-05, + "loss": 1.1445, + "step": 34039 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857993338549105e-05, + "loss": 1.0685, + "step": 34040 + }, + { + "epoch": 0.03, + "learning_rate": 4.985798497825733e-05, + "loss": 0.9293, + "step": 34041 + }, + { + "epoch": 0.03, + "learning_rate": 4.985797661772017e-05, + "loss": 0.8336, + "step": 34042 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857968256937634e-05, + "loss": 0.6361, + "step": 34043 + }, + { + "epoch": 0.03, + "learning_rate": 4.98579598959097e-05, + "loss": 0.3616, + "step": 34044 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857951534636383e-05, + "loss": 0.39, + "step": 34045 + }, + { + "epoch": 0.03, + "learning_rate": 4.985794317311768e-05, + "loss": 1.0298, + "step": 34046 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857934811353584e-05, + "loss": 1.0045, + "step": 34047 + }, + { + "epoch": 0.03, + "learning_rate": 4.985792644934411e-05, + "loss": 0.9766, + "step": 34048 + }, + { + "epoch": 0.03, + "learning_rate": 4.985791808708924e-05, + "loss": 0.8153, + "step": 34049 + }, + { + "epoch": 0.03, + "learning_rate": 4.985790972458899e-05, + "loss": 0.7792, + "step": 34050 + }, + { + "epoch": 0.03, + "learning_rate": 4.985790136184335e-05, + "loss": 1.2586, + "step": 34051 + }, + { + "epoch": 0.03, + "learning_rate": 4.985789299885232e-05, + "loss": 0.9309, + "step": 34052 + }, + { + "epoch": 0.03, + "learning_rate": 4.98578846356159e-05, + "loss": 1.3097, + "step": 34053 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857876272134105e-05, + "loss": 1.0393, + "step": 34054 + }, + { + "epoch": 0.03, + "learning_rate": 4.985786790840692e-05, + "loss": 1.2942, + "step": 34055 + }, + { + "epoch": 0.03, + "learning_rate": 4.985785954443435e-05, + "loss": 1.1817, + "step": 34056 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857851180216385e-05, + "loss": 0.9757, + "step": 34057 + }, + { + "epoch": 0.03, + "learning_rate": 4.985784281575304e-05, + "loss": 1.0618, + "step": 34058 + }, + { + "epoch": 0.03, + "learning_rate": 4.985783445104431e-05, + "loss": 1.2726, + "step": 34059 + }, + { + "epoch": 0.03, + "learning_rate": 4.985782608609019e-05, + "loss": 1.2011, + "step": 34060 + }, + { + "epoch": 0.03, + "learning_rate": 4.985781772089069e-05, + "loss": 0.967, + "step": 34061 + }, + { + "epoch": 0.03, + "learning_rate": 4.98578093554458e-05, + "loss": 1.0221, + "step": 34062 + }, + { + "epoch": 0.03, + "learning_rate": 4.985780098975553e-05, + "loss": 1.0006, + "step": 34063 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857792623819864e-05, + "loss": 0.8386, + "step": 34064 + }, + { + "epoch": 0.03, + "learning_rate": 4.985778425763882e-05, + "loss": 0.877, + "step": 34065 + }, + { + "epoch": 0.03, + "learning_rate": 4.985777589121239e-05, + "loss": 1.0558, + "step": 34066 + }, + { + "epoch": 0.03, + "learning_rate": 4.985776752454057e-05, + "loss": 1.2105, + "step": 34067 + }, + { + "epoch": 0.03, + "learning_rate": 4.985775915762336e-05, + "loss": 1.256, + "step": 34068 + }, + { + "epoch": 0.03, + "learning_rate": 4.985775079046078e-05, + "loss": 1.1192, + "step": 34069 + }, + { + "epoch": 0.03, + "learning_rate": 4.98577424230528e-05, + "loss": 0.9003, + "step": 34070 + }, + { + "epoch": 0.03, + "learning_rate": 4.985773405539944e-05, + "loss": 0.5294, + "step": 34071 + }, + { + "epoch": 0.03, + "learning_rate": 4.98577256875007e-05, + "loss": 0.4211, + "step": 34072 + }, + { + "epoch": 0.03, + "learning_rate": 4.985771731935657e-05, + "loss": 0.4752, + "step": 34073 + }, + { + "epoch": 0.03, + "learning_rate": 4.985770895096705e-05, + "loss": 1.1204, + "step": 34074 + }, + { + "epoch": 0.03, + "learning_rate": 4.985770058233216e-05, + "loss": 1.0566, + "step": 34075 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857692213451876e-05, + "loss": 1.0663, + "step": 34076 + }, + { + "epoch": 0.03, + "learning_rate": 4.985768384432621e-05, + "loss": 1.2969, + "step": 34077 + }, + { + "epoch": 0.03, + "learning_rate": 4.985767547495516e-05, + "loss": 1.3024, + "step": 34078 + }, + { + "epoch": 0.03, + "learning_rate": 4.985766710533871e-05, + "loss": 1.0548, + "step": 34079 + }, + { + "epoch": 0.03, + "learning_rate": 4.98576587354769e-05, + "loss": 1.0848, + "step": 34080 + }, + { + "epoch": 0.03, + "learning_rate": 4.985765036536969e-05, + "loss": 1.159, + "step": 34081 + }, + { + "epoch": 0.03, + "learning_rate": 4.98576419950171e-05, + "loss": 1.3518, + "step": 34082 + }, + { + "epoch": 0.03, + "learning_rate": 4.985763362441912e-05, + "loss": 0.7376, + "step": 34083 + }, + { + "epoch": 0.03, + "learning_rate": 4.985762525357577e-05, + "loss": 0.6289, + "step": 34084 + }, + { + "epoch": 0.03, + "learning_rate": 4.985761688248702e-05, + "loss": 0.8696, + "step": 34085 + }, + { + "epoch": 0.03, + "learning_rate": 4.98576085111529e-05, + "loss": 1.3835, + "step": 34086 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857600139573386e-05, + "loss": 0.7254, + "step": 34087 + }, + { + "epoch": 0.03, + "learning_rate": 4.985759176774849e-05, + "loss": 0.9754, + "step": 34088 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857583395678216e-05, + "loss": 0.9852, + "step": 34089 + }, + { + "epoch": 0.03, + "learning_rate": 4.985757502336256e-05, + "loss": 1.0626, + "step": 34090 + }, + { + "epoch": 0.03, + "learning_rate": 4.985756665080151e-05, + "loss": 1.0363, + "step": 34091 + }, + { + "epoch": 0.03, + "learning_rate": 4.985755827799508e-05, + "loss": 1.0363, + "step": 34092 + }, + { + "epoch": 0.03, + "learning_rate": 4.985754990494327e-05, + "loss": 0.9619, + "step": 34093 + }, + { + "epoch": 0.03, + "learning_rate": 4.985754153164607e-05, + "loss": 0.9903, + "step": 34094 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857533158103496e-05, + "loss": 1.1122, + "step": 34095 + }, + { + "epoch": 0.03, + "learning_rate": 4.985752478431553e-05, + "loss": 1.3097, + "step": 34096 + }, + { + "epoch": 0.03, + "learning_rate": 4.985751641028219e-05, + "loss": 1.2131, + "step": 34097 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857508036003464e-05, + "loss": 1.0639, + "step": 34098 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857499661479354e-05, + "loss": 1.123, + "step": 34099 + }, + { + "epoch": 0.03, + "learning_rate": 4.985749128670986e-05, + "loss": 1.2371, + "step": 34100 + }, + { + "epoch": 0.03, + "learning_rate": 4.985748291169499e-05, + "loss": 0.9314, + "step": 34101 + }, + { + "epoch": 0.03, + "learning_rate": 4.985747453643473e-05, + "loss": 1.0645, + "step": 34102 + }, + { + "epoch": 0.03, + "learning_rate": 4.985746616092909e-05, + "loss": 1.198, + "step": 34103 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857457785178065e-05, + "loss": 1.2635, + "step": 34104 + }, + { + "epoch": 0.03, + "learning_rate": 4.985744940918166e-05, + "loss": 0.4297, + "step": 34105 + }, + { + "epoch": 0.03, + "learning_rate": 4.985744103293987e-05, + "loss": 0.3384, + "step": 34106 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857432656452705e-05, + "loss": 0.766, + "step": 34107 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857424279720156e-05, + "loss": 1.0877, + "step": 34108 + }, + { + "epoch": 0.03, + "learning_rate": 4.985741590274222e-05, + "loss": 1.1859, + "step": 34109 + }, + { + "epoch": 0.03, + "learning_rate": 4.98574075255189e-05, + "loss": 1.2097, + "step": 34110 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857399148050204e-05, + "loss": 1.1592, + "step": 34111 + }, + { + "epoch": 0.03, + "learning_rate": 4.985739077033613e-05, + "loss": 1.091, + "step": 34112 + }, + { + "epoch": 0.03, + "learning_rate": 4.985738239237666e-05, + "loss": 1.1913, + "step": 34113 + }, + { + "epoch": 0.03, + "learning_rate": 4.985737401417182e-05, + "loss": 1.0178, + "step": 34114 + }, + { + "epoch": 0.03, + "learning_rate": 4.98573656357216e-05, + "loss": 1.0277, + "step": 34115 + }, + { + "epoch": 0.03, + "learning_rate": 4.985735725702599e-05, + "loss": 0.7696, + "step": 34116 + }, + { + "epoch": 0.03, + "learning_rate": 4.985734887808501e-05, + "loss": 0.8873, + "step": 34117 + }, + { + "epoch": 0.03, + "learning_rate": 4.985734049889864e-05, + "loss": 1.2984, + "step": 34118 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857332119466884e-05, + "loss": 0.8285, + "step": 34119 + }, + { + "epoch": 0.03, + "learning_rate": 4.985732373978976e-05, + "loss": 0.9298, + "step": 34120 + }, + { + "epoch": 0.03, + "learning_rate": 4.985731535986725e-05, + "loss": 1.191, + "step": 34121 + }, + { + "epoch": 0.03, + "learning_rate": 4.985730697969935e-05, + "loss": 1.1721, + "step": 34122 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857298599286085e-05, + "loss": 1.4129, + "step": 34123 + }, + { + "epoch": 0.03, + "learning_rate": 4.985729021862743e-05, + "loss": 1.312, + "step": 34124 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857281837723394e-05, + "loss": 1.1403, + "step": 34125 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857273456573975e-05, + "loss": 1.0886, + "step": 34126 + }, + { + "epoch": 0.03, + "learning_rate": 4.985726507517918e-05, + "loss": 0.8522, + "step": 34127 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857256693539e-05, + "loss": 1.3873, + "step": 34128 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857248311653446e-05, + "loss": 0.9957, + "step": 34129 + }, + { + "epoch": 0.03, + "learning_rate": 4.985723992952251e-05, + "loss": 0.7155, + "step": 34130 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857231547146197e-05, + "loss": 1.1852, + "step": 34131 + }, + { + "epoch": 0.03, + "learning_rate": 4.98572231645245e-05, + "loss": 0.9499, + "step": 34132 + }, + { + "epoch": 0.03, + "learning_rate": 4.985721478165742e-05, + "loss": 1.1504, + "step": 34133 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857206398544964e-05, + "loss": 1.0706, + "step": 34134 + }, + { + "epoch": 0.03, + "learning_rate": 4.985719801518712e-05, + "loss": 0.9297, + "step": 34135 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857189631583914e-05, + "loss": 0.7788, + "step": 34136 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857181247735306e-05, + "loss": 0.4967, + "step": 34137 + }, + { + "epoch": 0.03, + "learning_rate": 4.985717286364133e-05, + "loss": 0.4504, + "step": 34138 + }, + { + "epoch": 0.03, + "learning_rate": 4.985716447930198e-05, + "loss": 0.8724, + "step": 34139 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857156094717244e-05, + "loss": 0.7781, + "step": 34140 + }, + { + "epoch": 0.03, + "learning_rate": 4.985714770988713e-05, + "loss": 1.0526, + "step": 34141 + }, + { + "epoch": 0.03, + "learning_rate": 4.985713932481164e-05, + "loss": 1.1431, + "step": 34142 + }, + { + "epoch": 0.03, + "learning_rate": 4.985713093949076e-05, + "loss": 1.0364, + "step": 34143 + }, + { + "epoch": 0.03, + "learning_rate": 4.985712255392451e-05, + "loss": 1.0444, + "step": 34144 + }, + { + "epoch": 0.03, + "learning_rate": 4.985711416811288e-05, + "loss": 1.2504, + "step": 34145 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857105782055865e-05, + "loss": 1.3723, + "step": 34146 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857097395753484e-05, + "loss": 0.9566, + "step": 34147 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857089009205714e-05, + "loss": 1.0383, + "step": 34148 + }, + { + "epoch": 0.03, + "learning_rate": 4.985708062241257e-05, + "loss": 1.083, + "step": 34149 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857072235374035e-05, + "loss": 1.1264, + "step": 34150 + }, + { + "epoch": 0.03, + "learning_rate": 4.985706384809013e-05, + "loss": 1.1129, + "step": 34151 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857055460560854e-05, + "loss": 0.759, + "step": 34152 + }, + { + "epoch": 0.03, + "learning_rate": 4.985704707278619e-05, + "loss": 1.1998, + "step": 34153 + }, + { + "epoch": 0.03, + "learning_rate": 4.985703868476615e-05, + "loss": 1.1012, + "step": 34154 + }, + { + "epoch": 0.03, + "learning_rate": 4.985703029650074e-05, + "loss": 1.0915, + "step": 34155 + }, + { + "epoch": 0.03, + "learning_rate": 4.985702190798994e-05, + "loss": 0.8362, + "step": 34156 + }, + { + "epoch": 0.03, + "learning_rate": 4.985701351923377e-05, + "loss": 1.4004, + "step": 34157 + }, + { + "epoch": 0.03, + "learning_rate": 4.985700513023222e-05, + "loss": 1.291, + "step": 34158 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856996740985286e-05, + "loss": 0.7945, + "step": 34159 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856988351492985e-05, + "loss": 1.0423, + "step": 34160 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856979961755296e-05, + "loss": 0.8977, + "step": 34161 + }, + { + "epoch": 0.03, + "learning_rate": 4.985697157177224e-05, + "loss": 0.8891, + "step": 34162 + }, + { + "epoch": 0.03, + "learning_rate": 4.985696318154379e-05, + "loss": 0.663, + "step": 34163 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856954791069976e-05, + "loss": 1.3667, + "step": 34164 + }, + { + "epoch": 0.03, + "learning_rate": 4.985694640035078e-05, + "loss": 1.1049, + "step": 34165 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856938009386214e-05, + "loss": 1.0675, + "step": 34166 + }, + { + "epoch": 0.03, + "learning_rate": 4.985692961817626e-05, + "loss": 0.8174, + "step": 34167 + }, + { + "epoch": 0.03, + "learning_rate": 4.985692122672093e-05, + "loss": 0.8236, + "step": 34168 + }, + { + "epoch": 0.03, + "learning_rate": 4.985691283502023e-05, + "loss": 1.1985, + "step": 34169 + }, + { + "epoch": 0.03, + "learning_rate": 4.985690444307415e-05, + "loss": 0.5289, + "step": 34170 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856896050882695e-05, + "loss": 0.8096, + "step": 34171 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856887658445864e-05, + "loss": 1.0001, + "step": 34172 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856879265763644e-05, + "loss": 0.9507, + "step": 34173 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856870872836056e-05, + "loss": 0.7616, + "step": 34174 + }, + { + "epoch": 0.03, + "learning_rate": 4.98568624796631e-05, + "loss": 0.3165, + "step": 34175 + }, + { + "epoch": 0.03, + "learning_rate": 4.985685408624476e-05, + "loss": 0.3665, + "step": 34176 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856845692581046e-05, + "loss": 0.8369, + "step": 34177 + }, + { + "epoch": 0.03, + "learning_rate": 4.985683729867194e-05, + "loss": 0.9192, + "step": 34178 + }, + { + "epoch": 0.03, + "learning_rate": 4.985682890451748e-05, + "loss": 0.5573, + "step": 34179 + }, + { + "epoch": 0.03, + "learning_rate": 4.985682051011763e-05, + "loss": 0.7076, + "step": 34180 + }, + { + "epoch": 0.03, + "learning_rate": 4.985681211547241e-05, + "loss": 0.9464, + "step": 34181 + }, + { + "epoch": 0.03, + "learning_rate": 4.985680372058181e-05, + "loss": 0.8228, + "step": 34182 + }, + { + "epoch": 0.03, + "learning_rate": 4.985679532544584e-05, + "loss": 0.9316, + "step": 34183 + }, + { + "epoch": 0.03, + "learning_rate": 4.985678693006449e-05, + "loss": 1.3122, + "step": 34184 + }, + { + "epoch": 0.03, + "learning_rate": 4.985677853443776e-05, + "loss": 0.9066, + "step": 34185 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856770138565664e-05, + "loss": 1.1784, + "step": 34186 + }, + { + "epoch": 0.03, + "learning_rate": 4.985676174244819e-05, + "loss": 1.004, + "step": 34187 + }, + { + "epoch": 0.03, + "learning_rate": 4.985675334608534e-05, + "loss": 0.9785, + "step": 34188 + }, + { + "epoch": 0.03, + "learning_rate": 4.985674494947711e-05, + "loss": 1.1564, + "step": 34189 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856736552623504e-05, + "loss": 0.8512, + "step": 34190 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856728155524536e-05, + "loss": 1.2155, + "step": 34191 + }, + { + "epoch": 0.03, + "learning_rate": 4.985671975818018e-05, + "loss": 1.1769, + "step": 34192 + }, + { + "epoch": 0.03, + "learning_rate": 4.985671136059045e-05, + "loss": 1.1814, + "step": 34193 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856702962755346e-05, + "loss": 1.1414, + "step": 34194 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856694564674863e-05, + "loss": 1.2505, + "step": 34195 + }, + { + "epoch": 0.03, + "learning_rate": 4.985668616634902e-05, + "loss": 1.1978, + "step": 34196 + }, + { + "epoch": 0.03, + "learning_rate": 4.985667776777779e-05, + "loss": 1.0414, + "step": 34197 + }, + { + "epoch": 0.03, + "learning_rate": 4.985666936896119e-05, + "loss": 0.6639, + "step": 34198 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856660969899214e-05, + "loss": 0.4471, + "step": 34199 + }, + { + "epoch": 0.03, + "learning_rate": 4.985665257059186e-05, + "loss": 0.7835, + "step": 34200 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856644171039134e-05, + "loss": 0.874, + "step": 34201 + }, + { + "epoch": 0.03, + "learning_rate": 4.985663577124103e-05, + "loss": 1.0678, + "step": 34202 + }, + { + "epoch": 0.03, + "learning_rate": 4.985662737119756e-05, + "loss": 1.1088, + "step": 34203 + }, + { + "epoch": 0.03, + "learning_rate": 4.985661897090871e-05, + "loss": 1.064, + "step": 34204 + }, + { + "epoch": 0.03, + "learning_rate": 4.98566105703745e-05, + "loss": 0.8118, + "step": 34205 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856602169594893e-05, + "loss": 0.9566, + "step": 34206 + }, + { + "epoch": 0.03, + "learning_rate": 4.985659376856993e-05, + "loss": 1.026, + "step": 34207 + }, + { + "epoch": 0.03, + "learning_rate": 4.985658536729958e-05, + "loss": 0.5102, + "step": 34208 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856576965783864e-05, + "loss": 1.0517, + "step": 34209 + }, + { + "epoch": 0.03, + "learning_rate": 4.985656856402278e-05, + "loss": 0.9208, + "step": 34210 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856560162016305e-05, + "loss": 0.9962, + "step": 34211 + }, + { + "epoch": 0.03, + "learning_rate": 4.985655175976447e-05, + "loss": 1.1236, + "step": 34212 + }, + { + "epoch": 0.03, + "learning_rate": 4.985654335726726e-05, + "loss": 0.9571, + "step": 34213 + }, + { + "epoch": 0.03, + "learning_rate": 4.985653495452467e-05, + "loss": 1.1326, + "step": 34214 + }, + { + "epoch": 0.03, + "learning_rate": 4.985652655153672e-05, + "loss": 1.2767, + "step": 34215 + }, + { + "epoch": 0.03, + "learning_rate": 4.985651814830339e-05, + "loss": 1.0918, + "step": 34216 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856509744824684e-05, + "loss": 0.8839, + "step": 34217 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856501341100604e-05, + "loss": 0.9258, + "step": 34218 + }, + { + "epoch": 0.03, + "learning_rate": 4.985649293713115e-05, + "loss": 0.8017, + "step": 34219 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856484532916324e-05, + "loss": 0.9739, + "step": 34220 + }, + { + "epoch": 0.03, + "learning_rate": 4.985647612845613e-05, + "loss": 0.9212, + "step": 34221 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856467723750564e-05, + "loss": 1.295, + "step": 34222 + }, + { + "epoch": 0.03, + "learning_rate": 4.985645931879963e-05, + "loss": 1.0226, + "step": 34223 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856450913603316e-05, + "loss": 0.9896, + "step": 34224 + }, + { + "epoch": 0.03, + "learning_rate": 4.985644250816163e-05, + "loss": 0.933, + "step": 34225 + }, + { + "epoch": 0.03, + "learning_rate": 4.985643410247457e-05, + "loss": 1.1689, + "step": 34226 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856425696542136e-05, + "loss": 1.3671, + "step": 34227 + }, + { + "epoch": 0.03, + "learning_rate": 4.985641729036433e-05, + "loss": 1.4373, + "step": 34228 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856408883941155e-05, + "loss": 1.5735, + "step": 34229 + }, + { + "epoch": 0.03, + "learning_rate": 4.985640047727261e-05, + "loss": 1.0086, + "step": 34230 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856392070358694e-05, + "loss": 1.1646, + "step": 34231 + }, + { + "epoch": 0.03, + "learning_rate": 4.98563836631994e-05, + "loss": 0.8673, + "step": 34232 + }, + { + "epoch": 0.03, + "learning_rate": 4.985637525579474e-05, + "loss": 1.1534, + "step": 34233 + }, + { + "epoch": 0.03, + "learning_rate": 4.985636684814471e-05, + "loss": 1.0859, + "step": 34234 + }, + { + "epoch": 0.03, + "learning_rate": 4.98563584402493e-05, + "loss": 0.8189, + "step": 34235 + }, + { + "epoch": 0.03, + "learning_rate": 4.985635003210852e-05, + "loss": 1.0769, + "step": 34236 + }, + { + "epoch": 0.03, + "learning_rate": 4.985634162372237e-05, + "loss": 0.9307, + "step": 34237 + }, + { + "epoch": 0.03, + "learning_rate": 4.985633321509085e-05, + "loss": 0.6439, + "step": 34238 + }, + { + "epoch": 0.03, + "learning_rate": 4.985632480621396e-05, + "loss": 0.6983, + "step": 34239 + }, + { + "epoch": 0.03, + "learning_rate": 4.98563163970917e-05, + "loss": 0.6137, + "step": 34240 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856307987724074e-05, + "loss": 1.0349, + "step": 34241 + }, + { + "epoch": 0.03, + "learning_rate": 4.985629957811107e-05, + "loss": 0.9781, + "step": 34242 + }, + { + "epoch": 0.03, + "learning_rate": 4.985629116825269e-05, + "loss": 1.1246, + "step": 34243 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856282758148945e-05, + "loss": 0.8216, + "step": 34244 + }, + { + "epoch": 0.03, + "learning_rate": 4.985627434779983e-05, + "loss": 0.8708, + "step": 34245 + }, + { + "epoch": 0.03, + "learning_rate": 4.985626593720534e-05, + "loss": 1.1545, + "step": 34246 + }, + { + "epoch": 0.03, + "learning_rate": 4.985625752636548e-05, + "loss": 1.0479, + "step": 34247 + }, + { + "epoch": 0.03, + "learning_rate": 4.985624911528025e-05, + "loss": 0.9403, + "step": 34248 + }, + { + "epoch": 0.03, + "learning_rate": 4.985624070394965e-05, + "loss": 0.9316, + "step": 34249 + }, + { + "epoch": 0.03, + "learning_rate": 4.985623229237368e-05, + "loss": 0.9982, + "step": 34250 + }, + { + "epoch": 0.03, + "learning_rate": 4.985622388055235e-05, + "loss": 0.8623, + "step": 34251 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856215468485636e-05, + "loss": 1.1968, + "step": 34252 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856207056173556e-05, + "loss": 1.0425, + "step": 34253 + }, + { + "epoch": 0.03, + "learning_rate": 4.98561986436161e-05, + "loss": 0.911, + "step": 34254 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856190230813285e-05, + "loss": 1.2585, + "step": 34255 + }, + { + "epoch": 0.03, + "learning_rate": 4.98561818177651e-05, + "loss": 0.9833, + "step": 34256 + }, + { + "epoch": 0.03, + "learning_rate": 4.985617340447154e-05, + "loss": 1.0908, + "step": 34257 + }, + { + "epoch": 0.03, + "learning_rate": 4.985616499093261e-05, + "loss": 0.4708, + "step": 34258 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856156577148306e-05, + "loss": 1.5452, + "step": 34259 + }, + { + "epoch": 0.03, + "learning_rate": 4.985614816311864e-05, + "loss": 1.3441, + "step": 34260 + }, + { + "epoch": 0.03, + "learning_rate": 4.98561397488436e-05, + "loss": 1.1102, + "step": 34261 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856131334323196e-05, + "loss": 1.3881, + "step": 34262 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856122919557425e-05, + "loss": 1.0704, + "step": 34263 + }, + { + "epoch": 0.03, + "learning_rate": 4.985611450454628e-05, + "loss": 1.3388, + "step": 34264 + }, + { + "epoch": 0.03, + "learning_rate": 4.985610608928976e-05, + "loss": 1.1653, + "step": 34265 + }, + { + "epoch": 0.03, + "learning_rate": 4.985609767378788e-05, + "loss": 1.3242, + "step": 34266 + }, + { + "epoch": 0.03, + "learning_rate": 4.985608925804063e-05, + "loss": 1.4148, + "step": 34267 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856080842048e-05, + "loss": 1.2191, + "step": 34268 + }, + { + "epoch": 0.03, + "learning_rate": 4.985607242581002e-05, + "loss": 1.1026, + "step": 34269 + }, + { + "epoch": 0.03, + "learning_rate": 4.985606400932665e-05, + "loss": 1.3693, + "step": 34270 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856055592597927e-05, + "loss": 1.3435, + "step": 34271 + }, + { + "epoch": 0.03, + "learning_rate": 4.985604717562383e-05, + "loss": 1.2944, + "step": 34272 + }, + { + "epoch": 0.03, + "learning_rate": 4.985603875840437e-05, + "loss": 1.6247, + "step": 34273 + }, + { + "epoch": 0.03, + "learning_rate": 4.9856030340939544e-05, + "loss": 1.8307, + "step": 34274 + }, + { + "epoch": 0.03, + "learning_rate": 4.985602192322934e-05, + "loss": 1.1555, + "step": 34275 + }, + { + "epoch": 0.03, + "learning_rate": 4.985601350527377e-05, + "loss": 0.8411, + "step": 34276 + }, + { + "epoch": 0.03, + "learning_rate": 4.985600508707283e-05, + "loss": 0.9162, + "step": 34277 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855996668626525e-05, + "loss": 0.9209, + "step": 34278 + }, + { + "epoch": 0.03, + "learning_rate": 4.985598824993485e-05, + "loss": 0.5336, + "step": 34279 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855979830997815e-05, + "loss": 1.0009, + "step": 34280 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855971411815404e-05, + "loss": 1.2116, + "step": 34281 + }, + { + "epoch": 0.03, + "learning_rate": 4.985596299238763e-05, + "loss": 0.9517, + "step": 34282 + }, + { + "epoch": 0.03, + "learning_rate": 4.985595457271448e-05, + "loss": 0.9045, + "step": 34283 + }, + { + "epoch": 0.03, + "learning_rate": 4.985594615279597e-05, + "loss": 0.6312, + "step": 34284 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855937732632094e-05, + "loss": 0.8536, + "step": 34285 + }, + { + "epoch": 0.03, + "learning_rate": 4.985592931222285e-05, + "loss": 0.8172, + "step": 34286 + }, + { + "epoch": 0.03, + "learning_rate": 4.985592089156824e-05, + "loss": 0.4302, + "step": 34287 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855912470668254e-05, + "loss": 0.1916, + "step": 34288 + }, + { + "epoch": 0.03, + "learning_rate": 4.985590404952291e-05, + "loss": 0.7564, + "step": 34289 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855895628132194e-05, + "loss": 0.3074, + "step": 34290 + }, + { + "epoch": 0.03, + "learning_rate": 4.985588720649611e-05, + "loss": 0.8341, + "step": 34291 + }, + { + "epoch": 0.03, + "learning_rate": 4.985587878461466e-05, + "loss": 1.4017, + "step": 34292 + }, + { + "epoch": 0.03, + "learning_rate": 4.985587036248785e-05, + "loss": 0.6299, + "step": 34293 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855861940115666e-05, + "loss": 0.3603, + "step": 34294 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855853517498116e-05, + "loss": 0.3237, + "step": 34295 + }, + { + "epoch": 0.03, + "learning_rate": 4.98558450946352e-05, + "loss": 0.8422, + "step": 34296 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855836671526924e-05, + "loss": 1.0106, + "step": 34297 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855828248173275e-05, + "loss": 1.1143, + "step": 34298 + }, + { + "epoch": 0.03, + "learning_rate": 4.985581982457426e-05, + "loss": 1.0248, + "step": 34299 + }, + { + "epoch": 0.03, + "learning_rate": 4.985581140072988e-05, + "loss": 0.9387, + "step": 34300 + }, + { + "epoch": 0.03, + "learning_rate": 4.985580297664013e-05, + "loss": 0.728, + "step": 34301 + }, + { + "epoch": 0.03, + "learning_rate": 4.985579455230502e-05, + "loss": 0.7976, + "step": 34302 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855786127724545e-05, + "loss": 1.0563, + "step": 34303 + }, + { + "epoch": 0.03, + "learning_rate": 4.98557777028987e-05, + "loss": 1.0928, + "step": 34304 + }, + { + "epoch": 0.03, + "learning_rate": 4.985576927782749e-05, + "loss": 1.0562, + "step": 34305 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855760852510915e-05, + "loss": 1.0957, + "step": 34306 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855752426948976e-05, + "loss": 0.6408, + "step": 34307 + }, + { + "epoch": 0.03, + "learning_rate": 4.985574400114167e-05, + "loss": 0.7327, + "step": 34308 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855735575088995e-05, + "loss": 0.8643, + "step": 34309 + }, + { + "epoch": 0.03, + "learning_rate": 4.985572714879095e-05, + "loss": 1.6271, + "step": 34310 + }, + { + "epoch": 0.03, + "learning_rate": 4.985571872224756e-05, + "loss": 1.1626, + "step": 34311 + }, + { + "epoch": 0.03, + "learning_rate": 4.985571029545879e-05, + "loss": 0.7844, + "step": 34312 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855701868424656e-05, + "loss": 0.9146, + "step": 34313 + }, + { + "epoch": 0.03, + "learning_rate": 4.985569344114516e-05, + "loss": 0.8226, + "step": 34314 + }, + { + "epoch": 0.03, + "learning_rate": 4.985568501362029e-05, + "loss": 1.0037, + "step": 34315 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855676585850066e-05, + "loss": 0.835, + "step": 34316 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855668157834476e-05, + "loss": 0.3416, + "step": 34317 + }, + { + "epoch": 0.03, + "learning_rate": 4.985565972957352e-05, + "loss": 0.7585, + "step": 34318 + }, + { + "epoch": 0.03, + "learning_rate": 4.98556513010672e-05, + "loss": 1.0998, + "step": 34319 + }, + { + "epoch": 0.03, + "learning_rate": 4.985564287231551e-05, + "loss": 1.0285, + "step": 34320 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855634443318454e-05, + "loss": 0.9299, + "step": 34321 + }, + { + "epoch": 0.03, + "learning_rate": 4.985562601407604e-05, + "loss": 1.0964, + "step": 34322 + }, + { + "epoch": 0.03, + "learning_rate": 4.985561758458826e-05, + "loss": 0.8882, + "step": 34323 + }, + { + "epoch": 0.03, + "learning_rate": 4.985560915485512e-05, + "loss": 0.9216, + "step": 34324 + }, + { + "epoch": 0.03, + "learning_rate": 4.985560072487662e-05, + "loss": 1.3295, + "step": 34325 + }, + { + "epoch": 0.03, + "learning_rate": 4.985559229465274e-05, + "loss": 0.8855, + "step": 34326 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855583864183505e-05, + "loss": 0.8746, + "step": 34327 + }, + { + "epoch": 0.03, + "learning_rate": 4.98555754334689e-05, + "loss": 1.1478, + "step": 34328 + }, + { + "epoch": 0.03, + "learning_rate": 4.985556700250894e-05, + "loss": 0.6992, + "step": 34329 + }, + { + "epoch": 0.03, + "learning_rate": 4.985555857130362e-05, + "loss": 0.9328, + "step": 34330 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855550139852926e-05, + "loss": 0.8405, + "step": 34331 + }, + { + "epoch": 0.03, + "learning_rate": 4.985554170815687e-05, + "loss": 0.5517, + "step": 34332 + }, + { + "epoch": 0.03, + "learning_rate": 4.985553327621545e-05, + "loss": 1.3978, + "step": 34333 + }, + { + "epoch": 0.03, + "learning_rate": 4.985552484402868e-05, + "loss": 0.9248, + "step": 34334 + }, + { + "epoch": 0.03, + "learning_rate": 4.985551641159653e-05, + "loss": 0.9484, + "step": 34335 + }, + { + "epoch": 0.03, + "learning_rate": 4.985550797891902e-05, + "loss": 0.7576, + "step": 34336 + }, + { + "epoch": 0.03, + "learning_rate": 4.985549954599615e-05, + "loss": 1.1318, + "step": 34337 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855491112827925e-05, + "loss": 0.7991, + "step": 34338 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855482679414324e-05, + "loss": 1.0941, + "step": 34339 + }, + { + "epoch": 0.03, + "learning_rate": 4.985547424575536e-05, + "loss": 1.0493, + "step": 34340 + }, + { + "epoch": 0.03, + "learning_rate": 4.985546581185104e-05, + "loss": 0.9269, + "step": 34341 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855457377701356e-05, + "loss": 1.0096, + "step": 34342 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855448943306314e-05, + "loss": 1.0721, + "step": 34343 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855440508665904e-05, + "loss": 0.8061, + "step": 34344 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855432073780125e-05, + "loss": 0.813, + "step": 34345 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855423638649e-05, + "loss": 0.804, + "step": 34346 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855415203272496e-05, + "loss": 1.4011, + "step": 34347 + }, + { + "epoch": 0.03, + "learning_rate": 4.985540676765064e-05, + "loss": 1.5175, + "step": 34348 + }, + { + "epoch": 0.03, + "learning_rate": 4.985539833178342e-05, + "loss": 0.9888, + "step": 34349 + }, + { + "epoch": 0.03, + "learning_rate": 4.985538989567083e-05, + "loss": 1.0581, + "step": 34350 + }, + { + "epoch": 0.03, + "learning_rate": 4.985538145931289e-05, + "loss": 1.1492, + "step": 34351 + }, + { + "epoch": 0.03, + "learning_rate": 4.985537302270959e-05, + "loss": 1.1928, + "step": 34352 + }, + { + "epoch": 0.03, + "learning_rate": 4.985536458586092e-05, + "loss": 0.6623, + "step": 34353 + }, + { + "epoch": 0.03, + "learning_rate": 4.985535614876689e-05, + "loss": 1.0827, + "step": 34354 + }, + { + "epoch": 0.03, + "learning_rate": 4.985534771142749e-05, + "loss": 0.6672, + "step": 34355 + }, + { + "epoch": 0.03, + "learning_rate": 4.985533927384274e-05, + "loss": 1.0316, + "step": 34356 + }, + { + "epoch": 0.03, + "learning_rate": 4.985533083601263e-05, + "loss": 0.9444, + "step": 34357 + }, + { + "epoch": 0.03, + "learning_rate": 4.985532239793715e-05, + "loss": 0.8993, + "step": 34358 + }, + { + "epoch": 0.03, + "learning_rate": 4.985531395961631e-05, + "loss": 0.9944, + "step": 34359 + }, + { + "epoch": 0.03, + "learning_rate": 4.985530552105011e-05, + "loss": 0.6894, + "step": 34360 + }, + { + "epoch": 0.03, + "learning_rate": 4.985529708223855e-05, + "loss": 0.5894, + "step": 34361 + }, + { + "epoch": 0.03, + "learning_rate": 4.985528864318163e-05, + "loss": 0.8135, + "step": 34362 + }, + { + "epoch": 0.03, + "learning_rate": 4.985528020387935e-05, + "loss": 0.6002, + "step": 34363 + }, + { + "epoch": 0.03, + "learning_rate": 4.985527176433171e-05, + "loss": 0.2754, + "step": 34364 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855263324538704e-05, + "loss": 0.5544, + "step": 34365 + }, + { + "epoch": 0.03, + "learning_rate": 4.985525488450035e-05, + "loss": 0.5523, + "step": 34366 + }, + { + "epoch": 0.03, + "learning_rate": 4.985524644421662e-05, + "loss": 1.1401, + "step": 34367 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855238003687535e-05, + "loss": 1.0845, + "step": 34368 + }, + { + "epoch": 0.03, + "learning_rate": 4.985522956291309e-05, + "loss": 1.5092, + "step": 34369 + }, + { + "epoch": 0.03, + "learning_rate": 4.985522112189328e-05, + "loss": 0.8884, + "step": 34370 + }, + { + "epoch": 0.03, + "learning_rate": 4.985521268062812e-05, + "loss": 1.0539, + "step": 34371 + }, + { + "epoch": 0.03, + "learning_rate": 4.985520423911759e-05, + "loss": 0.9563, + "step": 34372 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855195797361705e-05, + "loss": 0.941, + "step": 34373 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855187355360457e-05, + "loss": 1.2124, + "step": 34374 + }, + { + "epoch": 0.03, + "learning_rate": 4.985517891311385e-05, + "loss": 1.1158, + "step": 34375 + }, + { + "epoch": 0.03, + "learning_rate": 4.985517047062188e-05, + "loss": 1.053, + "step": 34376 + }, + { + "epoch": 0.03, + "learning_rate": 4.985516202788456e-05, + "loss": 1.1563, + "step": 34377 + }, + { + "epoch": 0.03, + "learning_rate": 4.985515358490187e-05, + "loss": 1.1935, + "step": 34378 + }, + { + "epoch": 0.03, + "learning_rate": 4.985514514167383e-05, + "loss": 1.0544, + "step": 34379 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855136698200424e-05, + "loss": 1.1916, + "step": 34380 + }, + { + "epoch": 0.03, + "learning_rate": 4.985512825448166e-05, + "loss": 1.3111, + "step": 34381 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855119810517535e-05, + "loss": 1.2643, + "step": 34382 + }, + { + "epoch": 0.03, + "learning_rate": 4.985511136630805e-05, + "loss": 1.0434, + "step": 34383 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855102921853206e-05, + "loss": 0.9914, + "step": 34384 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855094477153005e-05, + "loss": 1.0398, + "step": 34385 + }, + { + "epoch": 0.03, + "learning_rate": 4.985508603220745e-05, + "loss": 1.2846, + "step": 34386 + }, + { + "epoch": 0.03, + "learning_rate": 4.985507758701653e-05, + "loss": 0.9126, + "step": 34387 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855069141580254e-05, + "loss": 0.941, + "step": 34388 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855060695898614e-05, + "loss": 1.1884, + "step": 34389 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855052249971625e-05, + "loss": 1.126, + "step": 34390 + }, + { + "epoch": 0.03, + "learning_rate": 4.985504380379926e-05, + "loss": 1.3957, + "step": 34391 + }, + { + "epoch": 0.03, + "learning_rate": 4.985503535738155e-05, + "loss": 1.042, + "step": 34392 + }, + { + "epoch": 0.03, + "learning_rate": 4.985502691071848e-05, + "loss": 1.1398, + "step": 34393 + }, + { + "epoch": 0.03, + "learning_rate": 4.985501846381005e-05, + "loss": 1.1774, + "step": 34394 + }, + { + "epoch": 0.03, + "learning_rate": 4.985501001665627e-05, + "loss": 1.4098, + "step": 34395 + }, + { + "epoch": 0.03, + "learning_rate": 4.985500156925712e-05, + "loss": 0.8969, + "step": 34396 + }, + { + "epoch": 0.03, + "learning_rate": 4.985499312161262e-05, + "loss": 0.9181, + "step": 34397 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854984673722755e-05, + "loss": 0.9174, + "step": 34398 + }, + { + "epoch": 0.03, + "learning_rate": 4.985497622558754e-05, + "loss": 0.814, + "step": 34399 + }, + { + "epoch": 0.03, + "learning_rate": 4.985496777720696e-05, + "loss": 1.3122, + "step": 34400 + }, + { + "epoch": 0.03, + "learning_rate": 4.985495932858102e-05, + "loss": 1.3095, + "step": 34401 + }, + { + "epoch": 0.03, + "learning_rate": 4.985495087970973e-05, + "loss": 1.3991, + "step": 34402 + }, + { + "epoch": 0.03, + "learning_rate": 4.985494243059308e-05, + "loss": 1.1026, + "step": 34403 + }, + { + "epoch": 0.03, + "learning_rate": 4.985493398123107e-05, + "loss": 1.265, + "step": 34404 + }, + { + "epoch": 0.03, + "learning_rate": 4.98549255316237e-05, + "loss": 0.7274, + "step": 34405 + }, + { + "epoch": 0.03, + "learning_rate": 4.985491708177098e-05, + "loss": 0.8237, + "step": 34406 + }, + { + "epoch": 0.03, + "learning_rate": 4.98549086316729e-05, + "loss": 0.619, + "step": 34407 + }, + { + "epoch": 0.03, + "learning_rate": 4.985490018132946e-05, + "loss": 1.1602, + "step": 34408 + }, + { + "epoch": 0.03, + "learning_rate": 4.985489173074067e-05, + "loss": 0.9617, + "step": 34409 + }, + { + "epoch": 0.03, + "learning_rate": 4.985488327990652e-05, + "loss": 0.8076, + "step": 34410 + }, + { + "epoch": 0.03, + "learning_rate": 4.985487482882701e-05, + "loss": 0.7022, + "step": 34411 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854866377502144e-05, + "loss": 1.0199, + "step": 34412 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854857925931917e-05, + "loss": 1.1818, + "step": 34413 + }, + { + "epoch": 0.03, + "learning_rate": 4.985484947411634e-05, + "loss": 0.9537, + "step": 34414 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854841022055404e-05, + "loss": 0.791, + "step": 34415 + }, + { + "epoch": 0.03, + "learning_rate": 4.985483256974911e-05, + "loss": 0.618, + "step": 34416 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854824117197465e-05, + "loss": 0.909, + "step": 34417 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854815664400464e-05, + "loss": 1.1024, + "step": 34418 + }, + { + "epoch": 0.03, + "learning_rate": 4.98548072113581e-05, + "loss": 1.1297, + "step": 34419 + }, + { + "epoch": 0.03, + "learning_rate": 4.985479875807038e-05, + "loss": 1.1283, + "step": 34420 + }, + { + "epoch": 0.03, + "learning_rate": 4.985479030453731e-05, + "loss": 0.9951, + "step": 34421 + }, + { + "epoch": 0.03, + "learning_rate": 4.985478185075888e-05, + "loss": 1.303, + "step": 34422 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854773396735096e-05, + "loss": 0.8492, + "step": 34423 + }, + { + "epoch": 0.03, + "learning_rate": 4.985476494246595e-05, + "loss": 0.8708, + "step": 34424 + }, + { + "epoch": 0.03, + "learning_rate": 4.985475648795146e-05, + "loss": 1.1991, + "step": 34425 + }, + { + "epoch": 0.03, + "learning_rate": 4.98547480331916e-05, + "loss": 1.0628, + "step": 34426 + }, + { + "epoch": 0.03, + "learning_rate": 4.985473957818639e-05, + "loss": 1.0638, + "step": 34427 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854731122935835e-05, + "loss": 0.9732, + "step": 34428 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854722667439915e-05, + "loss": 0.7582, + "step": 34429 + }, + { + "epoch": 0.03, + "learning_rate": 4.985471421169864e-05, + "loss": 0.876, + "step": 34430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854705755712004e-05, + "loss": 1.0918, + "step": 34431 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854697299480026e-05, + "loss": 0.9135, + "step": 34432 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854688843002686e-05, + "loss": 1.0965, + "step": 34433 + }, + { + "epoch": 0.03, + "learning_rate": 4.985468038627999e-05, + "loss": 1.1302, + "step": 34434 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854671929311935e-05, + "loss": 0.6638, + "step": 34435 + }, + { + "epoch": 0.03, + "learning_rate": 4.985466347209854e-05, + "loss": 0.7862, + "step": 34436 + }, + { + "epoch": 0.03, + "learning_rate": 4.985465501463977e-05, + "loss": 0.717, + "step": 34437 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854646556935657e-05, + "loss": 0.9856, + "step": 34438 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854638098986194e-05, + "loss": 0.9981, + "step": 34439 + }, + { + "epoch": 0.03, + "learning_rate": 4.985462964079137e-05, + "loss": 1.201, + "step": 34440 + }, + { + "epoch": 0.03, + "learning_rate": 4.985462118235119e-05, + "loss": 0.9518, + "step": 34441 + }, + { + "epoch": 0.03, + "learning_rate": 4.985461272366566e-05, + "loss": 0.7803, + "step": 34442 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854604264734774e-05, + "loss": 1.148, + "step": 34443 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854595805558536e-05, + "loss": 1.0653, + "step": 34444 + }, + { + "epoch": 0.03, + "learning_rate": 4.985458734613694e-05, + "loss": 0.9986, + "step": 34445 + }, + { + "epoch": 0.03, + "learning_rate": 4.985457888646999e-05, + "loss": 0.6619, + "step": 34446 + }, + { + "epoch": 0.03, + "learning_rate": 4.985457042655769e-05, + "loss": 0.9033, + "step": 34447 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854561966400024e-05, + "loss": 0.7544, + "step": 34448 + }, + { + "epoch": 0.03, + "learning_rate": 4.985455350599702e-05, + "loss": 0.9976, + "step": 34449 + }, + { + "epoch": 0.03, + "learning_rate": 4.985454504534866e-05, + "loss": 1.0565, + "step": 34450 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854536584454936e-05, + "loss": 1.3199, + "step": 34451 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854528123315866e-05, + "loss": 0.6467, + "step": 34452 + }, + { + "epoch": 0.03, + "learning_rate": 4.985451966193144e-05, + "loss": 0.5988, + "step": 34453 + }, + { + "epoch": 0.03, + "learning_rate": 4.985451120030167e-05, + "loss": 1.1813, + "step": 34454 + }, + { + "epoch": 0.03, + "learning_rate": 4.985450273842653e-05, + "loss": 0.8955, + "step": 34455 + }, + { + "epoch": 0.03, + "learning_rate": 4.985449427630605e-05, + "loss": 0.893, + "step": 34456 + }, + { + "epoch": 0.03, + "learning_rate": 4.985448581394021e-05, + "loss": 0.8624, + "step": 34457 + }, + { + "epoch": 0.03, + "learning_rate": 4.985447735132902e-05, + "loss": 1.3402, + "step": 34458 + }, + { + "epoch": 0.03, + "learning_rate": 4.985446888847248e-05, + "loss": 0.8585, + "step": 34459 + }, + { + "epoch": 0.03, + "learning_rate": 4.985446042537059e-05, + "loss": 1.048, + "step": 34460 + }, + { + "epoch": 0.03, + "learning_rate": 4.985445196202333e-05, + "loss": 0.9036, + "step": 34461 + }, + { + "epoch": 0.03, + "learning_rate": 4.985444349843073e-05, + "loss": 1.3569, + "step": 34462 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854435034592775e-05, + "loss": 1.221, + "step": 34463 + }, + { + "epoch": 0.03, + "learning_rate": 4.985442657050947e-05, + "loss": 1.0226, + "step": 34464 + }, + { + "epoch": 0.03, + "learning_rate": 4.985441810618081e-05, + "loss": 0.9111, + "step": 34465 + }, + { + "epoch": 0.03, + "learning_rate": 4.98544096416068e-05, + "loss": 0.9322, + "step": 34466 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854401176787436e-05, + "loss": 1.0081, + "step": 34467 + }, + { + "epoch": 0.03, + "learning_rate": 4.985439271172272e-05, + "loss": 1.0926, + "step": 34468 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854384246412656e-05, + "loss": 0.8183, + "step": 34469 + }, + { + "epoch": 0.03, + "learning_rate": 4.985437578085723e-05, + "loss": 0.4543, + "step": 34470 + }, + { + "epoch": 0.03, + "learning_rate": 4.985436731505646e-05, + "loss": 0.7401, + "step": 34471 + }, + { + "epoch": 0.03, + "learning_rate": 4.985435884901034e-05, + "loss": 1.1046, + "step": 34472 + }, + { + "epoch": 0.03, + "learning_rate": 4.985435038271886e-05, + "loss": 1.1276, + "step": 34473 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854341916182034e-05, + "loss": 1.1131, + "step": 34474 + }, + { + "epoch": 0.03, + "learning_rate": 4.985433344939986e-05, + "loss": 0.7169, + "step": 34475 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854324982372326e-05, + "loss": 0.5857, + "step": 34476 + }, + { + "epoch": 0.03, + "learning_rate": 4.985431651509944e-05, + "loss": 0.9316, + "step": 34477 + }, + { + "epoch": 0.03, + "learning_rate": 4.985430804758121e-05, + "loss": 1.2391, + "step": 34478 + }, + { + "epoch": 0.03, + "learning_rate": 4.985429957981763e-05, + "loss": 0.9589, + "step": 34479 + }, + { + "epoch": 0.03, + "learning_rate": 4.985429111180869e-05, + "loss": 0.9729, + "step": 34480 + }, + { + "epoch": 0.03, + "learning_rate": 4.985428264355441e-05, + "loss": 0.7087, + "step": 34481 + }, + { + "epoch": 0.03, + "learning_rate": 4.985427417505477e-05, + "loss": 0.9523, + "step": 34482 + }, + { + "epoch": 0.03, + "learning_rate": 4.985426570630978e-05, + "loss": 1.7054, + "step": 34483 + }, + { + "epoch": 0.03, + "learning_rate": 4.985425723731944e-05, + "loss": 0.7755, + "step": 34484 + }, + { + "epoch": 0.03, + "learning_rate": 4.985424876808375e-05, + "loss": 1.0261, + "step": 34485 + }, + { + "epoch": 0.03, + "learning_rate": 4.985424029860271e-05, + "loss": 0.7073, + "step": 34486 + }, + { + "epoch": 0.03, + "learning_rate": 4.985423182887632e-05, + "loss": 0.9958, + "step": 34487 + }, + { + "epoch": 0.03, + "learning_rate": 4.985422335890458e-05, + "loss": 1.1362, + "step": 34488 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854214888687486e-05, + "loss": 0.9782, + "step": 34489 + }, + { + "epoch": 0.03, + "learning_rate": 4.985420641822504e-05, + "loss": 1.3148, + "step": 34490 + }, + { + "epoch": 0.03, + "learning_rate": 4.985419794751725e-05, + "loss": 1.004, + "step": 34491 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854189476564105e-05, + "loss": 0.7481, + "step": 34492 + }, + { + "epoch": 0.03, + "learning_rate": 4.985418100536561e-05, + "loss": 0.9865, + "step": 34493 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854172533921775e-05, + "loss": 1.1892, + "step": 34494 + }, + { + "epoch": 0.03, + "learning_rate": 4.985416406223258e-05, + "loss": 1.218, + "step": 34495 + }, + { + "epoch": 0.03, + "learning_rate": 4.985415559029804e-05, + "loss": 1.1642, + "step": 34496 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854147118118144e-05, + "loss": 1.4681, + "step": 34497 + }, + { + "epoch": 0.03, + "learning_rate": 4.98541386456929e-05, + "loss": 1.2973, + "step": 34498 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854130173022305e-05, + "loss": 0.5564, + "step": 34499 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854121700106373e-05, + "loss": 0.9956, + "step": 34500 + }, + { + "epoch": 0.03, + "eval_loss": 1.070044755935669, + "eval_runtime": 128.0614, + "eval_samples_per_second": 10.815, + "eval_steps_per_second": 5.411, + "step": 34500 + }, + { + "epoch": 0.03, + "learning_rate": 4.985411322694507e-05, + "loss": 0.9002, + "step": 34501 + }, + { + "epoch": 0.03, + "learning_rate": 4.985410475353843e-05, + "loss": 1.0467, + "step": 34502 + }, + { + "epoch": 0.03, + "learning_rate": 4.985409627988645e-05, + "loss": 0.806, + "step": 34503 + }, + { + "epoch": 0.03, + "learning_rate": 4.985408780598911e-05, + "loss": 0.9523, + "step": 34504 + }, + { + "epoch": 0.03, + "learning_rate": 4.985407933184642e-05, + "loss": 1.0966, + "step": 34505 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854070857458383e-05, + "loss": 1.0425, + "step": 34506 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854062382824994e-05, + "loss": 1.2458, + "step": 34507 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854053907946264e-05, + "loss": 1.0693, + "step": 34508 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854045432822185e-05, + "loss": 0.8901, + "step": 34509 + }, + { + "epoch": 0.03, + "learning_rate": 4.985403695745275e-05, + "loss": 0.5852, + "step": 34510 + }, + { + "epoch": 0.03, + "learning_rate": 4.985402848183797e-05, + "loss": 1.2944, + "step": 34511 + }, + { + "epoch": 0.03, + "learning_rate": 4.985402000597784e-05, + "loss": 0.6807, + "step": 34512 + }, + { + "epoch": 0.03, + "learning_rate": 4.985401152987236e-05, + "loss": 0.368, + "step": 34513 + }, + { + "epoch": 0.03, + "learning_rate": 4.9854003053521534e-05, + "loss": 0.8308, + "step": 34514 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853994576925366e-05, + "loss": 1.1826, + "step": 34515 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853986100083843e-05, + "loss": 1.1465, + "step": 34516 + }, + { + "epoch": 0.03, + "learning_rate": 4.985397762299697e-05, + "loss": 0.9345, + "step": 34517 + }, + { + "epoch": 0.03, + "learning_rate": 4.985396914566476e-05, + "loss": 1.1336, + "step": 34518 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853960668087186e-05, + "loss": 1.0758, + "step": 34519 + }, + { + "epoch": 0.03, + "learning_rate": 4.985395219026428e-05, + "loss": 1.0667, + "step": 34520 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853943712196013e-05, + "loss": 1.264, + "step": 34521 + }, + { + "epoch": 0.03, + "learning_rate": 4.98539352338824e-05, + "loss": 0.8498, + "step": 34522 + }, + { + "epoch": 0.03, + "learning_rate": 4.985392675532345e-05, + "loss": 1.0258, + "step": 34523 + }, + { + "epoch": 0.03, + "learning_rate": 4.985391827651914e-05, + "loss": 1.319, + "step": 34524 + }, + { + "epoch": 0.03, + "learning_rate": 4.985390979746949e-05, + "loss": 0.6629, + "step": 34525 + }, + { + "epoch": 0.03, + "learning_rate": 4.985390131817449e-05, + "loss": 1.2153, + "step": 34526 + }, + { + "epoch": 0.03, + "learning_rate": 4.985389283863415e-05, + "loss": 1.1488, + "step": 34527 + }, + { + "epoch": 0.03, + "learning_rate": 4.985388435884846e-05, + "loss": 1.4977, + "step": 34528 + }, + { + "epoch": 0.03, + "learning_rate": 4.985387587881741e-05, + "loss": 1.187, + "step": 34529 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853867398541026e-05, + "loss": 1.0183, + "step": 34530 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853858918019295e-05, + "loss": 1.2348, + "step": 34531 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853850437252214e-05, + "loss": 1.1398, + "step": 34532 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853841956239786e-05, + "loss": 0.9759, + "step": 34533 + }, + { + "epoch": 0.03, + "learning_rate": 4.985383347498201e-05, + "loss": 1.4542, + "step": 34534 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853824993478885e-05, + "loss": 1.2052, + "step": 34535 + }, + { + "epoch": 0.03, + "learning_rate": 4.985381651173042e-05, + "loss": 1.1986, + "step": 34536 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853808029736605e-05, + "loss": 1.0271, + "step": 34537 + }, + { + "epoch": 0.03, + "learning_rate": 4.985379954749745e-05, + "loss": 1.5995, + "step": 34538 + }, + { + "epoch": 0.03, + "learning_rate": 4.985379106501294e-05, + "loss": 0.966, + "step": 34539 + }, + { + "epoch": 0.03, + "learning_rate": 4.985378258228309e-05, + "loss": 0.9658, + "step": 34540 + }, + { + "epoch": 0.03, + "learning_rate": 4.985377409930789e-05, + "loss": 0.8953, + "step": 34541 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853765616087345e-05, + "loss": 1.0207, + "step": 34542 + }, + { + "epoch": 0.03, + "learning_rate": 4.985375713262145e-05, + "loss": 0.9769, + "step": 34543 + }, + { + "epoch": 0.03, + "learning_rate": 4.985374864891022e-05, + "loss": 0.8908, + "step": 34544 + }, + { + "epoch": 0.03, + "learning_rate": 4.985374016495363e-05, + "loss": 1.0936, + "step": 34545 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853731680751704e-05, + "loss": 1.1127, + "step": 34546 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853723196304435e-05, + "loss": 0.7548, + "step": 34547 + }, + { + "epoch": 0.03, + "learning_rate": 4.985371471161181e-05, + "loss": 0.9137, + "step": 34548 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853706226673846e-05, + "loss": 0.8141, + "step": 34549 + }, + { + "epoch": 0.03, + "learning_rate": 4.985369774149054e-05, + "loss": 1.2328, + "step": 34550 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853689256061885e-05, + "loss": 1.1128, + "step": 34551 + }, + { + "epoch": 0.03, + "learning_rate": 4.985368077038788e-05, + "loss": 1.0795, + "step": 34552 + }, + { + "epoch": 0.03, + "learning_rate": 4.985367228446854e-05, + "loss": 0.8949, + "step": 34553 + }, + { + "epoch": 0.03, + "learning_rate": 4.985366379830385e-05, + "loss": 0.8482, + "step": 34554 + }, + { + "epoch": 0.03, + "learning_rate": 4.985365531189382e-05, + "loss": 0.9408, + "step": 34555 + }, + { + "epoch": 0.03, + "learning_rate": 4.985364682523843e-05, + "loss": 0.9731, + "step": 34556 + }, + { + "epoch": 0.03, + "learning_rate": 4.985363833833771e-05, + "loss": 0.9034, + "step": 34557 + }, + { + "epoch": 0.03, + "learning_rate": 4.985362985119164e-05, + "loss": 0.9098, + "step": 34558 + }, + { + "epoch": 0.03, + "learning_rate": 4.985362136380023e-05, + "loss": 0.9689, + "step": 34559 + }, + { + "epoch": 0.03, + "learning_rate": 4.985361287616347e-05, + "loss": 1.0274, + "step": 34560 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853604388281364e-05, + "loss": 0.918, + "step": 34561 + }, + { + "epoch": 0.03, + "learning_rate": 4.985359590015392e-05, + "loss": 0.956, + "step": 34562 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853587411781135e-05, + "loss": 0.8151, + "step": 34563 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853578923162994e-05, + "loss": 0.9388, + "step": 34564 + }, + { + "epoch": 0.03, + "learning_rate": 4.985357043429951e-05, + "loss": 1.1012, + "step": 34565 + }, + { + "epoch": 0.03, + "learning_rate": 4.985356194519069e-05, + "loss": 0.8647, + "step": 34566 + }, + { + "epoch": 0.03, + "learning_rate": 4.985355345583652e-05, + "loss": 0.7398, + "step": 34567 + }, + { + "epoch": 0.03, + "learning_rate": 4.985354496623701e-05, + "loss": 1.0564, + "step": 34568 + }, + { + "epoch": 0.03, + "learning_rate": 4.985353647639216e-05, + "loss": 0.8831, + "step": 34569 + }, + { + "epoch": 0.03, + "learning_rate": 4.985352798630196e-05, + "loss": 1.0952, + "step": 34570 + }, + { + "epoch": 0.03, + "learning_rate": 4.985351949596642e-05, + "loss": 0.7677, + "step": 34571 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853511005385533e-05, + "loss": 0.5903, + "step": 34572 + }, + { + "epoch": 0.03, + "learning_rate": 4.98535025145593e-05, + "loss": 0.9774, + "step": 34573 + }, + { + "epoch": 0.03, + "learning_rate": 4.985349402348773e-05, + "loss": 0.7482, + "step": 34574 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853485532170816e-05, + "loss": 0.9648, + "step": 34575 + }, + { + "epoch": 0.03, + "learning_rate": 4.985347704060856e-05, + "loss": 0.8624, + "step": 34576 + }, + { + "epoch": 0.03, + "learning_rate": 4.985346854880096e-05, + "loss": 1.1364, + "step": 34577 + }, + { + "epoch": 0.03, + "learning_rate": 4.985346005674801e-05, + "loss": 1.2188, + "step": 34578 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853451564449724e-05, + "loss": 0.9729, + "step": 34579 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853443071906094e-05, + "loss": 0.869, + "step": 34580 + }, + { + "epoch": 0.03, + "learning_rate": 4.985343457911712e-05, + "loss": 1.0993, + "step": 34581 + }, + { + "epoch": 0.03, + "learning_rate": 4.985342608608281e-05, + "loss": 0.9205, + "step": 34582 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853417592803156e-05, + "loss": 0.8578, + "step": 34583 + }, + { + "epoch": 0.03, + "learning_rate": 4.985340909927815e-05, + "loss": 0.0849, + "step": 34584 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853400605507804e-05, + "loss": 0.0617, + "step": 34585 + }, + { + "epoch": 0.03, + "learning_rate": 4.985339211149212e-05, + "loss": 0.1339, + "step": 34586 + }, + { + "epoch": 0.03, + "learning_rate": 4.98533836172311e-05, + "loss": 0.139, + "step": 34587 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853375122724724e-05, + "loss": 0.0544, + "step": 34588 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853366627973015e-05, + "loss": 0.7058, + "step": 34589 + }, + { + "epoch": 0.03, + "learning_rate": 4.985335813297596e-05, + "loss": 0.9635, + "step": 34590 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853349637733566e-05, + "loss": 0.8457, + "step": 34591 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853341142245825e-05, + "loss": 1.3173, + "step": 34592 + }, + { + "epoch": 0.03, + "learning_rate": 4.985333264651275e-05, + "loss": 0.6016, + "step": 34593 + }, + { + "epoch": 0.03, + "learning_rate": 4.985332415053433e-05, + "loss": 0.5378, + "step": 34594 + }, + { + "epoch": 0.03, + "learning_rate": 4.985331565431056e-05, + "loss": 0.4166, + "step": 34595 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853307157841464e-05, + "loss": 0.8026, + "step": 34596 + }, + { + "epoch": 0.03, + "learning_rate": 4.985329866112702e-05, + "loss": 0.4371, + "step": 34597 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853290164167235e-05, + "loss": 0.2057, + "step": 34598 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853281666962105e-05, + "loss": 0.1412, + "step": 34599 + }, + { + "epoch": 0.03, + "learning_rate": 4.985327316951164e-05, + "loss": 0.2341, + "step": 34600 + }, + { + "epoch": 0.03, + "learning_rate": 4.985326467181583e-05, + "loss": 0.4327, + "step": 34601 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853256173874674e-05, + "loss": 0.2939, + "step": 34602 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853247675688185e-05, + "loss": 0.6735, + "step": 34603 + }, + { + "epoch": 0.03, + "learning_rate": 4.985323917725635e-05, + "loss": 0.5166, + "step": 34604 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853230678579176e-05, + "loss": 0.7071, + "step": 34605 + }, + { + "epoch": 0.03, + "learning_rate": 4.985322217965667e-05, + "loss": 0.3008, + "step": 34606 + }, + { + "epoch": 0.03, + "learning_rate": 4.985321368048881e-05, + "loss": 0.4482, + "step": 34607 + }, + { + "epoch": 0.03, + "learning_rate": 4.985320518107561e-05, + "loss": 0.5374, + "step": 34608 + }, + { + "epoch": 0.03, + "learning_rate": 4.985319668141708e-05, + "loss": 0.3154, + "step": 34609 + }, + { + "epoch": 0.03, + "learning_rate": 4.985318818151321e-05, + "loss": 1.0703, + "step": 34610 + }, + { + "epoch": 0.03, + "learning_rate": 4.985317968136399e-05, + "loss": 1.2456, + "step": 34611 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853171180969436e-05, + "loss": 0.9171, + "step": 34612 + }, + { + "epoch": 0.03, + "learning_rate": 4.985316268032954e-05, + "loss": 0.9166, + "step": 34613 + }, + { + "epoch": 0.03, + "learning_rate": 4.98531541794443e-05, + "loss": 0.8101, + "step": 34614 + }, + { + "epoch": 0.03, + "learning_rate": 4.985314567831373e-05, + "loss": 1.1228, + "step": 34615 + }, + { + "epoch": 0.03, + "learning_rate": 4.985313717693782e-05, + "loss": 0.921, + "step": 34616 + }, + { + "epoch": 0.03, + "learning_rate": 4.985312867531656e-05, + "loss": 1.251, + "step": 34617 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853120173449966e-05, + "loss": 1.3111, + "step": 34618 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853111671338035e-05, + "loss": 0.9955, + "step": 34619 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853103168980756e-05, + "loss": 1.1905, + "step": 34620 + }, + { + "epoch": 0.03, + "learning_rate": 4.985309466637814e-05, + "loss": 0.9228, + "step": 34621 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853086163530194e-05, + "loss": 1.2264, + "step": 34622 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853077660436904e-05, + "loss": 1.607, + "step": 34623 + }, + { + "epoch": 0.03, + "learning_rate": 4.985306915709827e-05, + "loss": 1.1561, + "step": 34624 + }, + { + "epoch": 0.03, + "learning_rate": 4.985306065351431e-05, + "loss": 0.6487, + "step": 34625 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853052149685e-05, + "loss": 0.8514, + "step": 34626 + }, + { + "epoch": 0.03, + "learning_rate": 4.9853043645610345e-05, + "loss": 0.8816, + "step": 34627 + }, + { + "epoch": 0.03, + "learning_rate": 4.985303514129036e-05, + "loss": 0.8512, + "step": 34628 + }, + { + "epoch": 0.03, + "learning_rate": 4.985302663672504e-05, + "loss": 0.829, + "step": 34629 + }, + { + "epoch": 0.03, + "learning_rate": 4.985301813191438e-05, + "loss": 1.0589, + "step": 34630 + }, + { + "epoch": 0.03, + "learning_rate": 4.985300962685837e-05, + "loss": 0.5872, + "step": 34631 + }, + { + "epoch": 0.03, + "learning_rate": 4.985300112155703e-05, + "loss": 0.9229, + "step": 34632 + }, + { + "epoch": 0.03, + "learning_rate": 4.985299261601035e-05, + "loss": 1.1925, + "step": 34633 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852984110218334e-05, + "loss": 0.9117, + "step": 34634 + }, + { + "epoch": 0.03, + "learning_rate": 4.985297560418098e-05, + "loss": 0.572, + "step": 34635 + }, + { + "epoch": 0.03, + "learning_rate": 4.985296709789829e-05, + "loss": 1.1107, + "step": 34636 + }, + { + "epoch": 0.03, + "learning_rate": 4.985295859137026e-05, + "loss": 1.0939, + "step": 34637 + }, + { + "epoch": 0.03, + "learning_rate": 4.985295008459689e-05, + "loss": 1.1331, + "step": 34638 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852941577578184e-05, + "loss": 1.0943, + "step": 34639 + }, + { + "epoch": 0.03, + "learning_rate": 4.985293307031413e-05, + "loss": 0.4492, + "step": 34640 + }, + { + "epoch": 0.03, + "learning_rate": 4.985292456280475e-05, + "loss": 0.4913, + "step": 34641 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852916055050035e-05, + "loss": 1.2316, + "step": 34642 + }, + { + "epoch": 0.03, + "learning_rate": 4.985290754704998e-05, + "loss": 1.1206, + "step": 34643 + }, + { + "epoch": 0.03, + "learning_rate": 4.985289903880458e-05, + "loss": 0.8769, + "step": 34644 + }, + { + "epoch": 0.03, + "learning_rate": 4.985289053031385e-05, + "loss": 1.2616, + "step": 34645 + }, + { + "epoch": 0.03, + "learning_rate": 4.985288202157779e-05, + "loss": 0.9927, + "step": 34646 + }, + { + "epoch": 0.03, + "learning_rate": 4.985287351259638e-05, + "loss": 0.8877, + "step": 34647 + }, + { + "epoch": 0.03, + "learning_rate": 4.985286500336963e-05, + "loss": 0.8737, + "step": 34648 + }, + { + "epoch": 0.03, + "learning_rate": 4.985285649389755e-05, + "loss": 0.8282, + "step": 34649 + }, + { + "epoch": 0.03, + "learning_rate": 4.985284798418013e-05, + "loss": 0.7544, + "step": 34650 + }, + { + "epoch": 0.03, + "learning_rate": 4.985283947421738e-05, + "loss": 0.7669, + "step": 34651 + }, + { + "epoch": 0.03, + "learning_rate": 4.985283096400929e-05, + "loss": 0.8811, + "step": 34652 + }, + { + "epoch": 0.03, + "learning_rate": 4.985282245355586e-05, + "loss": 1.6391, + "step": 34653 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852813942857094e-05, + "loss": 1.6844, + "step": 34654 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852805431912994e-05, + "loss": 1.0814, + "step": 34655 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852796920723554e-05, + "loss": 1.0355, + "step": 34656 + }, + { + "epoch": 0.03, + "learning_rate": 4.985278840928878e-05, + "loss": 0.718, + "step": 34657 + }, + { + "epoch": 0.03, + "learning_rate": 4.985277989760867e-05, + "loss": 0.677, + "step": 34658 + }, + { + "epoch": 0.03, + "learning_rate": 4.985277138568323e-05, + "loss": 1.1842, + "step": 34659 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852762873512445e-05, + "loss": 1.1563, + "step": 34660 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852754361096324e-05, + "loss": 1.064, + "step": 34661 + }, + { + "epoch": 0.03, + "learning_rate": 4.985274584843487e-05, + "loss": 0.9928, + "step": 34662 + }, + { + "epoch": 0.03, + "learning_rate": 4.985273733552808e-05, + "loss": 0.844, + "step": 34663 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852728822375955e-05, + "loss": 0.896, + "step": 34664 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852720308978496e-05, + "loss": 1.0529, + "step": 34665 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852711795335695e-05, + "loss": 0.6596, + "step": 34666 + }, + { + "epoch": 0.03, + "learning_rate": 4.985270328144757e-05, + "loss": 0.7632, + "step": 34667 + }, + { + "epoch": 0.03, + "learning_rate": 4.98526947673141e-05, + "loss": 0.4963, + "step": 34668 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852686252935286e-05, + "loss": 0.5454, + "step": 34669 + }, + { + "epoch": 0.03, + "learning_rate": 4.985267773831115e-05, + "loss": 0.9525, + "step": 34670 + }, + { + "epoch": 0.03, + "learning_rate": 4.985266922344168e-05, + "loss": 1.1025, + "step": 34671 + }, + { + "epoch": 0.03, + "learning_rate": 4.985266070832687e-05, + "loss": 0.593, + "step": 34672 + }, + { + "epoch": 0.03, + "learning_rate": 4.985265219296672e-05, + "loss": 0.8715, + "step": 34673 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852643677361245e-05, + "loss": 1.0766, + "step": 34674 + }, + { + "epoch": 0.03, + "learning_rate": 4.985263516151043e-05, + "loss": 1.0143, + "step": 34675 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852626645414287e-05, + "loss": 1.2449, + "step": 34676 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852618129072805e-05, + "loss": 0.9316, + "step": 34677 + }, + { + "epoch": 0.03, + "learning_rate": 4.985260961248598e-05, + "loss": 1.14, + "step": 34678 + }, + { + "epoch": 0.03, + "learning_rate": 4.985260109565383e-05, + "loss": 1.0306, + "step": 34679 + }, + { + "epoch": 0.03, + "learning_rate": 4.985259257857635e-05, + "loss": 0.8377, + "step": 34680 + }, + { + "epoch": 0.03, + "learning_rate": 4.985258406125353e-05, + "loss": 0.5928, + "step": 34681 + }, + { + "epoch": 0.03, + "learning_rate": 4.985257554368537e-05, + "loss": 1.1061, + "step": 34682 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852567025871884e-05, + "loss": 0.8376, + "step": 34683 + }, + { + "epoch": 0.03, + "learning_rate": 4.985255850781306e-05, + "loss": 1.5607, + "step": 34684 + }, + { + "epoch": 0.03, + "learning_rate": 4.98525499895089e-05, + "loss": 1.0265, + "step": 34685 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852541470959415e-05, + "loss": 1.1198, + "step": 34686 + }, + { + "epoch": 0.03, + "learning_rate": 4.985253295216459e-05, + "loss": 0.885, + "step": 34687 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852524433124426e-05, + "loss": 1.0302, + "step": 34688 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852515913838936e-05, + "loss": 1.0395, + "step": 34689 + }, + { + "epoch": 0.03, + "learning_rate": 4.985250739430811e-05, + "loss": 0.8733, + "step": 34690 + }, + { + "epoch": 0.03, + "learning_rate": 4.985249887453195e-05, + "loss": 0.8179, + "step": 34691 + }, + { + "epoch": 0.03, + "learning_rate": 4.985249035451046e-05, + "loss": 0.9458, + "step": 34692 + }, + { + "epoch": 0.03, + "learning_rate": 4.985248183424363e-05, + "loss": 1.7071, + "step": 34693 + }, + { + "epoch": 0.03, + "learning_rate": 4.985247331373147e-05, + "loss": 1.3194, + "step": 34694 + }, + { + "epoch": 0.03, + "learning_rate": 4.985246479297398e-05, + "loss": 1.3808, + "step": 34695 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852456271971157e-05, + "loss": 0.988, + "step": 34696 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852447750722996e-05, + "loss": 1.0318, + "step": 34697 + }, + { + "epoch": 0.03, + "learning_rate": 4.98524392292295e-05, + "loss": 1.1555, + "step": 34698 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852430707490686e-05, + "loss": 0.8348, + "step": 34699 + }, + { + "epoch": 0.03, + "learning_rate": 4.985242218550652e-05, + "loss": 0.866, + "step": 34700 + }, + { + "epoch": 0.03, + "learning_rate": 4.985241366327703e-05, + "loss": 0.9565, + "step": 34701 + }, + { + "epoch": 0.03, + "learning_rate": 4.985240514080221e-05, + "loss": 1.0578, + "step": 34702 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852396618082055e-05, + "loss": 0.8996, + "step": 34703 + }, + { + "epoch": 0.03, + "learning_rate": 4.985238809511657e-05, + "loss": 0.9334, + "step": 34704 + }, + { + "epoch": 0.03, + "learning_rate": 4.985237957190575e-05, + "loss": 1.0315, + "step": 34705 + }, + { + "epoch": 0.03, + "learning_rate": 4.98523710484496e-05, + "loss": 1.0671, + "step": 34706 + }, + { + "epoch": 0.03, + "learning_rate": 4.985236252474812e-05, + "loss": 1.1546, + "step": 34707 + }, + { + "epoch": 0.03, + "learning_rate": 4.98523540008013e-05, + "loss": 1.0382, + "step": 34708 + }, + { + "epoch": 0.03, + "learning_rate": 4.985234547660915e-05, + "loss": 0.956, + "step": 34709 + }, + { + "epoch": 0.03, + "learning_rate": 4.985233695217167e-05, + "loss": 1.1609, + "step": 34710 + }, + { + "epoch": 0.03, + "learning_rate": 4.985232842748886e-05, + "loss": 0.9149, + "step": 34711 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852319902560716e-05, + "loss": 0.9112, + "step": 34712 + }, + { + "epoch": 0.03, + "learning_rate": 4.985231137738724e-05, + "loss": 1.2342, + "step": 34713 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852302851968435e-05, + "loss": 0.8864, + "step": 34714 + }, + { + "epoch": 0.03, + "learning_rate": 4.98522943263043e-05, + "loss": 1.1842, + "step": 34715 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852285800394835e-05, + "loss": 0.9632, + "step": 34716 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852277274240036e-05, + "loss": 0.6628, + "step": 34717 + }, + { + "epoch": 0.03, + "learning_rate": 4.98522687478399e-05, + "loss": 1.0577, + "step": 34718 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852260221194436e-05, + "loss": 0.8766, + "step": 34719 + }, + { + "epoch": 0.03, + "learning_rate": 4.985225169430364e-05, + "loss": 0.8547, + "step": 34720 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852243167167523e-05, + "loss": 0.8861, + "step": 34721 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852234639786065e-05, + "loss": 1.1018, + "step": 34722 + }, + { + "epoch": 0.03, + "learning_rate": 4.985222611215928e-05, + "loss": 0.6773, + "step": 34723 + }, + { + "epoch": 0.03, + "learning_rate": 4.985221758428716e-05, + "loss": 0.8537, + "step": 34724 + }, + { + "epoch": 0.03, + "learning_rate": 4.985220905616971e-05, + "loss": 1.0186, + "step": 34725 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852200527806934e-05, + "loss": 1.0239, + "step": 34726 + }, + { + "epoch": 0.03, + "learning_rate": 4.985219199919883e-05, + "loss": 0.6506, + "step": 34727 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852183470345385e-05, + "loss": 0.6663, + "step": 34728 + }, + { + "epoch": 0.03, + "learning_rate": 4.985217494124662e-05, + "loss": 0.658, + "step": 34729 + }, + { + "epoch": 0.03, + "learning_rate": 4.985216641190252e-05, + "loss": 0.5966, + "step": 34730 + }, + { + "epoch": 0.03, + "learning_rate": 4.985215788231309e-05, + "loss": 0.5591, + "step": 34731 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852149352478325e-05, + "loss": 0.5731, + "step": 34732 + }, + { + "epoch": 0.03, + "learning_rate": 4.985214082239824e-05, + "loss": 0.6202, + "step": 34733 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852132292072814e-05, + "loss": 1.2049, + "step": 34734 + }, + { + "epoch": 0.03, + "learning_rate": 4.985212376150207e-05, + "loss": 1.111, + "step": 34735 + }, + { + "epoch": 0.03, + "learning_rate": 4.985211523068599e-05, + "loss": 1.1131, + "step": 34736 + }, + { + "epoch": 0.03, + "learning_rate": 4.985210669962458e-05, + "loss": 0.8994, + "step": 34737 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852098168317845e-05, + "loss": 1.1207, + "step": 34738 + }, + { + "epoch": 0.03, + "learning_rate": 4.985208963676577e-05, + "loss": 0.9284, + "step": 34739 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852081104968387e-05, + "loss": 0.4906, + "step": 34740 + }, + { + "epoch": 0.03, + "learning_rate": 4.985207257292566e-05, + "loss": 0.7293, + "step": 34741 + }, + { + "epoch": 0.03, + "learning_rate": 4.98520640406376e-05, + "loss": 1.0558, + "step": 34742 + }, + { + "epoch": 0.03, + "learning_rate": 4.985205550810421e-05, + "loss": 0.5153, + "step": 34743 + }, + { + "epoch": 0.03, + "learning_rate": 4.985204697532551e-05, + "loss": 0.6357, + "step": 34744 + }, + { + "epoch": 0.03, + "learning_rate": 4.985203844230146e-05, + "loss": 0.705, + "step": 34745 + }, + { + "epoch": 0.03, + "learning_rate": 4.985202990903209e-05, + "loss": 0.6337, + "step": 34746 + }, + { + "epoch": 0.03, + "learning_rate": 4.985202137551739e-05, + "loss": 1.3914, + "step": 34747 + }, + { + "epoch": 0.03, + "learning_rate": 4.985201284175736e-05, + "loss": 1.2433, + "step": 34748 + }, + { + "epoch": 0.03, + "learning_rate": 4.985200430775201e-05, + "loss": 1.3301, + "step": 34749 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851995773501315e-05, + "loss": 1.0067, + "step": 34750 + }, + { + "epoch": 0.03, + "learning_rate": 4.98519872390053e-05, + "loss": 0.9287, + "step": 34751 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851978704263955e-05, + "loss": 0.7264, + "step": 34752 + }, + { + "epoch": 0.03, + "learning_rate": 4.985197016927729e-05, + "loss": 1.1368, + "step": 34753 + }, + { + "epoch": 0.03, + "learning_rate": 4.985196163404529e-05, + "loss": 0.9996, + "step": 34754 + }, + { + "epoch": 0.03, + "learning_rate": 4.985195309856796e-05, + "loss": 0.9146, + "step": 34755 + }, + { + "epoch": 0.03, + "learning_rate": 4.985194456284531e-05, + "loss": 1.0284, + "step": 34756 + }, + { + "epoch": 0.03, + "learning_rate": 4.985193602687732e-05, + "loss": 1.0017, + "step": 34757 + }, + { + "epoch": 0.03, + "learning_rate": 4.985192749066401e-05, + "loss": 1.0083, + "step": 34758 + }, + { + "epoch": 0.03, + "learning_rate": 4.985191895420538e-05, + "loss": 1.1071, + "step": 34759 + }, + { + "epoch": 0.03, + "learning_rate": 4.985191041750141e-05, + "loss": 1.1126, + "step": 34760 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851901880552113e-05, + "loss": 0.7936, + "step": 34761 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851893343357494e-05, + "loss": 1.0472, + "step": 34762 + }, + { + "epoch": 0.03, + "learning_rate": 4.985188480591755e-05, + "loss": 0.8933, + "step": 34763 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851876268232265e-05, + "loss": 1.115, + "step": 34764 + }, + { + "epoch": 0.03, + "learning_rate": 4.985186773030166e-05, + "loss": 1.1586, + "step": 34765 + }, + { + "epoch": 0.03, + "learning_rate": 4.985185919212574e-05, + "loss": 1.1961, + "step": 34766 + }, + { + "epoch": 0.03, + "learning_rate": 4.985185065370448e-05, + "loss": 1.0175, + "step": 34767 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851842115037893e-05, + "loss": 1.0945, + "step": 34768 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851833576125986e-05, + "loss": 0.9411, + "step": 34769 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851825036968744e-05, + "loss": 0.9475, + "step": 34770 + }, + { + "epoch": 0.03, + "learning_rate": 4.985181649756618e-05, + "loss": 1.1305, + "step": 34771 + }, + { + "epoch": 0.03, + "learning_rate": 4.985180795791828e-05, + "loss": 0.6549, + "step": 34772 + }, + { + "epoch": 0.03, + "learning_rate": 4.985179941802507e-05, + "loss": 0.7362, + "step": 34773 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851790877886524e-05, + "loss": 0.8679, + "step": 34774 + }, + { + "epoch": 0.03, + "learning_rate": 4.985178233750265e-05, + "loss": 0.6782, + "step": 34775 + }, + { + "epoch": 0.03, + "learning_rate": 4.985177379687346e-05, + "loss": 0.88, + "step": 34776 + }, + { + "epoch": 0.03, + "learning_rate": 4.985176525599893e-05, + "loss": 1.2806, + "step": 34777 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851756714879086e-05, + "loss": 1.136, + "step": 34778 + }, + { + "epoch": 0.03, + "learning_rate": 4.98517481735139e-05, + "loss": 1.0058, + "step": 34779 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851739631903407e-05, + "loss": 1.0606, + "step": 34780 + }, + { + "epoch": 0.03, + "learning_rate": 4.985173109004758e-05, + "loss": 1.1788, + "step": 34781 + }, + { + "epoch": 0.03, + "learning_rate": 4.985172254794642e-05, + "loss": 1.0498, + "step": 34782 + }, + { + "epoch": 0.03, + "learning_rate": 4.985171400559994e-05, + "loss": 0.9889, + "step": 34783 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851705463008134e-05, + "loss": 0.8664, + "step": 34784 + }, + { + "epoch": 0.03, + "learning_rate": 4.985169692017101e-05, + "loss": 1.1718, + "step": 34785 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851688377088554e-05, + "loss": 1.0763, + "step": 34786 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851679833760766e-05, + "loss": 1.133, + "step": 34787 + }, + { + "epoch": 0.03, + "learning_rate": 4.985167129018766e-05, + "loss": 1.164, + "step": 34788 + }, + { + "epoch": 0.03, + "learning_rate": 4.985166274636923e-05, + "loss": 1.1523, + "step": 34789 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851654202305474e-05, + "loss": 1.1769, + "step": 34790 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851645657996395e-05, + "loss": 1.2291, + "step": 34791 + }, + { + "epoch": 0.03, + "learning_rate": 4.985163711344199e-05, + "loss": 1.3421, + "step": 34792 + }, + { + "epoch": 0.03, + "learning_rate": 4.985162856864225e-05, + "loss": 0.7721, + "step": 34793 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851620023597196e-05, + "loss": 1.0975, + "step": 34794 + }, + { + "epoch": 0.03, + "learning_rate": 4.985161147830681e-05, + "loss": 1.2812, + "step": 34795 + }, + { + "epoch": 0.03, + "learning_rate": 4.985160293277111e-05, + "loss": 0.9846, + "step": 34796 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851594386990074e-05, + "loss": 1.1827, + "step": 34797 + }, + { + "epoch": 0.03, + "learning_rate": 4.985158584096372e-05, + "loss": 1.0064, + "step": 34798 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851577294692045e-05, + "loss": 1.2609, + "step": 34799 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851568748175035e-05, + "loss": 1.0856, + "step": 34800 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851560201412705e-05, + "loss": 1.105, + "step": 34801 + }, + { + "epoch": 0.03, + "learning_rate": 4.985155165440506e-05, + "loss": 0.4654, + "step": 34802 + }, + { + "epoch": 0.03, + "learning_rate": 4.985154310715208e-05, + "loss": 0.4823, + "step": 34803 + }, + { + "epoch": 0.03, + "learning_rate": 4.985153455965378e-05, + "loss": 0.5348, + "step": 34804 + }, + { + "epoch": 0.03, + "learning_rate": 4.985152601191016e-05, + "loss": 0.4409, + "step": 34805 + }, + { + "epoch": 0.03, + "learning_rate": 4.985151746392121e-05, + "loss": 0.4025, + "step": 34806 + }, + { + "epoch": 0.03, + "learning_rate": 4.985150891568694e-05, + "loss": 0.2957, + "step": 34807 + }, + { + "epoch": 0.03, + "learning_rate": 4.985150036720734e-05, + "loss": 0.6545, + "step": 34808 + }, + { + "epoch": 0.03, + "learning_rate": 4.985149181848242e-05, + "loss": 0.7997, + "step": 34809 + }, + { + "epoch": 0.03, + "learning_rate": 4.985148326951218e-05, + "loss": 1.0515, + "step": 34810 + }, + { + "epoch": 0.03, + "learning_rate": 4.985147472029661e-05, + "loss": 0.6765, + "step": 34811 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851466170835726e-05, + "loss": 1.0246, + "step": 34812 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851457621129515e-05, + "loss": 0.988, + "step": 34813 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851449071177984e-05, + "loss": 0.9399, + "step": 34814 + }, + { + "epoch": 0.03, + "learning_rate": 4.985144052098112e-05, + "loss": 1.2341, + "step": 34815 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851431970538936e-05, + "loss": 1.1837, + "step": 34816 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851423419851435e-05, + "loss": 1.0106, + "step": 34817 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851414868918605e-05, + "loss": 1.0361, + "step": 34818 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851406317740454e-05, + "loss": 1.0299, + "step": 34819 + }, + { + "epoch": 0.03, + "learning_rate": 4.985139776631699e-05, + "loss": 0.8997, + "step": 34820 + }, + { + "epoch": 0.03, + "learning_rate": 4.985138921464819e-05, + "loss": 0.8869, + "step": 34821 + }, + { + "epoch": 0.03, + "learning_rate": 4.985138066273407e-05, + "loss": 0.6425, + "step": 34822 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851372110574634e-05, + "loss": 1.0946, + "step": 34823 + }, + { + "epoch": 0.03, + "learning_rate": 4.985136355816987e-05, + "loss": 1.5798, + "step": 34824 + }, + { + "epoch": 0.03, + "learning_rate": 4.985135500551979e-05, + "loss": 1.4891, + "step": 34825 + }, + { + "epoch": 0.03, + "learning_rate": 4.985134645262438e-05, + "loss": 1.1264, + "step": 34826 + }, + { + "epoch": 0.03, + "learning_rate": 4.985133789948365e-05, + "loss": 1.3564, + "step": 34827 + }, + { + "epoch": 0.03, + "learning_rate": 4.98513293460976e-05, + "loss": 1.2601, + "step": 34828 + }, + { + "epoch": 0.03, + "learning_rate": 4.985132079246623e-05, + "loss": 1.0127, + "step": 34829 + }, + { + "epoch": 0.03, + "learning_rate": 4.985131223858954e-05, + "loss": 1.068, + "step": 34830 + }, + { + "epoch": 0.03, + "learning_rate": 4.985130368446752e-05, + "loss": 0.8386, + "step": 34831 + }, + { + "epoch": 0.03, + "learning_rate": 4.985129513010018e-05, + "loss": 1.0739, + "step": 34832 + }, + { + "epoch": 0.03, + "learning_rate": 4.985128657548753e-05, + "loss": 1.3338, + "step": 34833 + }, + { + "epoch": 0.03, + "learning_rate": 4.985127802062954e-05, + "loss": 1.1487, + "step": 34834 + }, + { + "epoch": 0.03, + "learning_rate": 4.985126946552624e-05, + "loss": 1.0874, + "step": 34835 + }, + { + "epoch": 0.03, + "learning_rate": 4.985126091017762e-05, + "loss": 1.0312, + "step": 34836 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851252354583676e-05, + "loss": 1.1719, + "step": 34837 + }, + { + "epoch": 0.03, + "learning_rate": 4.985124379874441e-05, + "loss": 0.8483, + "step": 34838 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851235242659825e-05, + "loss": 1.0129, + "step": 34839 + }, + { + "epoch": 0.03, + "learning_rate": 4.985122668632992e-05, + "loss": 0.9348, + "step": 34840 + }, + { + "epoch": 0.03, + "learning_rate": 4.98512181297547e-05, + "loss": 1.0997, + "step": 34841 + }, + { + "epoch": 0.03, + "learning_rate": 4.985120957293414e-05, + "loss": 1.149, + "step": 34842 + }, + { + "epoch": 0.03, + "learning_rate": 4.985120101586828e-05, + "loss": 0.9304, + "step": 34843 + }, + { + "epoch": 0.03, + "learning_rate": 4.985119245855708e-05, + "loss": 1.1397, + "step": 34844 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851183901000576e-05, + "loss": 0.8995, + "step": 34845 + }, + { + "epoch": 0.03, + "learning_rate": 4.985117534319874e-05, + "loss": 0.8667, + "step": 34846 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851166785151595e-05, + "loss": 0.9021, + "step": 34847 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851158226859126e-05, + "loss": 1.204, + "step": 34848 + }, + { + "epoch": 0.03, + "learning_rate": 4.985114966832133e-05, + "loss": 1.2776, + "step": 34849 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851141109538226e-05, + "loss": 0.9706, + "step": 34850 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851132550509794e-05, + "loss": 1.1576, + "step": 34851 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851123991236035e-05, + "loss": 1.0305, + "step": 34852 + }, + { + "epoch": 0.03, + "learning_rate": 4.985111543171697e-05, + "loss": 0.9365, + "step": 34853 + }, + { + "epoch": 0.03, + "learning_rate": 4.985110687195258e-05, + "loss": 1.1005, + "step": 34854 + }, + { + "epoch": 0.03, + "learning_rate": 4.985109831194287e-05, + "loss": 1.0361, + "step": 34855 + }, + { + "epoch": 0.03, + "learning_rate": 4.985108975168783e-05, + "loss": 1.033, + "step": 34856 + }, + { + "epoch": 0.03, + "learning_rate": 4.985108119118749e-05, + "loss": 1.1448, + "step": 34857 + }, + { + "epoch": 0.03, + "learning_rate": 4.985107263044182e-05, + "loss": 1.001, + "step": 34858 + }, + { + "epoch": 0.03, + "learning_rate": 4.985106406945082e-05, + "loss": 0.8395, + "step": 34859 + }, + { + "epoch": 0.03, + "learning_rate": 4.985105550821452e-05, + "loss": 0.8597, + "step": 34860 + }, + { + "epoch": 0.03, + "learning_rate": 4.985104694673289e-05, + "loss": 0.9253, + "step": 34861 + }, + { + "epoch": 0.03, + "learning_rate": 4.985103838500595e-05, + "loss": 0.9248, + "step": 34862 + }, + { + "epoch": 0.03, + "learning_rate": 4.9851029823033685e-05, + "loss": 0.9853, + "step": 34863 + }, + { + "epoch": 0.03, + "learning_rate": 4.98510212608161e-05, + "loss": 0.9644, + "step": 34864 + }, + { + "epoch": 0.03, + "learning_rate": 4.98510126983532e-05, + "loss": 1.1919, + "step": 34865 + }, + { + "epoch": 0.03, + "learning_rate": 4.985100413564497e-05, + "loss": 1.2099, + "step": 34866 + }, + { + "epoch": 0.03, + "learning_rate": 4.985099557269144e-05, + "loss": 0.977, + "step": 34867 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850987009492575e-05, + "loss": 0.8892, + "step": 34868 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850978446048405e-05, + "loss": 1.0147, + "step": 34869 + }, + { + "epoch": 0.03, + "learning_rate": 4.985096988235891e-05, + "loss": 1.0968, + "step": 34870 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850961318424094e-05, + "loss": 0.6623, + "step": 34871 + }, + { + "epoch": 0.03, + "learning_rate": 4.985095275424396e-05, + "loss": 0.5332, + "step": 34872 + }, + { + "epoch": 0.03, + "learning_rate": 4.985094418981852e-05, + "loss": 1.1229, + "step": 34873 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850935625147744e-05, + "loss": 1.1852, + "step": 34874 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850927060231654e-05, + "loss": 1.0084, + "step": 34875 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850918495070256e-05, + "loss": 1.295, + "step": 34876 + }, + { + "epoch": 0.03, + "learning_rate": 4.985090992966354e-05, + "loss": 1.4525, + "step": 34877 + }, + { + "epoch": 0.03, + "learning_rate": 4.985090136401149e-05, + "loss": 1.0423, + "step": 34878 + }, + { + "epoch": 0.03, + "learning_rate": 4.985089279811414e-05, + "loss": 1.1942, + "step": 34879 + }, + { + "epoch": 0.03, + "learning_rate": 4.985088423197146e-05, + "loss": 0.7309, + "step": 34880 + }, + { + "epoch": 0.03, + "learning_rate": 4.985087566558347e-05, + "loss": 0.8905, + "step": 34881 + }, + { + "epoch": 0.03, + "learning_rate": 4.985086709895016e-05, + "loss": 0.9756, + "step": 34882 + }, + { + "epoch": 0.03, + "learning_rate": 4.985085853207154e-05, + "loss": 0.5623, + "step": 34883 + }, + { + "epoch": 0.03, + "learning_rate": 4.985084996494759e-05, + "loss": 0.432, + "step": 34884 + }, + { + "epoch": 0.03, + "learning_rate": 4.985084139757833e-05, + "loss": 1.103, + "step": 34885 + }, + { + "epoch": 0.03, + "learning_rate": 4.985083282996376e-05, + "loss": 1.1797, + "step": 34886 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850824262103855e-05, + "loss": 1.31, + "step": 34887 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850815693998646e-05, + "loss": 0.8807, + "step": 34888 + }, + { + "epoch": 0.03, + "learning_rate": 4.985080712564812e-05, + "loss": 0.9781, + "step": 34889 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850798557052277e-05, + "loss": 1.2669, + "step": 34890 + }, + { + "epoch": 0.03, + "learning_rate": 4.985078998821111e-05, + "loss": 1.0026, + "step": 34891 + }, + { + "epoch": 0.03, + "learning_rate": 4.985078141912464e-05, + "loss": 1.3281, + "step": 34892 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850772849792835e-05, + "loss": 1.225, + "step": 34893 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850764280215726e-05, + "loss": 0.3505, + "step": 34894 + }, + { + "epoch": 0.03, + "learning_rate": 4.98507557103933e-05, + "loss": 0.9193, + "step": 34895 + }, + { + "epoch": 0.03, + "learning_rate": 4.985074714032556e-05, + "loss": 1.1438, + "step": 34896 + }, + { + "epoch": 0.03, + "learning_rate": 4.98507385700125e-05, + "loss": 1.195, + "step": 34897 + }, + { + "epoch": 0.03, + "learning_rate": 4.985072999945413e-05, + "loss": 1.3047, + "step": 34898 + }, + { + "epoch": 0.03, + "learning_rate": 4.985072142865043e-05, + "loss": 1.1137, + "step": 34899 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850712857601425e-05, + "loss": 1.1797, + "step": 34900 + }, + { + "epoch": 0.03, + "learning_rate": 4.98507042863071e-05, + "loss": 1.086, + "step": 34901 + }, + { + "epoch": 0.03, + "learning_rate": 4.985069571476746e-05, + "loss": 0.9847, + "step": 34902 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850687142982514e-05, + "loss": 0.6972, + "step": 34903 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850678570952235e-05, + "loss": 0.8975, + "step": 34904 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850669998676655e-05, + "loss": 0.8192, + "step": 34905 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850661426155754e-05, + "loss": 1.0229, + "step": 34906 + }, + { + "epoch": 0.03, + "learning_rate": 4.985065285338953e-05, + "loss": 0.7504, + "step": 34907 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850644280378e-05, + "loss": 0.4522, + "step": 34908 + }, + { + "epoch": 0.03, + "learning_rate": 4.985063570712116e-05, + "loss": 0.4744, + "step": 34909 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850627133619e-05, + "loss": 0.6788, + "step": 34910 + }, + { + "epoch": 0.03, + "learning_rate": 4.985061855987152e-05, + "loss": 0.8266, + "step": 34911 + }, + { + "epoch": 0.03, + "learning_rate": 4.985060998587873e-05, + "loss": 0.6279, + "step": 34912 + }, + { + "epoch": 0.03, + "learning_rate": 4.985060141164062e-05, + "loss": 0.6941, + "step": 34913 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850592837157205e-05, + "loss": 1.2494, + "step": 34914 + }, + { + "epoch": 0.03, + "learning_rate": 4.985058426242847e-05, + "loss": 1.0798, + "step": 34915 + }, + { + "epoch": 0.03, + "learning_rate": 4.985057568745441e-05, + "loss": 1.0183, + "step": 34916 + }, + { + "epoch": 0.03, + "learning_rate": 4.985056711223506e-05, + "loss": 1.0499, + "step": 34917 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850558536770374e-05, + "loss": 1.0155, + "step": 34918 + }, + { + "epoch": 0.03, + "learning_rate": 4.985054996106038e-05, + "loss": 1.4859, + "step": 34919 + }, + { + "epoch": 0.03, + "learning_rate": 4.985054138510507e-05, + "loss": 1.0159, + "step": 34920 + }, + { + "epoch": 0.03, + "learning_rate": 4.985053280890446e-05, + "loss": 1.2786, + "step": 34921 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850524232458515e-05, + "loss": 1.3723, + "step": 34922 + }, + { + "epoch": 0.03, + "learning_rate": 4.985051565576727e-05, + "loss": 1.004, + "step": 34923 + }, + { + "epoch": 0.03, + "learning_rate": 4.985050707883071e-05, + "loss": 0.7014, + "step": 34924 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850498501648833e-05, + "loss": 0.3208, + "step": 34925 + }, + { + "epoch": 0.03, + "learning_rate": 4.985048992422165e-05, + "loss": 0.676, + "step": 34926 + }, + { + "epoch": 0.03, + "learning_rate": 4.985048134654914e-05, + "loss": 0.9864, + "step": 34927 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850472768631323e-05, + "loss": 1.1744, + "step": 34928 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850464190468194e-05, + "loss": 0.8535, + "step": 34929 + }, + { + "epoch": 0.03, + "learning_rate": 4.985045561205975e-05, + "loss": 0.8951, + "step": 34930 + }, + { + "epoch": 0.03, + "learning_rate": 4.985044703340599e-05, + "loss": 1.0114, + "step": 34931 + }, + { + "epoch": 0.03, + "learning_rate": 4.985043845450692e-05, + "loss": 1.0993, + "step": 34932 + }, + { + "epoch": 0.03, + "learning_rate": 4.985042987536254e-05, + "loss": 1.1287, + "step": 34933 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850421295972845e-05, + "loss": 1.3724, + "step": 34934 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850412716337836e-05, + "loss": 1.2491, + "step": 34935 + }, + { + "epoch": 0.03, + "learning_rate": 4.985040413645751e-05, + "loss": 0.9716, + "step": 34936 + }, + { + "epoch": 0.03, + "learning_rate": 4.985039555633188e-05, + "loss": 1.2134, + "step": 34937 + }, + { + "epoch": 0.03, + "learning_rate": 4.985038697596093e-05, + "loss": 1.1063, + "step": 34938 + }, + { + "epoch": 0.03, + "learning_rate": 4.985037839534468e-05, + "loss": 1.1697, + "step": 34939 + }, + { + "epoch": 0.03, + "learning_rate": 4.98503698144831e-05, + "loss": 1.1417, + "step": 34940 + }, + { + "epoch": 0.03, + "learning_rate": 4.985036123337622e-05, + "loss": 0.7838, + "step": 34941 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850352652024016e-05, + "loss": 1.0432, + "step": 34942 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850344070426514e-05, + "loss": 1.0318, + "step": 34943 + }, + { + "epoch": 0.03, + "learning_rate": 4.985033548858369e-05, + "loss": 0.9542, + "step": 34944 + }, + { + "epoch": 0.03, + "learning_rate": 4.985032690649556e-05, + "loss": 1.0233, + "step": 34945 + }, + { + "epoch": 0.03, + "learning_rate": 4.985031832416211e-05, + "loss": 1.1256, + "step": 34946 + }, + { + "epoch": 0.03, + "learning_rate": 4.985030974158335e-05, + "loss": 1.2297, + "step": 34947 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850301158759286e-05, + "loss": 1.1273, + "step": 34948 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850292575689904e-05, + "loss": 0.9187, + "step": 34949 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850283992375215e-05, + "loss": 0.8696, + "step": 34950 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850275408815205e-05, + "loss": 0.8027, + "step": 34951 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850266825009894e-05, + "loss": 1.0676, + "step": 34952 + }, + { + "epoch": 0.03, + "learning_rate": 4.985025824095927e-05, + "loss": 1.3017, + "step": 34953 + }, + { + "epoch": 0.03, + "learning_rate": 4.985024965666333e-05, + "loss": 1.2091, + "step": 34954 + }, + { + "epoch": 0.03, + "learning_rate": 4.985024107212208e-05, + "loss": 1.1124, + "step": 34955 + }, + { + "epoch": 0.03, + "learning_rate": 4.985023248733553e-05, + "loss": 1.156, + "step": 34956 + }, + { + "epoch": 0.03, + "learning_rate": 4.985022390230365e-05, + "loss": 1.3985, + "step": 34957 + }, + { + "epoch": 0.03, + "learning_rate": 4.985021531702647e-05, + "loss": 1.1125, + "step": 34958 + }, + { + "epoch": 0.03, + "learning_rate": 4.985020673150398e-05, + "loss": 0.951, + "step": 34959 + }, + { + "epoch": 0.03, + "learning_rate": 4.985019814573617e-05, + "loss": 1.0205, + "step": 34960 + }, + { + "epoch": 0.03, + "learning_rate": 4.985018955972306e-05, + "loss": 1.1025, + "step": 34961 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850180973464634e-05, + "loss": 0.8249, + "step": 34962 + }, + { + "epoch": 0.03, + "learning_rate": 4.98501723869609e-05, + "loss": 0.8069, + "step": 34963 + }, + { + "epoch": 0.03, + "learning_rate": 4.985016380021185e-05, + "loss": 1.0272, + "step": 34964 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850155213217494e-05, + "loss": 1.4139, + "step": 34965 + }, + { + "epoch": 0.03, + "learning_rate": 4.985014662597783e-05, + "loss": 0.816, + "step": 34966 + }, + { + "epoch": 0.03, + "learning_rate": 4.985013803849285e-05, + "loss": 0.8349, + "step": 34967 + }, + { + "epoch": 0.03, + "learning_rate": 4.985012945076257e-05, + "loss": 1.1302, + "step": 34968 + }, + { + "epoch": 0.03, + "learning_rate": 4.985012086278697e-05, + "loss": 1.1969, + "step": 34969 + }, + { + "epoch": 0.03, + "learning_rate": 4.985011227456606e-05, + "loss": 0.9089, + "step": 34970 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850103686099846e-05, + "loss": 1.3359, + "step": 34971 + }, + { + "epoch": 0.03, + "learning_rate": 4.985009509738832e-05, + "loss": 1.2993, + "step": 34972 + }, + { + "epoch": 0.03, + "learning_rate": 4.985008650843148e-05, + "loss": 1.0437, + "step": 34973 + }, + { + "epoch": 0.03, + "learning_rate": 4.985007791922934e-05, + "loss": 1.0615, + "step": 34974 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850069329781884e-05, + "loss": 1.4714, + "step": 34975 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850060740089125e-05, + "loss": 1.5359, + "step": 34976 + }, + { + "epoch": 0.03, + "learning_rate": 4.985005215015105e-05, + "loss": 1.4002, + "step": 34977 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850043559967664e-05, + "loss": 1.4875, + "step": 34978 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850034969538975e-05, + "loss": 1.4917, + "step": 34979 + }, + { + "epoch": 0.03, + "learning_rate": 4.985002637886497e-05, + "loss": 0.9054, + "step": 34980 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850017787945655e-05, + "loss": 0.1122, + "step": 34981 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850009196781044e-05, + "loss": 0.0933, + "step": 34982 + }, + { + "epoch": 0.03, + "learning_rate": 4.985000060537112e-05, + "loss": 0.1112, + "step": 34983 + }, + { + "epoch": 0.03, + "learning_rate": 4.984999201371587e-05, + "loss": 0.0775, + "step": 34984 + }, + { + "epoch": 0.03, + "learning_rate": 4.984998342181533e-05, + "loss": 0.0788, + "step": 34985 + }, + { + "epoch": 0.03, + "learning_rate": 4.984997482966948e-05, + "loss": 0.0343, + "step": 34986 + }, + { + "epoch": 0.03, + "learning_rate": 4.9849966237278315e-05, + "loss": 0.694, + "step": 34987 + }, + { + "epoch": 0.03, + "learning_rate": 4.9849957644641846e-05, + "loss": 0.8573, + "step": 34988 + }, + { + "epoch": 0.03, + "learning_rate": 4.984994905176007e-05, + "loss": 0.9291, + "step": 34989 + }, + { + "epoch": 0.03, + "learning_rate": 4.9849940458632984e-05, + "loss": 1.3698, + "step": 34990 + }, + { + "epoch": 0.03, + "learning_rate": 4.9849931865260585e-05, + "loss": 1.006, + "step": 34991 + }, + { + "epoch": 0.03, + "learning_rate": 4.9849923271642885e-05, + "loss": 1.2665, + "step": 34992 + }, + { + "epoch": 0.03, + "learning_rate": 4.984991467777987e-05, + "loss": 1.0887, + "step": 34993 + }, + { + "epoch": 0.03, + "learning_rate": 4.984990608367155e-05, + "loss": 1.1577, + "step": 34994 + }, + { + "epoch": 0.03, + "learning_rate": 4.984989748931792e-05, + "loss": 0.5345, + "step": 34995 + }, + { + "epoch": 0.03, + "learning_rate": 4.984988889471899e-05, + "loss": 0.9198, + "step": 34996 + }, + { + "epoch": 0.03, + "learning_rate": 4.9849880299874746e-05, + "loss": 0.6681, + "step": 34997 + }, + { + "epoch": 0.03, + "learning_rate": 4.9849871704785195e-05, + "loss": 1.0401, + "step": 34998 + }, + { + "epoch": 0.03, + "learning_rate": 4.984986310945034e-05, + "loss": 0.9903, + "step": 34999 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849854513870175e-05, + "loss": 1.0312, + "step": 35000 + }, + { + "epoch": 0.04, + "eval_loss": 1.0894055366516113, + "eval_runtime": 125.4337, + "eval_samples_per_second": 11.042, + "eval_steps_per_second": 5.525, + "step": 35000 + }, + { + "epoch": 0.04, + "learning_rate": 4.98498459180447e-05, + "loss": 1.0623, + "step": 35001 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849837321973926e-05, + "loss": 0.8734, + "step": 35002 + }, + { + "epoch": 0.04, + "learning_rate": 4.984982872565783e-05, + "loss": 1.1336, + "step": 35003 + }, + { + "epoch": 0.04, + "learning_rate": 4.984982012909644e-05, + "loss": 1.0945, + "step": 35004 + }, + { + "epoch": 0.04, + "learning_rate": 4.984981153228974e-05, + "loss": 1.1202, + "step": 35005 + }, + { + "epoch": 0.04, + "learning_rate": 4.984980293523773e-05, + "loss": 1.0757, + "step": 35006 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849794337940425e-05, + "loss": 1.118, + "step": 35007 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849785740397805e-05, + "loss": 1.0745, + "step": 35008 + }, + { + "epoch": 0.04, + "learning_rate": 4.984977714260988e-05, + "loss": 0.6937, + "step": 35009 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849768544576636e-05, + "loss": 1.4132, + "step": 35010 + }, + { + "epoch": 0.04, + "learning_rate": 4.98497599462981e-05, + "loss": 0.9892, + "step": 35011 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849751347774257e-05, + "loss": 0.8812, + "step": 35012 + }, + { + "epoch": 0.04, + "learning_rate": 4.98497427490051e-05, + "loss": 0.824, + "step": 35013 + }, + { + "epoch": 0.04, + "learning_rate": 4.984973414999064e-05, + "loss": 0.4144, + "step": 35014 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849725550730875e-05, + "loss": 0.9108, + "step": 35015 + }, + { + "epoch": 0.04, + "learning_rate": 4.98497169512258e-05, + "loss": 1.2984, + "step": 35016 + }, + { + "epoch": 0.04, + "learning_rate": 4.984970835147543e-05, + "loss": 0.9801, + "step": 35017 + }, + { + "epoch": 0.04, + "learning_rate": 4.984969975147974e-05, + "loss": 0.7796, + "step": 35018 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849691151238756e-05, + "loss": 0.9012, + "step": 35019 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849682550752466e-05, + "loss": 0.9529, + "step": 35020 + }, + { + "epoch": 0.04, + "learning_rate": 4.984967395002086e-05, + "loss": 1.3308, + "step": 35021 + }, + { + "epoch": 0.04, + "learning_rate": 4.984966534904395e-05, + "loss": 1.3805, + "step": 35022 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849656747821744e-05, + "loss": 1.3476, + "step": 35023 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849648146354224e-05, + "loss": 1.1513, + "step": 35024 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849639544641404e-05, + "loss": 1.1367, + "step": 35025 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849630942683275e-05, + "loss": 0.9496, + "step": 35026 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849622340479846e-05, + "loss": 1.1523, + "step": 35027 + }, + { + "epoch": 0.04, + "learning_rate": 4.984961373803111e-05, + "loss": 0.6997, + "step": 35028 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849605135337066e-05, + "loss": 1.188, + "step": 35029 + }, + { + "epoch": 0.04, + "learning_rate": 4.984959653239772e-05, + "loss": 0.5121, + "step": 35030 + }, + { + "epoch": 0.04, + "learning_rate": 4.984958792921307e-05, + "loss": 0.6967, + "step": 35031 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849579325783116e-05, + "loss": 0.9343, + "step": 35032 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849570722107855e-05, + "loss": 0.9797, + "step": 35033 + }, + { + "epoch": 0.04, + "learning_rate": 4.984956211818729e-05, + "loss": 1.1391, + "step": 35034 + }, + { + "epoch": 0.04, + "learning_rate": 4.984955351402142e-05, + "loss": 0.8936, + "step": 35035 + }, + { + "epoch": 0.04, + "learning_rate": 4.984954490961025e-05, + "loss": 1.013, + "step": 35036 + }, + { + "epoch": 0.04, + "learning_rate": 4.984953630495377e-05, + "loss": 0.7365, + "step": 35037 + }, + { + "epoch": 0.04, + "learning_rate": 4.984952770005199e-05, + "loss": 1.008, + "step": 35038 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849519094904895e-05, + "loss": 1.2795, + "step": 35039 + }, + { + "epoch": 0.04, + "learning_rate": 4.984951048951251e-05, + "loss": 0.9438, + "step": 35040 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849501883874815e-05, + "loss": 0.9881, + "step": 35041 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849493277991814e-05, + "loss": 1.2154, + "step": 35042 + }, + { + "epoch": 0.04, + "learning_rate": 4.984948467186351e-05, + "loss": 1.1271, + "step": 35043 + }, + { + "epoch": 0.04, + "learning_rate": 4.984947606548991e-05, + "loss": 0.5867, + "step": 35044 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849467458871e-05, + "loss": 0.3893, + "step": 35045 + }, + { + "epoch": 0.04, + "learning_rate": 4.984945885200679e-05, + "loss": 0.3878, + "step": 35046 + }, + { + "epoch": 0.04, + "learning_rate": 4.984945024489727e-05, + "loss": 0.7078, + "step": 35047 + }, + { + "epoch": 0.04, + "learning_rate": 4.984944163754245e-05, + "loss": 0.9788, + "step": 35048 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849433029942324e-05, + "loss": 1.2769, + "step": 35049 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849424422096904e-05, + "loss": 0.7183, + "step": 35050 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849415814006176e-05, + "loss": 1.2445, + "step": 35051 + }, + { + "epoch": 0.04, + "learning_rate": 4.984940720567015e-05, + "loss": 0.9923, + "step": 35052 + }, + { + "epoch": 0.04, + "learning_rate": 4.984939859708882e-05, + "loss": 1.0441, + "step": 35053 + }, + { + "epoch": 0.04, + "learning_rate": 4.984938998826217e-05, + "loss": 1.1863, + "step": 35054 + }, + { + "epoch": 0.04, + "learning_rate": 4.984938137919023e-05, + "loss": 0.8559, + "step": 35055 + }, + { + "epoch": 0.04, + "learning_rate": 4.984937276987299e-05, + "loss": 0.7301, + "step": 35056 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849364160310444e-05, + "loss": 0.6529, + "step": 35057 + }, + { + "epoch": 0.04, + "learning_rate": 4.98493555505026e-05, + "loss": 1.0929, + "step": 35058 + }, + { + "epoch": 0.04, + "learning_rate": 4.984934694044945e-05, + "loss": 1.142, + "step": 35059 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849338330150994e-05, + "loss": 1.2402, + "step": 35060 + }, + { + "epoch": 0.04, + "learning_rate": 4.984932971960724e-05, + "loss": 0.9388, + "step": 35061 + }, + { + "epoch": 0.04, + "learning_rate": 4.984932110881819e-05, + "loss": 1.0686, + "step": 35062 + }, + { + "epoch": 0.04, + "learning_rate": 4.984931249778383e-05, + "loss": 0.919, + "step": 35063 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849303886504166e-05, + "loss": 0.891, + "step": 35064 + }, + { + "epoch": 0.04, + "learning_rate": 4.98492952749792e-05, + "loss": 1.0403, + "step": 35065 + }, + { + "epoch": 0.04, + "learning_rate": 4.984928666320894e-05, + "loss": 1.0117, + "step": 35066 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849278051193374e-05, + "loss": 1.5156, + "step": 35067 + }, + { + "epoch": 0.04, + "learning_rate": 4.984926943893251e-05, + "loss": 1.3507, + "step": 35068 + }, + { + "epoch": 0.04, + "learning_rate": 4.984926082642634e-05, + "loss": 1.0346, + "step": 35069 + }, + { + "epoch": 0.04, + "learning_rate": 4.984925221367487e-05, + "loss": 0.9328, + "step": 35070 + }, + { + "epoch": 0.04, + "learning_rate": 4.98492436006781e-05, + "loss": 0.9392, + "step": 35071 + }, + { + "epoch": 0.04, + "learning_rate": 4.984923498743602e-05, + "loss": 0.8452, + "step": 35072 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849226373948656e-05, + "loss": 0.4713, + "step": 35073 + }, + { + "epoch": 0.04, + "learning_rate": 4.984921776021598e-05, + "loss": 0.9736, + "step": 35074 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849209146238e-05, + "loss": 1.042, + "step": 35075 + }, + { + "epoch": 0.04, + "learning_rate": 4.984920053201472e-05, + "loss": 0.4969, + "step": 35076 + }, + { + "epoch": 0.04, + "learning_rate": 4.984919191754614e-05, + "loss": 0.9774, + "step": 35077 + }, + { + "epoch": 0.04, + "learning_rate": 4.984918330283227e-05, + "loss": 1.1382, + "step": 35078 + }, + { + "epoch": 0.04, + "learning_rate": 4.984917468787309e-05, + "loss": 1.1217, + "step": 35079 + }, + { + "epoch": 0.04, + "learning_rate": 4.984916607266861e-05, + "loss": 1.0447, + "step": 35080 + }, + { + "epoch": 0.04, + "learning_rate": 4.984915745721883e-05, + "loss": 1.1908, + "step": 35081 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849148841523745e-05, + "loss": 1.1169, + "step": 35082 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849140225583366e-05, + "loss": 0.9859, + "step": 35083 + }, + { + "epoch": 0.04, + "learning_rate": 4.984913160939768e-05, + "loss": 1.1006, + "step": 35084 + }, + { + "epoch": 0.04, + "learning_rate": 4.98491229929667e-05, + "loss": 0.9824, + "step": 35085 + }, + { + "epoch": 0.04, + "learning_rate": 4.984911437629042e-05, + "loss": 1.0418, + "step": 35086 + }, + { + "epoch": 0.04, + "learning_rate": 4.984910575936884e-05, + "loss": 1.4116, + "step": 35087 + }, + { + "epoch": 0.04, + "learning_rate": 4.984909714220196e-05, + "loss": 1.0648, + "step": 35088 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849088524789777e-05, + "loss": 0.812, + "step": 35089 + }, + { + "epoch": 0.04, + "learning_rate": 4.984907990713229e-05, + "loss": 0.5262, + "step": 35090 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849071289229514e-05, + "loss": 0.6011, + "step": 35091 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849062671081435e-05, + "loss": 0.8062, + "step": 35092 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849054052688056e-05, + "loss": 0.8015, + "step": 35093 + }, + { + "epoch": 0.04, + "learning_rate": 4.984904543404938e-05, + "loss": 1.001, + "step": 35094 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849036815165394e-05, + "loss": 0.5645, + "step": 35095 + }, + { + "epoch": 0.04, + "learning_rate": 4.984902819603612e-05, + "loss": 0.8296, + "step": 35096 + }, + { + "epoch": 0.04, + "learning_rate": 4.9849019576661544e-05, + "loss": 0.7638, + "step": 35097 + }, + { + "epoch": 0.04, + "learning_rate": 4.984901095704167e-05, + "loss": 0.5477, + "step": 35098 + }, + { + "epoch": 0.04, + "learning_rate": 4.984900233717649e-05, + "loss": 1.4543, + "step": 35099 + }, + { + "epoch": 0.04, + "learning_rate": 4.984899371706602e-05, + "loss": 1.0837, + "step": 35100 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848985096710247e-05, + "loss": 0.864, + "step": 35101 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848976476109174e-05, + "loss": 1.4664, + "step": 35102 + }, + { + "epoch": 0.04, + "learning_rate": 4.984896785526281e-05, + "loss": 1.0748, + "step": 35103 + }, + { + "epoch": 0.04, + "learning_rate": 4.984895923417114e-05, + "loss": 0.773, + "step": 35104 + }, + { + "epoch": 0.04, + "learning_rate": 4.984895061283417e-05, + "loss": 0.8841, + "step": 35105 + }, + { + "epoch": 0.04, + "learning_rate": 4.984894199125191e-05, + "loss": 1.0091, + "step": 35106 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848933369424347e-05, + "loss": 1.4164, + "step": 35107 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848924747351476e-05, + "loss": 1.1177, + "step": 35108 + }, + { + "epoch": 0.04, + "learning_rate": 4.984891612503332e-05, + "loss": 0.9043, + "step": 35109 + }, + { + "epoch": 0.04, + "learning_rate": 4.984890750246987e-05, + "loss": 1.0209, + "step": 35110 + }, + { + "epoch": 0.04, + "learning_rate": 4.984889887966111e-05, + "loss": 0.6882, + "step": 35111 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848890256607055e-05, + "loss": 0.9862, + "step": 35112 + }, + { + "epoch": 0.04, + "learning_rate": 4.98488816333077e-05, + "loss": 0.9784, + "step": 35113 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848873009763053e-05, + "loss": 1.0302, + "step": 35114 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848864385973105e-05, + "loss": 0.7577, + "step": 35115 + }, + { + "epoch": 0.04, + "learning_rate": 4.984885576193786e-05, + "loss": 0.6143, + "step": 35116 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848847137657326e-05, + "loss": 0.8948, + "step": 35117 + }, + { + "epoch": 0.04, + "learning_rate": 4.984883851313148e-05, + "loss": 1.3249, + "step": 35118 + }, + { + "epoch": 0.04, + "learning_rate": 4.984882988836035e-05, + "loss": 1.8569, + "step": 35119 + }, + { + "epoch": 0.04, + "learning_rate": 4.984882126334391e-05, + "loss": 1.5735, + "step": 35120 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848812638082187e-05, + "loss": 1.6746, + "step": 35121 + }, + { + "epoch": 0.04, + "learning_rate": 4.984880401257515e-05, + "loss": 0.9175, + "step": 35122 + }, + { + "epoch": 0.04, + "learning_rate": 4.984879538682283e-05, + "loss": 1.8466, + "step": 35123 + }, + { + "epoch": 0.04, + "learning_rate": 4.984878676082521e-05, + "loss": 1.6194, + "step": 35124 + }, + { + "epoch": 0.04, + "learning_rate": 4.984877813458229e-05, + "loss": 1.7118, + "step": 35125 + }, + { + "epoch": 0.04, + "learning_rate": 4.984876950809408e-05, + "loss": 1.3658, + "step": 35126 + }, + { + "epoch": 0.04, + "learning_rate": 4.984876088136057e-05, + "loss": 1.0392, + "step": 35127 + }, + { + "epoch": 0.04, + "learning_rate": 4.984875225438176e-05, + "loss": 0.9247, + "step": 35128 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848743627157655e-05, + "loss": 0.884, + "step": 35129 + }, + { + "epoch": 0.04, + "learning_rate": 4.984873499968825e-05, + "loss": 0.4157, + "step": 35130 + }, + { + "epoch": 0.04, + "learning_rate": 4.984872637197356e-05, + "loss": 0.789, + "step": 35131 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848717744013564e-05, + "loss": 1.0047, + "step": 35132 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848709115808276e-05, + "loss": 0.9043, + "step": 35133 + }, + { + "epoch": 0.04, + "learning_rate": 4.984870048735769e-05, + "loss": 0.9412, + "step": 35134 + }, + { + "epoch": 0.04, + "learning_rate": 4.984869185866181e-05, + "loss": 0.9589, + "step": 35135 + }, + { + "epoch": 0.04, + "learning_rate": 4.984868322972063e-05, + "loss": 0.9689, + "step": 35136 + }, + { + "epoch": 0.04, + "learning_rate": 4.984867460053416e-05, + "loss": 1.1717, + "step": 35137 + }, + { + "epoch": 0.04, + "learning_rate": 4.984866597110239e-05, + "loss": 1.0968, + "step": 35138 + }, + { + "epoch": 0.04, + "learning_rate": 4.984865734142533e-05, + "loss": 0.9518, + "step": 35139 + }, + { + "epoch": 0.04, + "learning_rate": 4.984864871150297e-05, + "loss": 0.5801, + "step": 35140 + }, + { + "epoch": 0.04, + "learning_rate": 4.984864008133532e-05, + "loss": 1.1701, + "step": 35141 + }, + { + "epoch": 0.04, + "learning_rate": 4.984863145092237e-05, + "loss": 1.1502, + "step": 35142 + }, + { + "epoch": 0.04, + "learning_rate": 4.984862282026413e-05, + "loss": 1.0741, + "step": 35143 + }, + { + "epoch": 0.04, + "learning_rate": 4.984861418936058e-05, + "loss": 1.0057, + "step": 35144 + }, + { + "epoch": 0.04, + "learning_rate": 4.984860555821175e-05, + "loss": 0.4241, + "step": 35145 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848596926817615e-05, + "loss": 0.8475, + "step": 35146 + }, + { + "epoch": 0.04, + "learning_rate": 4.984858829517819e-05, + "loss": 0.952, + "step": 35147 + }, + { + "epoch": 0.04, + "learning_rate": 4.984857966329347e-05, + "loss": 1.0483, + "step": 35148 + }, + { + "epoch": 0.04, + "learning_rate": 4.984857103116346e-05, + "loss": 0.9244, + "step": 35149 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848562398788145e-05, + "loss": 1.0326, + "step": 35150 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848553766167546e-05, + "loss": 0.9544, + "step": 35151 + }, + { + "epoch": 0.04, + "learning_rate": 4.984854513330165e-05, + "loss": 1.0788, + "step": 35152 + }, + { + "epoch": 0.04, + "learning_rate": 4.984853650019046e-05, + "loss": 1.0107, + "step": 35153 + }, + { + "epoch": 0.04, + "learning_rate": 4.984852786683397e-05, + "loss": 1.0957, + "step": 35154 + }, + { + "epoch": 0.04, + "learning_rate": 4.984851923323218e-05, + "loss": 1.2412, + "step": 35155 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848510599385114e-05, + "loss": 0.9693, + "step": 35156 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848501965292745e-05, + "loss": 1.2893, + "step": 35157 + }, + { + "epoch": 0.04, + "learning_rate": 4.984849333095508e-05, + "loss": 1.0818, + "step": 35158 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848484696372124e-05, + "loss": 1.0476, + "step": 35159 + }, + { + "epoch": 0.04, + "learning_rate": 4.984847606154387e-05, + "loss": 1.0029, + "step": 35160 + }, + { + "epoch": 0.04, + "learning_rate": 4.984846742647033e-05, + "loss": 1.9273, + "step": 35161 + }, + { + "epoch": 0.04, + "learning_rate": 4.984845879115149e-05, + "loss": 1.8755, + "step": 35162 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848450155587354e-05, + "loss": 1.5557, + "step": 35163 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848441519777934e-05, + "loss": 1.6269, + "step": 35164 + }, + { + "epoch": 0.04, + "learning_rate": 4.984843288372322e-05, + "loss": 1.0303, + "step": 35165 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848424247423204e-05, + "loss": 1.173, + "step": 35166 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848415610877895e-05, + "loss": 1.0805, + "step": 35167 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848406974087305e-05, + "loss": 0.8099, + "step": 35168 + }, + { + "epoch": 0.04, + "learning_rate": 4.984839833705141e-05, + "loss": 1.2668, + "step": 35169 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848389699770223e-05, + "loss": 1.4652, + "step": 35170 + }, + { + "epoch": 0.04, + "learning_rate": 4.984838106224375e-05, + "loss": 1.0246, + "step": 35171 + }, + { + "epoch": 0.04, + "learning_rate": 4.984837242447198e-05, + "loss": 1.1662, + "step": 35172 + }, + { + "epoch": 0.04, + "learning_rate": 4.984836378645492e-05, + "loss": 1.1885, + "step": 35173 + }, + { + "epoch": 0.04, + "learning_rate": 4.984835514819257e-05, + "loss": 0.8481, + "step": 35174 + }, + { + "epoch": 0.04, + "learning_rate": 4.984834650968492e-05, + "loss": 0.9474, + "step": 35175 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848337870931985e-05, + "loss": 0.8052, + "step": 35176 + }, + { + "epoch": 0.04, + "learning_rate": 4.984832923193374e-05, + "loss": 0.7693, + "step": 35177 + }, + { + "epoch": 0.04, + "learning_rate": 4.984832059269022e-05, + "loss": 0.8429, + "step": 35178 + }, + { + "epoch": 0.04, + "learning_rate": 4.984831195320141e-05, + "loss": 1.015, + "step": 35179 + }, + { + "epoch": 0.04, + "learning_rate": 4.98483033134673e-05, + "loss": 0.9952, + "step": 35180 + }, + { + "epoch": 0.04, + "learning_rate": 4.98482946734879e-05, + "loss": 0.9698, + "step": 35181 + }, + { + "epoch": 0.04, + "learning_rate": 4.984828603326321e-05, + "loss": 0.7457, + "step": 35182 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848277392793225e-05, + "loss": 0.8136, + "step": 35183 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848268752077946e-05, + "loss": 0.7728, + "step": 35184 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848260111117386e-05, + "loss": 1.0528, + "step": 35185 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848251469911525e-05, + "loss": 1.1624, + "step": 35186 + }, + { + "epoch": 0.04, + "learning_rate": 4.984824282846037e-05, + "loss": 0.9558, + "step": 35187 + }, + { + "epoch": 0.04, + "learning_rate": 4.984823418676393e-05, + "loss": 0.9299, + "step": 35188 + }, + { + "epoch": 0.04, + "learning_rate": 4.98482255448222e-05, + "loss": 0.3303, + "step": 35189 + }, + { + "epoch": 0.04, + "learning_rate": 4.984821690263517e-05, + "loss": 0.845, + "step": 35190 + }, + { + "epoch": 0.04, + "learning_rate": 4.984820826020285e-05, + "loss": 0.5868, + "step": 35191 + }, + { + "epoch": 0.04, + "learning_rate": 4.984819961752525e-05, + "loss": 0.9096, + "step": 35192 + }, + { + "epoch": 0.04, + "learning_rate": 4.984819097460236e-05, + "loss": 1.0104, + "step": 35193 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848182331434166e-05, + "loss": 0.7899, + "step": 35194 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848173688020686e-05, + "loss": 1.0698, + "step": 35195 + }, + { + "epoch": 0.04, + "learning_rate": 4.984816504436191e-05, + "loss": 0.9921, + "step": 35196 + }, + { + "epoch": 0.04, + "learning_rate": 4.984815640045786e-05, + "loss": 1.1188, + "step": 35197 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848147756308505e-05, + "loss": 1.102, + "step": 35198 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848139111913864e-05, + "loss": 1.0997, + "step": 35199 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848130467273935e-05, + "loss": 1.0516, + "step": 35200 + }, + { + "epoch": 0.04, + "learning_rate": 4.984812182238872e-05, + "loss": 0.8551, + "step": 35201 + }, + { + "epoch": 0.04, + "learning_rate": 4.98481131772582e-05, + "loss": 0.9198, + "step": 35202 + }, + { + "epoch": 0.04, + "learning_rate": 4.98481045318824e-05, + "loss": 0.9318, + "step": 35203 + }, + { + "epoch": 0.04, + "learning_rate": 4.98480958862613e-05, + "loss": 0.9444, + "step": 35204 + }, + { + "epoch": 0.04, + "learning_rate": 4.984808724039492e-05, + "loss": 1.0138, + "step": 35205 + }, + { + "epoch": 0.04, + "learning_rate": 4.984807859428326e-05, + "loss": 0.9552, + "step": 35206 + }, + { + "epoch": 0.04, + "learning_rate": 4.984806994792629e-05, + "loss": 0.8876, + "step": 35207 + }, + { + "epoch": 0.04, + "learning_rate": 4.984806130132404e-05, + "loss": 0.8737, + "step": 35208 + }, + { + "epoch": 0.04, + "learning_rate": 4.98480526544765e-05, + "loss": 1.104, + "step": 35209 + }, + { + "epoch": 0.04, + "learning_rate": 4.984804400738366e-05, + "loss": 1.079, + "step": 35210 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848035360045554e-05, + "loss": 0.9533, + "step": 35211 + }, + { + "epoch": 0.04, + "learning_rate": 4.984802671246214e-05, + "loss": 0.8287, + "step": 35212 + }, + { + "epoch": 0.04, + "learning_rate": 4.984801806463344e-05, + "loss": 0.82, + "step": 35213 + }, + { + "epoch": 0.04, + "learning_rate": 4.984800941655945e-05, + "loss": 0.9085, + "step": 35214 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848000768240175e-05, + "loss": 1.2352, + "step": 35215 + }, + { + "epoch": 0.04, + "learning_rate": 4.984799211967561e-05, + "loss": 0.9492, + "step": 35216 + }, + { + "epoch": 0.04, + "learning_rate": 4.984798347086575e-05, + "loss": 1.1153, + "step": 35217 + }, + { + "epoch": 0.04, + "learning_rate": 4.984797482181062e-05, + "loss": 1.0779, + "step": 35218 + }, + { + "epoch": 0.04, + "learning_rate": 4.984796617251019e-05, + "loss": 0.8904, + "step": 35219 + }, + { + "epoch": 0.04, + "learning_rate": 4.984795752296446e-05, + "loss": 0.7934, + "step": 35220 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847948873173454e-05, + "loss": 1.0323, + "step": 35221 + }, + { + "epoch": 0.04, + "learning_rate": 4.984794022313716e-05, + "loss": 1.1143, + "step": 35222 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847931572855564e-05, + "loss": 0.7686, + "step": 35223 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847922922328695e-05, + "loss": 0.3302, + "step": 35224 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847914271556526e-05, + "loss": 0.1488, + "step": 35225 + }, + { + "epoch": 0.04, + "learning_rate": 4.984790562053908e-05, + "loss": 0.0552, + "step": 35226 + }, + { + "epoch": 0.04, + "learning_rate": 4.984789696927634e-05, + "loss": 0.5229, + "step": 35227 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847888317768315e-05, + "loss": 0.9628, + "step": 35228 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847879666014997e-05, + "loss": 0.9744, + "step": 35229 + }, + { + "epoch": 0.04, + "learning_rate": 4.98478710140164e-05, + "loss": 0.7234, + "step": 35230 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847862361772505e-05, + "loss": 0.8032, + "step": 35231 + }, + { + "epoch": 0.04, + "learning_rate": 4.984785370928333e-05, + "loss": 0.9162, + "step": 35232 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847845056548865e-05, + "loss": 0.7407, + "step": 35233 + }, + { + "epoch": 0.04, + "learning_rate": 4.984783640356911e-05, + "loss": 1.4039, + "step": 35234 + }, + { + "epoch": 0.04, + "learning_rate": 4.984782775034407e-05, + "loss": 1.2139, + "step": 35235 + }, + { + "epoch": 0.04, + "learning_rate": 4.984781909687374e-05, + "loss": 0.9048, + "step": 35236 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847810443158126e-05, + "loss": 0.9498, + "step": 35237 + }, + { + "epoch": 0.04, + "learning_rate": 4.984780178919722e-05, + "loss": 1.1104, + "step": 35238 + }, + { + "epoch": 0.04, + "learning_rate": 4.984779313499104e-05, + "loss": 1.0529, + "step": 35239 + }, + { + "epoch": 0.04, + "learning_rate": 4.984778448053956e-05, + "loss": 0.7387, + "step": 35240 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847775825842804e-05, + "loss": 1.0026, + "step": 35241 + }, + { + "epoch": 0.04, + "learning_rate": 4.984776717090075e-05, + "loss": 1.3559, + "step": 35242 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847758515713414e-05, + "loss": 0.9414, + "step": 35243 + }, + { + "epoch": 0.04, + "learning_rate": 4.984774986028079e-05, + "loss": 0.8637, + "step": 35244 + }, + { + "epoch": 0.04, + "learning_rate": 4.984774120460288e-05, + "loss": 1.0638, + "step": 35245 + }, + { + "epoch": 0.04, + "learning_rate": 4.984773254867968e-05, + "loss": 1.0209, + "step": 35246 + }, + { + "epoch": 0.04, + "learning_rate": 4.98477238925112e-05, + "loss": 1.664, + "step": 35247 + }, + { + "epoch": 0.04, + "learning_rate": 4.984771523609744e-05, + "loss": 1.1672, + "step": 35248 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847706579438383e-05, + "loss": 1.25, + "step": 35249 + }, + { + "epoch": 0.04, + "learning_rate": 4.984769792253404e-05, + "loss": 0.9078, + "step": 35250 + }, + { + "epoch": 0.04, + "learning_rate": 4.984768926538441e-05, + "loss": 1.244, + "step": 35251 + }, + { + "epoch": 0.04, + "learning_rate": 4.98476806079895e-05, + "loss": 0.9359, + "step": 35252 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847671950349304e-05, + "loss": 0.5297, + "step": 35253 + }, + { + "epoch": 0.04, + "learning_rate": 4.984766329246382e-05, + "loss": 0.9301, + "step": 35254 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847654634333055e-05, + "loss": 0.9451, + "step": 35255 + }, + { + "epoch": 0.04, + "learning_rate": 4.984764597595699e-05, + "loss": 0.9379, + "step": 35256 + }, + { + "epoch": 0.04, + "learning_rate": 4.984763731733565e-05, + "loss": 0.8832, + "step": 35257 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847628658469024e-05, + "loss": 1.3297, + "step": 35258 + }, + { + "epoch": 0.04, + "learning_rate": 4.984761999935712e-05, + "loss": 1.1195, + "step": 35259 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847611339999916e-05, + "loss": 0.9588, + "step": 35260 + }, + { + "epoch": 0.04, + "learning_rate": 4.984760268039743e-05, + "loss": 0.8528, + "step": 35261 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847594020549674e-05, + "loss": 1.1645, + "step": 35262 + }, + { + "epoch": 0.04, + "learning_rate": 4.984758536045662e-05, + "loss": 1.1353, + "step": 35263 + }, + { + "epoch": 0.04, + "learning_rate": 4.984757670011828e-05, + "loss": 0.9786, + "step": 35264 + }, + { + "epoch": 0.04, + "learning_rate": 4.984756803953466e-05, + "loss": 1.0999, + "step": 35265 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847559378705756e-05, + "loss": 0.8423, + "step": 35266 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847550717631565e-05, + "loss": 0.8518, + "step": 35267 + }, + { + "epoch": 0.04, + "learning_rate": 4.984754205631209e-05, + "loss": 1.1776, + "step": 35268 + }, + { + "epoch": 0.04, + "learning_rate": 4.984753339474733e-05, + "loss": 0.7055, + "step": 35269 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847524732937276e-05, + "loss": 1.0613, + "step": 35270 + }, + { + "epoch": 0.04, + "learning_rate": 4.984751607088195e-05, + "loss": 0.6846, + "step": 35271 + }, + { + "epoch": 0.04, + "learning_rate": 4.984750740858134e-05, + "loss": 0.9037, + "step": 35272 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847498746035444e-05, + "loss": 1.0289, + "step": 35273 + }, + { + "epoch": 0.04, + "learning_rate": 4.984749008324426e-05, + "loss": 0.7932, + "step": 35274 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847481420207795e-05, + "loss": 0.7977, + "step": 35275 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847472756926053e-05, + "loss": 0.8965, + "step": 35276 + }, + { + "epoch": 0.04, + "learning_rate": 4.984746409339902e-05, + "loss": 1.0726, + "step": 35277 + }, + { + "epoch": 0.04, + "learning_rate": 4.98474554296267e-05, + "loss": 1.0342, + "step": 35278 + }, + { + "epoch": 0.04, + "learning_rate": 4.98474467656091e-05, + "loss": 1.2878, + "step": 35279 + }, + { + "epoch": 0.04, + "learning_rate": 4.984743810134621e-05, + "loss": 0.9038, + "step": 35280 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847429436838045e-05, + "loss": 1.1228, + "step": 35281 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847420772084593e-05, + "loss": 1.1851, + "step": 35282 + }, + { + "epoch": 0.04, + "learning_rate": 4.984741210708587e-05, + "loss": 0.991, + "step": 35283 + }, + { + "epoch": 0.04, + "learning_rate": 4.984740344184184e-05, + "loss": 0.8864, + "step": 35284 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847394776352544e-05, + "loss": 1.1357, + "step": 35285 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847386110617964e-05, + "loss": 1.1755, + "step": 35286 + }, + { + "epoch": 0.04, + "learning_rate": 4.98473774446381e-05, + "loss": 0.8697, + "step": 35287 + }, + { + "epoch": 0.04, + "learning_rate": 4.984736877841295e-05, + "loss": 1.199, + "step": 35288 + }, + { + "epoch": 0.04, + "learning_rate": 4.984736011194252e-05, + "loss": 1.168, + "step": 35289 + }, + { + "epoch": 0.04, + "learning_rate": 4.984735144522681e-05, + "loss": 0.5979, + "step": 35290 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847342778265806e-05, + "loss": 0.6703, + "step": 35291 + }, + { + "epoch": 0.04, + "learning_rate": 4.984733411105953e-05, + "loss": 0.7635, + "step": 35292 + }, + { + "epoch": 0.04, + "learning_rate": 4.984732544360797e-05, + "loss": 1.0139, + "step": 35293 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847316775911125e-05, + "loss": 0.9747, + "step": 35294 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847308107969e-05, + "loss": 0.9673, + "step": 35295 + }, + { + "epoch": 0.04, + "learning_rate": 4.984729943978159e-05, + "loss": 1.092, + "step": 35296 + }, + { + "epoch": 0.04, + "learning_rate": 4.98472907713489e-05, + "loss": 0.8167, + "step": 35297 + }, + { + "epoch": 0.04, + "learning_rate": 4.984728210267092e-05, + "loss": 0.8897, + "step": 35298 + }, + { + "epoch": 0.04, + "learning_rate": 4.984727343374767e-05, + "loss": 0.8506, + "step": 35299 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847264764579136e-05, + "loss": 1.0463, + "step": 35300 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847256095165316e-05, + "loss": 1.1365, + "step": 35301 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847247425506216e-05, + "loss": 0.7955, + "step": 35302 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847238755601836e-05, + "loss": 0.852, + "step": 35303 + }, + { + "epoch": 0.04, + "learning_rate": 4.984723008545218e-05, + "loss": 0.9365, + "step": 35304 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847221415057234e-05, + "loss": 1.0777, + "step": 35305 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847212744417005e-05, + "loss": 1.3479, + "step": 35306 + }, + { + "epoch": 0.04, + "learning_rate": 4.98472040735315e-05, + "loss": 1.1182, + "step": 35307 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847195402400714e-05, + "loss": 0.6509, + "step": 35308 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847186731024644e-05, + "loss": 0.8631, + "step": 35309 + }, + { + "epoch": 0.04, + "learning_rate": 4.98471780594033e-05, + "loss": 1.006, + "step": 35310 + }, + { + "epoch": 0.04, + "learning_rate": 4.984716938753666e-05, + "loss": 0.9823, + "step": 35311 + }, + { + "epoch": 0.04, + "learning_rate": 4.984716071542476e-05, + "loss": 0.9474, + "step": 35312 + }, + { + "epoch": 0.04, + "learning_rate": 4.984715204306756e-05, + "loss": 0.9207, + "step": 35313 + }, + { + "epoch": 0.04, + "learning_rate": 4.984714337046509e-05, + "loss": 0.9666, + "step": 35314 + }, + { + "epoch": 0.04, + "learning_rate": 4.984713469761734e-05, + "loss": 1.0452, + "step": 35315 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847126024524304e-05, + "loss": 1.3953, + "step": 35316 + }, + { + "epoch": 0.04, + "learning_rate": 4.984711735118599e-05, + "loss": 1.2963, + "step": 35317 + }, + { + "epoch": 0.04, + "learning_rate": 4.984710867760239e-05, + "loss": 1.0749, + "step": 35318 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847100003773524e-05, + "loss": 0.808, + "step": 35319 + }, + { + "epoch": 0.04, + "learning_rate": 4.984709132969937e-05, + "loss": 1.1279, + "step": 35320 + }, + { + "epoch": 0.04, + "learning_rate": 4.984708265537993e-05, + "loss": 1.0747, + "step": 35321 + }, + { + "epoch": 0.04, + "learning_rate": 4.984707398081522e-05, + "loss": 0.8571, + "step": 35322 + }, + { + "epoch": 0.04, + "learning_rate": 4.9847065306005225e-05, + "loss": 0.9806, + "step": 35323 + }, + { + "epoch": 0.04, + "learning_rate": 4.984705663094995e-05, + "loss": 0.6652, + "step": 35324 + }, + { + "epoch": 0.04, + "learning_rate": 4.984704795564939e-05, + "loss": 0.9588, + "step": 35325 + }, + { + "epoch": 0.04, + "learning_rate": 4.984703928010356e-05, + "loss": 1.0593, + "step": 35326 + }, + { + "epoch": 0.04, + "learning_rate": 4.984703060431245e-05, + "loss": 0.864, + "step": 35327 + }, + { + "epoch": 0.04, + "learning_rate": 4.984702192827606e-05, + "loss": 0.6823, + "step": 35328 + }, + { + "epoch": 0.04, + "learning_rate": 4.984701325199439e-05, + "loss": 0.479, + "step": 35329 + }, + { + "epoch": 0.04, + "learning_rate": 4.984700457546744e-05, + "loss": 1.0757, + "step": 35330 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846995898695206e-05, + "loss": 1.1516, + "step": 35331 + }, + { + "epoch": 0.04, + "learning_rate": 4.98469872216777e-05, + "loss": 1.1057, + "step": 35332 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846978544414915e-05, + "loss": 1.171, + "step": 35333 + }, + { + "epoch": 0.04, + "learning_rate": 4.984696986690685e-05, + "loss": 1.1742, + "step": 35334 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846961189153495e-05, + "loss": 0.9171, + "step": 35335 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846952511154874e-05, + "loss": 1.0105, + "step": 35336 + }, + { + "epoch": 0.04, + "learning_rate": 4.984694383291097e-05, + "loss": 1.1043, + "step": 35337 + }, + { + "epoch": 0.04, + "learning_rate": 4.984693515442179e-05, + "loss": 0.3692, + "step": 35338 + }, + { + "epoch": 0.04, + "learning_rate": 4.984692647568733e-05, + "loss": 1.9987, + "step": 35339 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846917796707595e-05, + "loss": 1.3376, + "step": 35340 + }, + { + "epoch": 0.04, + "learning_rate": 4.984690911748258e-05, + "loss": 0.8989, + "step": 35341 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846900438012276e-05, + "loss": 0.848, + "step": 35342 + }, + { + "epoch": 0.04, + "learning_rate": 4.984689175829671e-05, + "loss": 0.7614, + "step": 35343 + }, + { + "epoch": 0.04, + "learning_rate": 4.984688307833586e-05, + "loss": 1.1036, + "step": 35344 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846874398129725e-05, + "loss": 1.086, + "step": 35345 + }, + { + "epoch": 0.04, + "learning_rate": 4.984686571767832e-05, + "loss": 1.011, + "step": 35346 + }, + { + "epoch": 0.04, + "learning_rate": 4.984685703698163e-05, + "loss": 1.0425, + "step": 35347 + }, + { + "epoch": 0.04, + "learning_rate": 4.984684835603968e-05, + "loss": 0.8912, + "step": 35348 + }, + { + "epoch": 0.04, + "learning_rate": 4.984683967485243e-05, + "loss": 0.784, + "step": 35349 + }, + { + "epoch": 0.04, + "learning_rate": 4.984683099341991e-05, + "loss": 0.9387, + "step": 35350 + }, + { + "epoch": 0.04, + "learning_rate": 4.984682231174212e-05, + "loss": 0.7556, + "step": 35351 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846813629819047e-05, + "loss": 0.5313, + "step": 35352 + }, + { + "epoch": 0.04, + "learning_rate": 4.98468049476507e-05, + "loss": 0.8471, + "step": 35353 + }, + { + "epoch": 0.04, + "learning_rate": 4.984679626523707e-05, + "loss": 0.829, + "step": 35354 + }, + { + "epoch": 0.04, + "learning_rate": 4.984678758257817e-05, + "loss": 0.9784, + "step": 35355 + }, + { + "epoch": 0.04, + "learning_rate": 4.984677889967398e-05, + "loss": 1.0198, + "step": 35356 + }, + { + "epoch": 0.04, + "learning_rate": 4.984677021652453e-05, + "loss": 0.9779, + "step": 35357 + }, + { + "epoch": 0.04, + "learning_rate": 4.984676153312979e-05, + "loss": 1.1738, + "step": 35358 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846752849489784e-05, + "loss": 1.0099, + "step": 35359 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846744165604494e-05, + "loss": 0.9488, + "step": 35360 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846735481473925e-05, + "loss": 0.6395, + "step": 35361 + }, + { + "epoch": 0.04, + "learning_rate": 4.984672679709809e-05, + "loss": 0.9952, + "step": 35362 + }, + { + "epoch": 0.04, + "learning_rate": 4.984671811247698e-05, + "loss": 1.4528, + "step": 35363 + }, + { + "epoch": 0.04, + "learning_rate": 4.984670942761058e-05, + "loss": 1.8283, + "step": 35364 + }, + { + "epoch": 0.04, + "learning_rate": 4.984670074249891e-05, + "loss": 1.6398, + "step": 35365 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846692057141964e-05, + "loss": 1.0044, + "step": 35366 + }, + { + "epoch": 0.04, + "learning_rate": 4.984668337153974e-05, + "loss": 0.9758, + "step": 35367 + }, + { + "epoch": 0.04, + "learning_rate": 4.984667468569224e-05, + "loss": 1.0854, + "step": 35368 + }, + { + "epoch": 0.04, + "learning_rate": 4.984666599959946e-05, + "loss": 1.1148, + "step": 35369 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846657313261414e-05, + "loss": 1.1253, + "step": 35370 + }, + { + "epoch": 0.04, + "learning_rate": 4.984664862667809e-05, + "loss": 1.0509, + "step": 35371 + }, + { + "epoch": 0.04, + "learning_rate": 4.984663993984949e-05, + "loss": 1.1549, + "step": 35372 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846631252775614e-05, + "loss": 0.8335, + "step": 35373 + }, + { + "epoch": 0.04, + "learning_rate": 4.984662256545646e-05, + "loss": 1.1395, + "step": 35374 + }, + { + "epoch": 0.04, + "learning_rate": 4.984661387789203e-05, + "loss": 0.8623, + "step": 35375 + }, + { + "epoch": 0.04, + "learning_rate": 4.984660519008233e-05, + "loss": 1.0867, + "step": 35376 + }, + { + "epoch": 0.04, + "learning_rate": 4.984659650202735e-05, + "loss": 1.2312, + "step": 35377 + }, + { + "epoch": 0.04, + "learning_rate": 4.98465878137271e-05, + "loss": 1.1237, + "step": 35378 + }, + { + "epoch": 0.04, + "learning_rate": 4.984657912518157e-05, + "loss": 0.887, + "step": 35379 + }, + { + "epoch": 0.04, + "learning_rate": 4.984657043639076e-05, + "loss": 1.3979, + "step": 35380 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846561747354684e-05, + "loss": 0.9779, + "step": 35381 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846553058073334e-05, + "loss": 1.0117, + "step": 35382 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846544368546703e-05, + "loss": 1.0942, + "step": 35383 + }, + { + "epoch": 0.04, + "learning_rate": 4.98465356787748e-05, + "loss": 0.5182, + "step": 35384 + }, + { + "epoch": 0.04, + "learning_rate": 4.984652698875762e-05, + "loss": 0.9672, + "step": 35385 + }, + { + "epoch": 0.04, + "learning_rate": 4.984651829849517e-05, + "loss": 0.9896, + "step": 35386 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846509607987445e-05, + "loss": 0.9358, + "step": 35387 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846500917234446e-05, + "loss": 0.9495, + "step": 35388 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846492226236174e-05, + "loss": 1.1464, + "step": 35389 + }, + { + "epoch": 0.04, + "learning_rate": 4.984648353499263e-05, + "loss": 1.0408, + "step": 35390 + }, + { + "epoch": 0.04, + "learning_rate": 4.98464748435038e-05, + "loss": 1.0577, + "step": 35391 + }, + { + "epoch": 0.04, + "learning_rate": 4.984646615176971e-05, + "loss": 1.0566, + "step": 35392 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846457459790335e-05, + "loss": 1.1429, + "step": 35393 + }, + { + "epoch": 0.04, + "learning_rate": 4.984644876756569e-05, + "loss": 1.1724, + "step": 35394 + }, + { + "epoch": 0.04, + "learning_rate": 4.984644007509577e-05, + "loss": 0.9468, + "step": 35395 + }, + { + "epoch": 0.04, + "learning_rate": 4.984643138238058e-05, + "loss": 0.7389, + "step": 35396 + }, + { + "epoch": 0.04, + "learning_rate": 4.984642268942011e-05, + "loss": 0.5397, + "step": 35397 + }, + { + "epoch": 0.04, + "learning_rate": 4.984641399621438e-05, + "loss": 0.5567, + "step": 35398 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846405302763367e-05, + "loss": 0.5169, + "step": 35399 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846396609067084e-05, + "loss": 0.5509, + "step": 35400 + }, + { + "epoch": 0.04, + "learning_rate": 4.984638791512553e-05, + "loss": 0.5026, + "step": 35401 + }, + { + "epoch": 0.04, + "learning_rate": 4.984637922093869e-05, + "loss": 0.503, + "step": 35402 + }, + { + "epoch": 0.04, + "learning_rate": 4.984637052650659e-05, + "loss": 0.665, + "step": 35403 + }, + { + "epoch": 0.04, + "learning_rate": 4.984636183182921e-05, + "loss": 1.1975, + "step": 35404 + }, + { + "epoch": 0.04, + "learning_rate": 4.984635313690657e-05, + "loss": 1.115, + "step": 35405 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846344441738644e-05, + "loss": 0.7975, + "step": 35406 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846335746325446e-05, + "loss": 1.054, + "step": 35407 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846327050666975e-05, + "loss": 0.9657, + "step": 35408 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846318354763236e-05, + "loss": 1.3256, + "step": 35409 + }, + { + "epoch": 0.04, + "learning_rate": 4.984630965861422e-05, + "loss": 1.1433, + "step": 35410 + }, + { + "epoch": 0.04, + "learning_rate": 4.984630096221994e-05, + "loss": 0.8217, + "step": 35411 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846292265580386e-05, + "loss": 1.2064, + "step": 35412 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846283568695553e-05, + "loss": 1.1215, + "step": 35413 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846274871565454e-05, + "loss": 1.5455, + "step": 35414 + }, + { + "epoch": 0.04, + "learning_rate": 4.984626617419008e-05, + "loss": 0.7606, + "step": 35415 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846257476569433e-05, + "loss": 0.7441, + "step": 35416 + }, + { + "epoch": 0.04, + "learning_rate": 4.984624877870352e-05, + "loss": 0.7863, + "step": 35417 + }, + { + "epoch": 0.04, + "learning_rate": 4.984624008059233e-05, + "loss": 0.9624, + "step": 35418 + }, + { + "epoch": 0.04, + "learning_rate": 4.984623138223587e-05, + "loss": 1.1976, + "step": 35419 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846222683634136e-05, + "loss": 1.094, + "step": 35420 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846213984787135e-05, + "loss": 0.6869, + "step": 35421 + }, + { + "epoch": 0.04, + "learning_rate": 4.984620528569486e-05, + "loss": 0.8052, + "step": 35422 + }, + { + "epoch": 0.04, + "learning_rate": 4.984619658635732e-05, + "loss": 1.751, + "step": 35423 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846187886774495e-05, + "loss": 1.2522, + "step": 35424 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846179186946405e-05, + "loss": 1.0573, + "step": 35425 + }, + { + "epoch": 0.04, + "learning_rate": 4.984617048687305e-05, + "loss": 0.9347, + "step": 35426 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846161786554426e-05, + "loss": 1.1104, + "step": 35427 + }, + { + "epoch": 0.04, + "learning_rate": 4.984615308599052e-05, + "loss": 1.0164, + "step": 35428 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846144385181345e-05, + "loss": 1.0149, + "step": 35429 + }, + { + "epoch": 0.04, + "learning_rate": 4.98461356841269e-05, + "loss": 0.8992, + "step": 35430 + }, + { + "epoch": 0.04, + "learning_rate": 4.984612698282719e-05, + "loss": 1.0848, + "step": 35431 + }, + { + "epoch": 0.04, + "learning_rate": 4.984611828128221e-05, + "loss": 1.0297, + "step": 35432 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846109579491955e-05, + "loss": 1.0195, + "step": 35433 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846100877456436e-05, + "loss": 1.191, + "step": 35434 + }, + { + "epoch": 0.04, + "learning_rate": 4.984609217517564e-05, + "loss": 0.8629, + "step": 35435 + }, + { + "epoch": 0.04, + "learning_rate": 4.984608347264957e-05, + "loss": 0.834, + "step": 35436 + }, + { + "epoch": 0.04, + "learning_rate": 4.984607476987824e-05, + "loss": 1.0474, + "step": 35437 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846066066861634e-05, + "loss": 0.7199, + "step": 35438 + }, + { + "epoch": 0.04, + "learning_rate": 4.984605736359976e-05, + "loss": 0.7822, + "step": 35439 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846048660092615e-05, + "loss": 1.0809, + "step": 35440 + }, + { + "epoch": 0.04, + "learning_rate": 4.984603995634021e-05, + "loss": 1.2252, + "step": 35441 + }, + { + "epoch": 0.04, + "learning_rate": 4.984603125234252e-05, + "loss": 1.1013, + "step": 35442 + }, + { + "epoch": 0.04, + "learning_rate": 4.984602254809957e-05, + "loss": 1.1287, + "step": 35443 + }, + { + "epoch": 0.04, + "learning_rate": 4.9846013843611347e-05, + "loss": 1.203, + "step": 35444 + }, + { + "epoch": 0.04, + "learning_rate": 4.984600513887785e-05, + "loss": 1.2037, + "step": 35445 + }, + { + "epoch": 0.04, + "learning_rate": 4.984599643389909e-05, + "loss": 1.0088, + "step": 35446 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845987728675056e-05, + "loss": 1.2701, + "step": 35447 + }, + { + "epoch": 0.04, + "learning_rate": 4.984597902320576e-05, + "loss": 1.1544, + "step": 35448 + }, + { + "epoch": 0.04, + "learning_rate": 4.98459703174912e-05, + "loss": 1.0155, + "step": 35449 + }, + { + "epoch": 0.04, + "learning_rate": 4.984596161153136e-05, + "loss": 1.1083, + "step": 35450 + }, + { + "epoch": 0.04, + "learning_rate": 4.984595290532625e-05, + "loss": 1.0148, + "step": 35451 + }, + { + "epoch": 0.04, + "learning_rate": 4.984594419887587e-05, + "loss": 0.6365, + "step": 35452 + }, + { + "epoch": 0.04, + "learning_rate": 4.984593549218023e-05, + "loss": 1.109, + "step": 35453 + }, + { + "epoch": 0.04, + "learning_rate": 4.984592678523931e-05, + "loss": 0.9471, + "step": 35454 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845918078053137e-05, + "loss": 0.4936, + "step": 35455 + }, + { + "epoch": 0.04, + "learning_rate": 4.984590937062169e-05, + "loss": 0.7972, + "step": 35456 + }, + { + "epoch": 0.04, + "learning_rate": 4.984590066294497e-05, + "loss": 1.0959, + "step": 35457 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845891955022986e-05, + "loss": 0.8842, + "step": 35458 + }, + { + "epoch": 0.04, + "learning_rate": 4.984588324685573e-05, + "loss": 0.8078, + "step": 35459 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845874538443204e-05, + "loss": 1.1154, + "step": 35460 + }, + { + "epoch": 0.04, + "learning_rate": 4.984586582978542e-05, + "loss": 0.8691, + "step": 35461 + }, + { + "epoch": 0.04, + "learning_rate": 4.984585712088236e-05, + "loss": 0.8443, + "step": 35462 + }, + { + "epoch": 0.04, + "learning_rate": 4.984584841173403e-05, + "loss": 1.0395, + "step": 35463 + }, + { + "epoch": 0.04, + "learning_rate": 4.984583970234044e-05, + "loss": 0.921, + "step": 35464 + }, + { + "epoch": 0.04, + "learning_rate": 4.984583099270158e-05, + "loss": 1.181, + "step": 35465 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845822282817454e-05, + "loss": 0.829, + "step": 35466 + }, + { + "epoch": 0.04, + "learning_rate": 4.984581357268805e-05, + "loss": 0.8694, + "step": 35467 + }, + { + "epoch": 0.04, + "learning_rate": 4.984580486231339e-05, + "loss": 0.817, + "step": 35468 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845796151693456e-05, + "loss": 1.0431, + "step": 35469 + }, + { + "epoch": 0.04, + "learning_rate": 4.984578744082826e-05, + "loss": 1.2206, + "step": 35470 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845778729717796e-05, + "loss": 0.9296, + "step": 35471 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845770018362056e-05, + "loss": 1.1075, + "step": 35472 + }, + { + "epoch": 0.04, + "learning_rate": 4.984576130676106e-05, + "loss": 0.8346, + "step": 35473 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845752594914795e-05, + "loss": 0.8243, + "step": 35474 + }, + { + "epoch": 0.04, + "learning_rate": 4.984574388282326e-05, + "loss": 0.2984, + "step": 35475 + }, + { + "epoch": 0.04, + "learning_rate": 4.984573517048646e-05, + "loss": 0.0629, + "step": 35476 + }, + { + "epoch": 0.04, + "learning_rate": 4.98457264579044e-05, + "loss": 0.0291, + "step": 35477 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845717745077065e-05, + "loss": 0.0264, + "step": 35478 + }, + { + "epoch": 0.04, + "learning_rate": 4.984570903200446e-05, + "loss": 0.0841, + "step": 35479 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845700318686595e-05, + "loss": 0.2923, + "step": 35480 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845691605123466e-05, + "loss": 1.3376, + "step": 35481 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845682891315064e-05, + "loss": 1.1168, + "step": 35482 + }, + { + "epoch": 0.04, + "learning_rate": 4.98456741772614e-05, + "loss": 1.088, + "step": 35483 + }, + { + "epoch": 0.04, + "learning_rate": 4.984566546296247e-05, + "loss": 1.1247, + "step": 35484 + }, + { + "epoch": 0.04, + "learning_rate": 4.984565674841828e-05, + "loss": 1.1119, + "step": 35485 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845648033628815e-05, + "loss": 1.4529, + "step": 35486 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845639318594085e-05, + "loss": 0.9286, + "step": 35487 + }, + { + "epoch": 0.04, + "learning_rate": 4.984563060331409e-05, + "loss": 0.9182, + "step": 35488 + }, + { + "epoch": 0.04, + "learning_rate": 4.984562188778883e-05, + "loss": 0.9062, + "step": 35489 + }, + { + "epoch": 0.04, + "learning_rate": 4.984561317201831e-05, + "loss": 1.138, + "step": 35490 + }, + { + "epoch": 0.04, + "learning_rate": 4.984560445600251e-05, + "loss": 0.8796, + "step": 35491 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845595739741455e-05, + "loss": 0.6116, + "step": 35492 + }, + { + "epoch": 0.04, + "learning_rate": 4.984558702323514e-05, + "loss": 1.2383, + "step": 35493 + }, + { + "epoch": 0.04, + "learning_rate": 4.984557830648355e-05, + "loss": 1.1968, + "step": 35494 + }, + { + "epoch": 0.04, + "learning_rate": 4.98455695894867e-05, + "loss": 0.9655, + "step": 35495 + }, + { + "epoch": 0.04, + "learning_rate": 4.984556087224459e-05, + "loss": 1.207, + "step": 35496 + }, + { + "epoch": 0.04, + "learning_rate": 4.98455521547572e-05, + "loss": 1.1957, + "step": 35497 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845543437024555e-05, + "loss": 1.1315, + "step": 35498 + }, + { + "epoch": 0.04, + "learning_rate": 4.984553471904665e-05, + "loss": 1.2101, + "step": 35499 + }, + { + "epoch": 0.04, + "learning_rate": 4.984552600082347e-05, + "loss": 0.9307, + "step": 35500 + }, + { + "epoch": 0.04, + "eval_loss": 1.074920654296875, + "eval_runtime": 128.552, + "eval_samples_per_second": 10.774, + "eval_steps_per_second": 5.391, + "step": 35500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845517282355034e-05, + "loss": 1.0622, + "step": 35501 + }, + { + "epoch": 0.04, + "learning_rate": 4.984550856364133e-05, + "loss": 1.0732, + "step": 35502 + }, + { + "epoch": 0.04, + "learning_rate": 4.984549984468236e-05, + "loss": 0.9974, + "step": 35503 + }, + { + "epoch": 0.04, + "learning_rate": 4.984549112547813e-05, + "loss": 0.8338, + "step": 35504 + }, + { + "epoch": 0.04, + "learning_rate": 4.984548240602863e-05, + "loss": 0.379, + "step": 35505 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845473686333865e-05, + "loss": 0.7419, + "step": 35506 + }, + { + "epoch": 0.04, + "learning_rate": 4.984546496639384e-05, + "loss": 1.0488, + "step": 35507 + }, + { + "epoch": 0.04, + "learning_rate": 4.984545624620855e-05, + "loss": 1.0695, + "step": 35508 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845447525778e-05, + "loss": 0.882, + "step": 35509 + }, + { + "epoch": 0.04, + "learning_rate": 4.984543880510219e-05, + "loss": 1.0561, + "step": 35510 + }, + { + "epoch": 0.04, + "learning_rate": 4.98454300841811e-05, + "loss": 0.9131, + "step": 35511 + }, + { + "epoch": 0.04, + "learning_rate": 4.984542136301475e-05, + "loss": 0.6939, + "step": 35512 + }, + { + "epoch": 0.04, + "learning_rate": 4.984541264160315e-05, + "loss": 0.9903, + "step": 35513 + }, + { + "epoch": 0.04, + "learning_rate": 4.984540391994628e-05, + "loss": 1.1215, + "step": 35514 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845395198044145e-05, + "loss": 1.2417, + "step": 35515 + }, + { + "epoch": 0.04, + "learning_rate": 4.984538647589675e-05, + "loss": 0.951, + "step": 35516 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845377753504083e-05, + "loss": 0.9214, + "step": 35517 + }, + { + "epoch": 0.04, + "learning_rate": 4.984536903086616e-05, + "loss": 0.8145, + "step": 35518 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845360307982975e-05, + "loss": 0.7352, + "step": 35519 + }, + { + "epoch": 0.04, + "learning_rate": 4.984535158485453e-05, + "loss": 0.8548, + "step": 35520 + }, + { + "epoch": 0.04, + "learning_rate": 4.984534286148081e-05, + "loss": 0.8834, + "step": 35521 + }, + { + "epoch": 0.04, + "learning_rate": 4.984533413786183e-05, + "loss": 1.0662, + "step": 35522 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845325413997596e-05, + "loss": 0.9115, + "step": 35523 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845316689888094e-05, + "loss": 0.4238, + "step": 35524 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845307965533325e-05, + "loss": 0.8906, + "step": 35525 + }, + { + "epoch": 0.04, + "learning_rate": 4.984529924093331e-05, + "loss": 0.869, + "step": 35526 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845290516088015e-05, + "loss": 0.841, + "step": 35527 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845281790997466e-05, + "loss": 0.4218, + "step": 35528 + }, + { + "epoch": 0.04, + "learning_rate": 4.984527306566166e-05, + "loss": 0.6233, + "step": 35529 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845264340080574e-05, + "loss": 1.0356, + "step": 35530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845255614254245e-05, + "loss": 1.0757, + "step": 35531 + }, + { + "epoch": 0.04, + "learning_rate": 4.984524688818264e-05, + "loss": 0.9746, + "step": 35532 + }, + { + "epoch": 0.04, + "learning_rate": 4.984523816186578e-05, + "loss": 0.9402, + "step": 35533 + }, + { + "epoch": 0.04, + "learning_rate": 4.984522943530366e-05, + "loss": 0.9099, + "step": 35534 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845220708496273e-05, + "loss": 0.9519, + "step": 35535 + }, + { + "epoch": 0.04, + "learning_rate": 4.984521198144363e-05, + "loss": 1.1088, + "step": 35536 + }, + { + "epoch": 0.04, + "learning_rate": 4.984520325414572e-05, + "loss": 1.0734, + "step": 35537 + }, + { + "epoch": 0.04, + "learning_rate": 4.984519452660256e-05, + "loss": 0.7664, + "step": 35538 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845185798814134e-05, + "loss": 0.8211, + "step": 35539 + }, + { + "epoch": 0.04, + "learning_rate": 4.984517707078044e-05, + "loss": 1.0537, + "step": 35540 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845168342501487e-05, + "loss": 1.1324, + "step": 35541 + }, + { + "epoch": 0.04, + "learning_rate": 4.984515961397727e-05, + "loss": 1.0452, + "step": 35542 + }, + { + "epoch": 0.04, + "learning_rate": 4.98451508852078e-05, + "loss": 1.1262, + "step": 35543 + }, + { + "epoch": 0.04, + "learning_rate": 4.984514215619306e-05, + "loss": 0.8965, + "step": 35544 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845133426933064e-05, + "loss": 0.9886, + "step": 35545 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845124697427813e-05, + "loss": 1.2205, + "step": 35546 + }, + { + "epoch": 0.04, + "learning_rate": 4.984511596767729e-05, + "loss": 1.0511, + "step": 35547 + }, + { + "epoch": 0.04, + "learning_rate": 4.984510723768152e-05, + "loss": 1.0725, + "step": 35548 + }, + { + "epoch": 0.04, + "learning_rate": 4.984509850744048e-05, + "loss": 0.8924, + "step": 35549 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845089776954176e-05, + "loss": 1.041, + "step": 35550 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845081046222625e-05, + "loss": 0.9047, + "step": 35551 + }, + { + "epoch": 0.04, + "learning_rate": 4.984507231524581e-05, + "loss": 0.9395, + "step": 35552 + }, + { + "epoch": 0.04, + "learning_rate": 4.984506358402373e-05, + "loss": 1.0754, + "step": 35553 + }, + { + "epoch": 0.04, + "learning_rate": 4.984505485255639e-05, + "loss": 1.076, + "step": 35554 + }, + { + "epoch": 0.04, + "learning_rate": 4.984504612084379e-05, + "loss": 0.8581, + "step": 35555 + }, + { + "epoch": 0.04, + "learning_rate": 4.984503738888593e-05, + "loss": 0.9374, + "step": 35556 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845028656682816e-05, + "loss": 0.8387, + "step": 35557 + }, + { + "epoch": 0.04, + "learning_rate": 4.984501992423444e-05, + "loss": 0.8342, + "step": 35558 + }, + { + "epoch": 0.04, + "learning_rate": 4.9845011191540805e-05, + "loss": 0.7948, + "step": 35559 + }, + { + "epoch": 0.04, + "learning_rate": 4.98450024586019e-05, + "loss": 1.2674, + "step": 35560 + }, + { + "epoch": 0.04, + "learning_rate": 4.984499372541774e-05, + "loss": 1.349, + "step": 35561 + }, + { + "epoch": 0.04, + "learning_rate": 4.984498499198833e-05, + "loss": 0.8993, + "step": 35562 + }, + { + "epoch": 0.04, + "learning_rate": 4.984497625831366e-05, + "loss": 0.7697, + "step": 35563 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844967524393725e-05, + "loss": 1.0755, + "step": 35564 + }, + { + "epoch": 0.04, + "learning_rate": 4.984495879022853e-05, + "loss": 1.2239, + "step": 35565 + }, + { + "epoch": 0.04, + "learning_rate": 4.984495005581808e-05, + "loss": 1.3209, + "step": 35566 + }, + { + "epoch": 0.04, + "learning_rate": 4.984494132116237e-05, + "loss": 0.8872, + "step": 35567 + }, + { + "epoch": 0.04, + "learning_rate": 4.98449325862614e-05, + "loss": 0.9954, + "step": 35568 + }, + { + "epoch": 0.04, + "learning_rate": 4.984492385111517e-05, + "loss": 1.1882, + "step": 35569 + }, + { + "epoch": 0.04, + "learning_rate": 4.984491511572368e-05, + "loss": 0.8167, + "step": 35570 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844906380086944e-05, + "loss": 0.7586, + "step": 35571 + }, + { + "epoch": 0.04, + "learning_rate": 4.984489764420494e-05, + "loss": 1.0209, + "step": 35572 + }, + { + "epoch": 0.04, + "learning_rate": 4.984488890807767e-05, + "loss": 0.9987, + "step": 35573 + }, + { + "epoch": 0.04, + "learning_rate": 4.984488017170515e-05, + "loss": 0.8803, + "step": 35574 + }, + { + "epoch": 0.04, + "learning_rate": 4.984487143508737e-05, + "loss": 0.9528, + "step": 35575 + }, + { + "epoch": 0.04, + "learning_rate": 4.984486269822434e-05, + "loss": 0.7082, + "step": 35576 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844853961116047e-05, + "loss": 0.8565, + "step": 35577 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844845223762493e-05, + "loss": 0.7315, + "step": 35578 + }, + { + "epoch": 0.04, + "learning_rate": 4.984483648616368e-05, + "loss": 0.9103, + "step": 35579 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844827748319614e-05, + "loss": 0.9051, + "step": 35580 + }, + { + "epoch": 0.04, + "learning_rate": 4.984481901023029e-05, + "loss": 0.8553, + "step": 35581 + }, + { + "epoch": 0.04, + "learning_rate": 4.98448102718957e-05, + "loss": 1.0612, + "step": 35582 + }, + { + "epoch": 0.04, + "learning_rate": 4.984480153331586e-05, + "loss": 0.9809, + "step": 35583 + }, + { + "epoch": 0.04, + "learning_rate": 4.984479279449076e-05, + "loss": 0.8094, + "step": 35584 + }, + { + "epoch": 0.04, + "learning_rate": 4.984478405542041e-05, + "loss": 0.9999, + "step": 35585 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844775316104794e-05, + "loss": 0.8881, + "step": 35586 + }, + { + "epoch": 0.04, + "learning_rate": 4.984476657654392e-05, + "loss": 1.0075, + "step": 35587 + }, + { + "epoch": 0.04, + "learning_rate": 4.98447578367378e-05, + "loss": 0.8136, + "step": 35588 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844749096686414e-05, + "loss": 0.5092, + "step": 35589 + }, + { + "epoch": 0.04, + "learning_rate": 4.984474035638977e-05, + "loss": 0.2087, + "step": 35590 + }, + { + "epoch": 0.04, + "learning_rate": 4.984473161584787e-05, + "loss": 0.3954, + "step": 35591 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844722875060726e-05, + "loss": 0.9006, + "step": 35592 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844714134028306e-05, + "loss": 0.9634, + "step": 35593 + }, + { + "epoch": 0.04, + "learning_rate": 4.984470539275064e-05, + "loss": 1.1577, + "step": 35594 + }, + { + "epoch": 0.04, + "learning_rate": 4.984469665122772e-05, + "loss": 0.9211, + "step": 35595 + }, + { + "epoch": 0.04, + "learning_rate": 4.984468790945954e-05, + "loss": 0.9429, + "step": 35596 + }, + { + "epoch": 0.04, + "learning_rate": 4.98446791674461e-05, + "loss": 1.3022, + "step": 35597 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844670425187404e-05, + "loss": 0.9114, + "step": 35598 + }, + { + "epoch": 0.04, + "learning_rate": 4.984466168268346e-05, + "loss": 0.9705, + "step": 35599 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844652939934256e-05, + "loss": 1.1915, + "step": 35600 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844644196939796e-05, + "loss": 0.7148, + "step": 35601 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844635453700075e-05, + "loss": 0.6594, + "step": 35602 + }, + { + "epoch": 0.04, + "learning_rate": 4.98446267102151e-05, + "loss": 0.5457, + "step": 35603 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844617966484874e-05, + "loss": 0.8753, + "step": 35604 + }, + { + "epoch": 0.04, + "learning_rate": 4.984460922250939e-05, + "loss": 0.9331, + "step": 35605 + }, + { + "epoch": 0.04, + "learning_rate": 4.984460047828865e-05, + "loss": 0.9237, + "step": 35606 + }, + { + "epoch": 0.04, + "learning_rate": 4.984459173382265e-05, + "loss": 0.9059, + "step": 35607 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844582989111406e-05, + "loss": 1.0297, + "step": 35608 + }, + { + "epoch": 0.04, + "learning_rate": 4.98445742441549e-05, + "loss": 1.2184, + "step": 35609 + }, + { + "epoch": 0.04, + "learning_rate": 4.984456549895313e-05, + "loss": 1.1075, + "step": 35610 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844556753506124e-05, + "loss": 0.9655, + "step": 35611 + }, + { + "epoch": 0.04, + "learning_rate": 4.984454800781385e-05, + "loss": 0.9831, + "step": 35612 + }, + { + "epoch": 0.04, + "learning_rate": 4.984453926187632e-05, + "loss": 0.7876, + "step": 35613 + }, + { + "epoch": 0.04, + "learning_rate": 4.984453051569354e-05, + "loss": 0.4685, + "step": 35614 + }, + { + "epoch": 0.04, + "learning_rate": 4.984452176926551e-05, + "loss": 0.3275, + "step": 35615 + }, + { + "epoch": 0.04, + "learning_rate": 4.984451302259222e-05, + "loss": 0.8144, + "step": 35616 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844504275673674e-05, + "loss": 0.9203, + "step": 35617 + }, + { + "epoch": 0.04, + "learning_rate": 4.984449552850987e-05, + "loss": 1.0247, + "step": 35618 + }, + { + "epoch": 0.04, + "learning_rate": 4.984448678110082e-05, + "loss": 0.6517, + "step": 35619 + }, + { + "epoch": 0.04, + "learning_rate": 4.984447803344652e-05, + "loss": 0.7768, + "step": 35620 + }, + { + "epoch": 0.04, + "learning_rate": 4.984446928554695e-05, + "loss": 1.3702, + "step": 35621 + }, + { + "epoch": 0.04, + "learning_rate": 4.984446053740214e-05, + "loss": 1.2153, + "step": 35622 + }, + { + "epoch": 0.04, + "learning_rate": 4.984445178901207e-05, + "loss": 1.3629, + "step": 35623 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844443040376744e-05, + "loss": 1.3987, + "step": 35624 + }, + { + "epoch": 0.04, + "learning_rate": 4.984443429149617e-05, + "loss": 1.1554, + "step": 35625 + }, + { + "epoch": 0.04, + "learning_rate": 4.984442554237033e-05, + "loss": 1.2544, + "step": 35626 + }, + { + "epoch": 0.04, + "learning_rate": 4.984441679299925e-05, + "loss": 0.8952, + "step": 35627 + }, + { + "epoch": 0.04, + "learning_rate": 4.984440804338291e-05, + "loss": 1.1739, + "step": 35628 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844399293521315e-05, + "loss": 0.9961, + "step": 35629 + }, + { + "epoch": 0.04, + "learning_rate": 4.984439054341447e-05, + "loss": 0.6917, + "step": 35630 + }, + { + "epoch": 0.04, + "learning_rate": 4.984438179306237e-05, + "loss": 0.97, + "step": 35631 + }, + { + "epoch": 0.04, + "learning_rate": 4.984437304246502e-05, + "loss": 0.9295, + "step": 35632 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844364291622414e-05, + "loss": 0.768, + "step": 35633 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844355540534547e-05, + "loss": 1.0434, + "step": 35634 + }, + { + "epoch": 0.04, + "learning_rate": 4.984434678920144e-05, + "loss": 1.0461, + "step": 35635 + }, + { + "epoch": 0.04, + "learning_rate": 4.984433803762307e-05, + "loss": 1.0325, + "step": 35636 + }, + { + "epoch": 0.04, + "learning_rate": 4.984432928579946e-05, + "loss": 1.0239, + "step": 35637 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844320533730586e-05, + "loss": 0.7943, + "step": 35638 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844311781416466e-05, + "loss": 1.0785, + "step": 35639 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844303028857086e-05, + "loss": 0.8401, + "step": 35640 + }, + { + "epoch": 0.04, + "learning_rate": 4.984429427605246e-05, + "loss": 1.0434, + "step": 35641 + }, + { + "epoch": 0.04, + "learning_rate": 4.984428552300258e-05, + "loss": 0.9105, + "step": 35642 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844276769707446e-05, + "loss": 1.0619, + "step": 35643 + }, + { + "epoch": 0.04, + "learning_rate": 4.984426801616706e-05, + "loss": 1.3015, + "step": 35644 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844259262381426e-05, + "loss": 1.0374, + "step": 35645 + }, + { + "epoch": 0.04, + "learning_rate": 4.984425050835054e-05, + "loss": 0.5783, + "step": 35646 + }, + { + "epoch": 0.04, + "learning_rate": 4.984424175407439e-05, + "loss": 0.7916, + "step": 35647 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844232999553e-05, + "loss": 1.0366, + "step": 35648 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844224244786354e-05, + "loss": 0.985, + "step": 35649 + }, + { + "epoch": 0.04, + "learning_rate": 4.984421548977446e-05, + "loss": 1.0898, + "step": 35650 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844206734517315e-05, + "loss": 1.0672, + "step": 35651 + }, + { + "epoch": 0.04, + "learning_rate": 4.984419797901491e-05, + "loss": 0.8315, + "step": 35652 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844189223267256e-05, + "loss": 0.3422, + "step": 35653 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844180467274356e-05, + "loss": 0.1713, + "step": 35654 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844171711036204e-05, + "loss": 0.159, + "step": 35655 + }, + { + "epoch": 0.04, + "learning_rate": 4.98441629545528e-05, + "loss": 0.1263, + "step": 35656 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844154197824145e-05, + "loss": 0.3435, + "step": 35657 + }, + { + "epoch": 0.04, + "learning_rate": 4.984414544085023e-05, + "loss": 1.0127, + "step": 35658 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844136683631074e-05, + "loss": 0.3764, + "step": 35659 + }, + { + "epoch": 0.04, + "learning_rate": 4.984412792616667e-05, + "loss": 0.4668, + "step": 35660 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844119168457e-05, + "loss": 0.9383, + "step": 35661 + }, + { + "epoch": 0.04, + "learning_rate": 4.98441104105021e-05, + "loss": 0.5878, + "step": 35662 + }, + { + "epoch": 0.04, + "learning_rate": 4.984410165230193e-05, + "loss": 0.1589, + "step": 35663 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844092893856526e-05, + "loss": 0.7427, + "step": 35664 + }, + { + "epoch": 0.04, + "learning_rate": 4.984408413516586e-05, + "loss": 0.4986, + "step": 35665 + }, + { + "epoch": 0.04, + "learning_rate": 4.984407537622995e-05, + "loss": 1.1592, + "step": 35666 + }, + { + "epoch": 0.04, + "learning_rate": 4.984406661704879e-05, + "loss": 1.3337, + "step": 35667 + }, + { + "epoch": 0.04, + "learning_rate": 4.984405785762237e-05, + "loss": 0.9085, + "step": 35668 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844049097950714e-05, + "loss": 0.8542, + "step": 35669 + }, + { + "epoch": 0.04, + "learning_rate": 4.98440403380338e-05, + "loss": 0.9674, + "step": 35670 + }, + { + "epoch": 0.04, + "learning_rate": 4.984403157787164e-05, + "loss": 0.9193, + "step": 35671 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844022817464226e-05, + "loss": 0.8836, + "step": 35672 + }, + { + "epoch": 0.04, + "learning_rate": 4.984401405681156e-05, + "loss": 1.0496, + "step": 35673 + }, + { + "epoch": 0.04, + "learning_rate": 4.9844005295913657e-05, + "loss": 1.1534, + "step": 35674 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843996534770485e-05, + "loss": 0.9335, + "step": 35675 + }, + { + "epoch": 0.04, + "learning_rate": 4.984398777338208e-05, + "loss": 0.9213, + "step": 35676 + }, + { + "epoch": 0.04, + "learning_rate": 4.984397901174842e-05, + "loss": 1.1905, + "step": 35677 + }, + { + "epoch": 0.04, + "learning_rate": 4.984397024986951e-05, + "loss": 1.3698, + "step": 35678 + }, + { + "epoch": 0.04, + "learning_rate": 4.984396148774535e-05, + "loss": 1.7347, + "step": 35679 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843952725375944e-05, + "loss": 1.4196, + "step": 35680 + }, + { + "epoch": 0.04, + "learning_rate": 4.984394396276129e-05, + "loss": 1.2891, + "step": 35681 + }, + { + "epoch": 0.04, + "learning_rate": 4.984393519990138e-05, + "loss": 1.4047, + "step": 35682 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843926436796226e-05, + "loss": 1.4042, + "step": 35683 + }, + { + "epoch": 0.04, + "learning_rate": 4.984391767344582e-05, + "loss": 1.2641, + "step": 35684 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843908909850165e-05, + "loss": 1.2154, + "step": 35685 + }, + { + "epoch": 0.04, + "learning_rate": 4.984390014600927e-05, + "loss": 1.2608, + "step": 35686 + }, + { + "epoch": 0.04, + "learning_rate": 4.984389138192312e-05, + "loss": 1.3248, + "step": 35687 + }, + { + "epoch": 0.04, + "learning_rate": 4.984388261759172e-05, + "loss": 1.2879, + "step": 35688 + }, + { + "epoch": 0.04, + "learning_rate": 4.984387385301508e-05, + "loss": 1.3666, + "step": 35689 + }, + { + "epoch": 0.04, + "learning_rate": 4.984386508819318e-05, + "loss": 1.3216, + "step": 35690 + }, + { + "epoch": 0.04, + "learning_rate": 4.984385632312604e-05, + "loss": 1.2325, + "step": 35691 + }, + { + "epoch": 0.04, + "learning_rate": 4.984384755781365e-05, + "loss": 1.3281, + "step": 35692 + }, + { + "epoch": 0.04, + "learning_rate": 4.984383879225601e-05, + "loss": 1.1993, + "step": 35693 + }, + { + "epoch": 0.04, + "learning_rate": 4.984383002645312e-05, + "loss": 1.3123, + "step": 35694 + }, + { + "epoch": 0.04, + "learning_rate": 4.984382126040499e-05, + "loss": 1.3348, + "step": 35695 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843812494111606e-05, + "loss": 1.2534, + "step": 35696 + }, + { + "epoch": 0.04, + "learning_rate": 4.984380372757298e-05, + "loss": 1.242, + "step": 35697 + }, + { + "epoch": 0.04, + "learning_rate": 4.98437949607891e-05, + "loss": 1.335, + "step": 35698 + }, + { + "epoch": 0.04, + "learning_rate": 4.984378619375998e-05, + "loss": 1.078, + "step": 35699 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843777426485604e-05, + "loss": 0.9686, + "step": 35700 + }, + { + "epoch": 0.04, + "learning_rate": 4.984376865896598e-05, + "loss": 0.7852, + "step": 35701 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843759891201114e-05, + "loss": 0.9842, + "step": 35702 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843751123191006e-05, + "loss": 1.2287, + "step": 35703 + }, + { + "epoch": 0.04, + "learning_rate": 4.984374235493564e-05, + "loss": 1.1586, + "step": 35704 + }, + { + "epoch": 0.04, + "learning_rate": 4.984373358643504e-05, + "loss": 0.9486, + "step": 35705 + }, + { + "epoch": 0.04, + "learning_rate": 4.984372481768918e-05, + "loss": 1.3406, + "step": 35706 + }, + { + "epoch": 0.04, + "learning_rate": 4.984371604869808e-05, + "loss": 0.9539, + "step": 35707 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843707279461734e-05, + "loss": 1.0154, + "step": 35708 + }, + { + "epoch": 0.04, + "learning_rate": 4.984369850998013e-05, + "loss": 1.2615, + "step": 35709 + }, + { + "epoch": 0.04, + "learning_rate": 4.98436897402533e-05, + "loss": 0.9997, + "step": 35710 + }, + { + "epoch": 0.04, + "learning_rate": 4.984368097028121e-05, + "loss": 0.8818, + "step": 35711 + }, + { + "epoch": 0.04, + "learning_rate": 4.984367220006387e-05, + "loss": 0.6707, + "step": 35712 + }, + { + "epoch": 0.04, + "learning_rate": 4.98436634296013e-05, + "loss": 0.4047, + "step": 35713 + }, + { + "epoch": 0.04, + "learning_rate": 4.984365465889347e-05, + "loss": 0.42, + "step": 35714 + }, + { + "epoch": 0.04, + "learning_rate": 4.984364588794039e-05, + "loss": 1.534, + "step": 35715 + }, + { + "epoch": 0.04, + "learning_rate": 4.984363711674208e-05, + "loss": 1.4167, + "step": 35716 + }, + { + "epoch": 0.04, + "learning_rate": 4.984362834529851e-05, + "loss": 1.1574, + "step": 35717 + }, + { + "epoch": 0.04, + "learning_rate": 4.98436195736097e-05, + "loss": 0.6924, + "step": 35718 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843610801675646e-05, + "loss": 0.9367, + "step": 35719 + }, + { + "epoch": 0.04, + "learning_rate": 4.984360202949635e-05, + "loss": 0.8826, + "step": 35720 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843593257071795e-05, + "loss": 1.2747, + "step": 35721 + }, + { + "epoch": 0.04, + "learning_rate": 4.984358448440201e-05, + "loss": 1.2626, + "step": 35722 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843575711486964e-05, + "loss": 1.3765, + "step": 35723 + }, + { + "epoch": 0.04, + "learning_rate": 4.984356693832668e-05, + "loss": 1.0162, + "step": 35724 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843558164921154e-05, + "loss": 0.471, + "step": 35725 + }, + { + "epoch": 0.04, + "learning_rate": 4.984354939127038e-05, + "loss": 0.1131, + "step": 35726 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843540617374364e-05, + "loss": 0.0706, + "step": 35727 + }, + { + "epoch": 0.04, + "learning_rate": 4.984353184323309e-05, + "loss": 0.0601, + "step": 35728 + }, + { + "epoch": 0.04, + "learning_rate": 4.984352306884659e-05, + "loss": 0.057, + "step": 35729 + }, + { + "epoch": 0.04, + "learning_rate": 4.984351429421483e-05, + "loss": 0.0385, + "step": 35730 + }, + { + "epoch": 0.04, + "learning_rate": 4.984350551933784e-05, + "loss": 0.0489, + "step": 35731 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843496744215596e-05, + "loss": 0.075, + "step": 35732 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843487968848105e-05, + "loss": 0.7287, + "step": 35733 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843479193235375e-05, + "loss": 0.8929, + "step": 35734 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843470417377405e-05, + "loss": 1.1149, + "step": 35735 + }, + { + "epoch": 0.04, + "learning_rate": 4.984346164127418e-05, + "loss": 0.8072, + "step": 35736 + }, + { + "epoch": 0.04, + "learning_rate": 4.984345286492571e-05, + "loss": 1.1397, + "step": 35737 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843444088332e-05, + "loss": 0.9059, + "step": 35738 + }, + { + "epoch": 0.04, + "learning_rate": 4.984343531149305e-05, + "loss": 0.6744, + "step": 35739 + }, + { + "epoch": 0.04, + "learning_rate": 4.984342653440886e-05, + "loss": 1.006, + "step": 35740 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843417757079415e-05, + "loss": 0.8156, + "step": 35741 + }, + { + "epoch": 0.04, + "learning_rate": 4.984340897950473e-05, + "loss": 1.0036, + "step": 35742 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843400201684804e-05, + "loss": 0.6912, + "step": 35743 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843391423619635e-05, + "loss": 0.7325, + "step": 35744 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843382645309214e-05, + "loss": 1.281, + "step": 35745 + }, + { + "epoch": 0.04, + "learning_rate": 4.984337386675356e-05, + "loss": 1.089, + "step": 35746 + }, + { + "epoch": 0.04, + "learning_rate": 4.984336508795265e-05, + "loss": 1.1541, + "step": 35747 + }, + { + "epoch": 0.04, + "learning_rate": 4.984335630890651e-05, + "loss": 1.2013, + "step": 35748 + }, + { + "epoch": 0.04, + "learning_rate": 4.984334752961512e-05, + "loss": 1.1518, + "step": 35749 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843338750078495e-05, + "loss": 1.2795, + "step": 35750 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843329970296614e-05, + "loss": 1.0765, + "step": 35751 + }, + { + "epoch": 0.04, + "learning_rate": 4.98433211902695e-05, + "loss": 1.0017, + "step": 35752 + }, + { + "epoch": 0.04, + "learning_rate": 4.984331240999715e-05, + "loss": 1.2948, + "step": 35753 + }, + { + "epoch": 0.04, + "learning_rate": 4.984330362947954e-05, + "loss": 1.3663, + "step": 35754 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843294848716694e-05, + "loss": 1.2718, + "step": 35755 + }, + { + "epoch": 0.04, + "learning_rate": 4.984328606770861e-05, + "loss": 1.0776, + "step": 35756 + }, + { + "epoch": 0.04, + "learning_rate": 4.984327728645528e-05, + "loss": 1.0094, + "step": 35757 + }, + { + "epoch": 0.04, + "learning_rate": 4.984326850495671e-05, + "loss": 0.7132, + "step": 35758 + }, + { + "epoch": 0.04, + "learning_rate": 4.984325972321289e-05, + "loss": 1.1757, + "step": 35759 + }, + { + "epoch": 0.04, + "learning_rate": 4.984325094122383e-05, + "loss": 1.3481, + "step": 35760 + }, + { + "epoch": 0.04, + "learning_rate": 4.984324215898953e-05, + "loss": 2.5868, + "step": 35761 + }, + { + "epoch": 0.04, + "learning_rate": 4.984323337650999e-05, + "loss": 2.6195, + "step": 35762 + }, + { + "epoch": 0.04, + "learning_rate": 4.984322459378521e-05, + "loss": 1.415, + "step": 35763 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843215810815186e-05, + "loss": 0.9975, + "step": 35764 + }, + { + "epoch": 0.04, + "learning_rate": 4.984320702759992e-05, + "loss": 0.8945, + "step": 35765 + }, + { + "epoch": 0.04, + "learning_rate": 4.984319824413941e-05, + "loss": 0.8188, + "step": 35766 + }, + { + "epoch": 0.04, + "learning_rate": 4.984318946043366e-05, + "loss": 0.5958, + "step": 35767 + }, + { + "epoch": 0.04, + "learning_rate": 4.984318067648267e-05, + "loss": 1.0995, + "step": 35768 + }, + { + "epoch": 0.04, + "learning_rate": 4.984317189228643e-05, + "loss": 1.2327, + "step": 35769 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843163107844956e-05, + "loss": 1.0035, + "step": 35770 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843154323158246e-05, + "loss": 0.5474, + "step": 35771 + }, + { + "epoch": 0.04, + "learning_rate": 4.984314553822629e-05, + "loss": 0.9737, + "step": 35772 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843136753049094e-05, + "loss": 0.6853, + "step": 35773 + }, + { + "epoch": 0.04, + "learning_rate": 4.984312796762666e-05, + "loss": 0.8506, + "step": 35774 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843119181958977e-05, + "loss": 0.7606, + "step": 35775 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843110396046055e-05, + "loss": 0.9043, + "step": 35776 + }, + { + "epoch": 0.04, + "learning_rate": 4.984310160988789e-05, + "loss": 1.0766, + "step": 35777 + }, + { + "epoch": 0.04, + "learning_rate": 4.984309282348449e-05, + "loss": 0.9974, + "step": 35778 + }, + { + "epoch": 0.04, + "learning_rate": 4.984308403683585e-05, + "loss": 1.0074, + "step": 35779 + }, + { + "epoch": 0.04, + "learning_rate": 4.984307524994197e-05, + "loss": 1.0311, + "step": 35780 + }, + { + "epoch": 0.04, + "learning_rate": 4.984306646280285e-05, + "loss": 1.0388, + "step": 35781 + }, + { + "epoch": 0.04, + "learning_rate": 4.984305767541848e-05, + "loss": 1.0477, + "step": 35782 + }, + { + "epoch": 0.04, + "learning_rate": 4.984304888778888e-05, + "loss": 0.9412, + "step": 35783 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843040099914034e-05, + "loss": 0.7505, + "step": 35784 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843031311793955e-05, + "loss": 0.9229, + "step": 35785 + }, + { + "epoch": 0.04, + "learning_rate": 4.984302252342863e-05, + "loss": 1.1431, + "step": 35786 + }, + { + "epoch": 0.04, + "learning_rate": 4.984301373481807e-05, + "loss": 1.0372, + "step": 35787 + }, + { + "epoch": 0.04, + "learning_rate": 4.984300494596226e-05, + "loss": 0.9026, + "step": 35788 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842996156861225e-05, + "loss": 0.9564, + "step": 35789 + }, + { + "epoch": 0.04, + "learning_rate": 4.984298736751494e-05, + "loss": 1.1121, + "step": 35790 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842978577923414e-05, + "loss": 0.8662, + "step": 35791 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842969788086656e-05, + "loss": 0.645, + "step": 35792 + }, + { + "epoch": 0.04, + "learning_rate": 4.984296099800465e-05, + "loss": 0.9325, + "step": 35793 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842952207677414e-05, + "loss": 0.7765, + "step": 35794 + }, + { + "epoch": 0.04, + "learning_rate": 4.984294341710494e-05, + "loss": 1.3199, + "step": 35795 + }, + { + "epoch": 0.04, + "learning_rate": 4.984293462628722e-05, + "loss": 0.8793, + "step": 35796 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842925835224256e-05, + "loss": 0.864, + "step": 35797 + }, + { + "epoch": 0.04, + "learning_rate": 4.984291704391606e-05, + "loss": 1.0938, + "step": 35798 + }, + { + "epoch": 0.04, + "learning_rate": 4.984290825236263e-05, + "loss": 0.9801, + "step": 35799 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842899460563954e-05, + "loss": 1.2037, + "step": 35800 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842890668520045e-05, + "loss": 0.9513, + "step": 35801 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842881876230896e-05, + "loss": 1.1576, + "step": 35802 + }, + { + "epoch": 0.04, + "learning_rate": 4.984287308369651e-05, + "loss": 1.0167, + "step": 35803 + }, + { + "epoch": 0.04, + "learning_rate": 4.984286429091687e-05, + "loss": 0.9971, + "step": 35804 + }, + { + "epoch": 0.04, + "learning_rate": 4.984285549789201e-05, + "loss": 0.7449, + "step": 35805 + }, + { + "epoch": 0.04, + "learning_rate": 4.98428467046219e-05, + "loss": 0.6653, + "step": 35806 + }, + { + "epoch": 0.04, + "learning_rate": 4.984283791110656e-05, + "loss": 0.8425, + "step": 35807 + }, + { + "epoch": 0.04, + "learning_rate": 4.984282911734598e-05, + "loss": 1.1481, + "step": 35808 + }, + { + "epoch": 0.04, + "learning_rate": 4.984282032334017e-05, + "loss": 0.9919, + "step": 35809 + }, + { + "epoch": 0.04, + "learning_rate": 4.984281152908911e-05, + "loss": 1.9367, + "step": 35810 + }, + { + "epoch": 0.04, + "learning_rate": 4.984280273459282e-05, + "loss": 1.664, + "step": 35811 + }, + { + "epoch": 0.04, + "learning_rate": 4.984279393985128e-05, + "loss": 1.7057, + "step": 35812 + }, + { + "epoch": 0.04, + "learning_rate": 4.984278514486451e-05, + "loss": 1.8662, + "step": 35813 + }, + { + "epoch": 0.04, + "learning_rate": 4.984277634963251e-05, + "loss": 1.7142, + "step": 35814 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842767554155266e-05, + "loss": 1.4657, + "step": 35815 + }, + { + "epoch": 0.04, + "learning_rate": 4.984275875843278e-05, + "loss": 1.1538, + "step": 35816 + }, + { + "epoch": 0.04, + "learning_rate": 4.984274996246506e-05, + "loss": 0.9053, + "step": 35817 + }, + { + "epoch": 0.04, + "learning_rate": 4.984274116625211e-05, + "loss": 1.4847, + "step": 35818 + }, + { + "epoch": 0.04, + "learning_rate": 4.984273236979392e-05, + "loss": 1.0648, + "step": 35819 + }, + { + "epoch": 0.04, + "learning_rate": 4.984272357309049e-05, + "loss": 0.9422, + "step": 35820 + }, + { + "epoch": 0.04, + "learning_rate": 4.984271477614182e-05, + "loss": 1.0947, + "step": 35821 + }, + { + "epoch": 0.04, + "learning_rate": 4.984270597894792e-05, + "loss": 0.6728, + "step": 35822 + }, + { + "epoch": 0.04, + "learning_rate": 4.984269718150878e-05, + "loss": 1.0015, + "step": 35823 + }, + { + "epoch": 0.04, + "learning_rate": 4.98426883838244e-05, + "loss": 0.964, + "step": 35824 + }, + { + "epoch": 0.04, + "learning_rate": 4.984267958589479e-05, + "loss": 0.7428, + "step": 35825 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842670787719934e-05, + "loss": 1.3861, + "step": 35826 + }, + { + "epoch": 0.04, + "learning_rate": 4.984266198929985e-05, + "loss": 1.0199, + "step": 35827 + }, + { + "epoch": 0.04, + "learning_rate": 4.984265319063453e-05, + "loss": 1.107, + "step": 35828 + }, + { + "epoch": 0.04, + "learning_rate": 4.984264439172397e-05, + "loss": 1.2811, + "step": 35829 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842635592568175e-05, + "loss": 1.1253, + "step": 35830 + }, + { + "epoch": 0.04, + "learning_rate": 4.984262679316715e-05, + "loss": 1.1324, + "step": 35831 + }, + { + "epoch": 0.04, + "learning_rate": 4.984261799352088e-05, + "loss": 0.856, + "step": 35832 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842609193629374e-05, + "loss": 0.7683, + "step": 35833 + }, + { + "epoch": 0.04, + "learning_rate": 4.984260039349264e-05, + "loss": 1.1629, + "step": 35834 + }, + { + "epoch": 0.04, + "learning_rate": 4.984259159311067e-05, + "loss": 0.802, + "step": 35835 + }, + { + "epoch": 0.04, + "learning_rate": 4.984258279248345e-05, + "loss": 1.1234, + "step": 35836 + }, + { + "epoch": 0.04, + "learning_rate": 4.984257399161101e-05, + "loss": 1.4686, + "step": 35837 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842565190493326e-05, + "loss": 1.3127, + "step": 35838 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842556389130415e-05, + "loss": 1.1848, + "step": 35839 + }, + { + "epoch": 0.04, + "learning_rate": 4.984254758752226e-05, + "loss": 0.9607, + "step": 35840 + }, + { + "epoch": 0.04, + "learning_rate": 4.984253878566887e-05, + "loss": 0.9921, + "step": 35841 + }, + { + "epoch": 0.04, + "learning_rate": 4.984252998357025e-05, + "loss": 0.9933, + "step": 35842 + }, + { + "epoch": 0.04, + "learning_rate": 4.98425211812264e-05, + "loss": 0.985, + "step": 35843 + }, + { + "epoch": 0.04, + "learning_rate": 4.984251237863731e-05, + "loss": 0.7496, + "step": 35844 + }, + { + "epoch": 0.04, + "learning_rate": 4.984250357580298e-05, + "loss": 1.0202, + "step": 35845 + }, + { + "epoch": 0.04, + "learning_rate": 4.984249477272342e-05, + "loss": 0.8187, + "step": 35846 + }, + { + "epoch": 0.04, + "learning_rate": 4.984248596939862e-05, + "loss": 1.3175, + "step": 35847 + }, + { + "epoch": 0.04, + "learning_rate": 4.984247716582859e-05, + "loss": 0.9956, + "step": 35848 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842468362013325e-05, + "loss": 0.9331, + "step": 35849 + }, + { + "epoch": 0.04, + "learning_rate": 4.984245955795283e-05, + "loss": 1.2494, + "step": 35850 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842450753647096e-05, + "loss": 1.108, + "step": 35851 + }, + { + "epoch": 0.04, + "learning_rate": 4.984244194909613e-05, + "loss": 1.3438, + "step": 35852 + }, + { + "epoch": 0.04, + "learning_rate": 4.984243314429993e-05, + "loss": 1.1418, + "step": 35853 + }, + { + "epoch": 0.04, + "learning_rate": 4.984242433925849e-05, + "loss": 1.0287, + "step": 35854 + }, + { + "epoch": 0.04, + "learning_rate": 4.984241553397182e-05, + "loss": 1.4552, + "step": 35855 + }, + { + "epoch": 0.04, + "learning_rate": 4.984240672843992e-05, + "loss": 0.7341, + "step": 35856 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842397922662785e-05, + "loss": 1.0553, + "step": 35857 + }, + { + "epoch": 0.04, + "learning_rate": 4.984238911664041e-05, + "loss": 0.9907, + "step": 35858 + }, + { + "epoch": 0.04, + "learning_rate": 4.98423803103728e-05, + "loss": 1.0518, + "step": 35859 + }, + { + "epoch": 0.04, + "learning_rate": 4.984237150385997e-05, + "loss": 0.9776, + "step": 35860 + }, + { + "epoch": 0.04, + "learning_rate": 4.984236269710189e-05, + "loss": 1.4611, + "step": 35861 + }, + { + "epoch": 0.04, + "learning_rate": 4.984235389009859e-05, + "loss": 1.0435, + "step": 35862 + }, + { + "epoch": 0.04, + "learning_rate": 4.984234508285005e-05, + "loss": 0.9104, + "step": 35863 + }, + { + "epoch": 0.04, + "learning_rate": 4.984233627535628e-05, + "loss": 0.4088, + "step": 35864 + }, + { + "epoch": 0.04, + "learning_rate": 4.984232746761728e-05, + "loss": 0.2331, + "step": 35865 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842318659633045e-05, + "loss": 0.738, + "step": 35866 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842309851403576e-05, + "loss": 1.4269, + "step": 35867 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842301042928875e-05, + "loss": 1.4492, + "step": 35868 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842292234208933e-05, + "loss": 1.4628, + "step": 35869 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842283425243766e-05, + "loss": 1.4231, + "step": 35870 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842274616033365e-05, + "loss": 1.3268, + "step": 35871 + }, + { + "epoch": 0.04, + "learning_rate": 4.984226580657774e-05, + "loss": 1.3977, + "step": 35872 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842256996876865e-05, + "loss": 1.1331, + "step": 35873 + }, + { + "epoch": 0.04, + "learning_rate": 4.984224818693077e-05, + "loss": 0.874, + "step": 35874 + }, + { + "epoch": 0.04, + "learning_rate": 4.984223937673944e-05, + "loss": 1.319, + "step": 35875 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842230566302875e-05, + "loss": 1.0888, + "step": 35876 + }, + { + "epoch": 0.04, + "learning_rate": 4.984222175562108e-05, + "loss": 0.9834, + "step": 35877 + }, + { + "epoch": 0.04, + "learning_rate": 4.984221294469406e-05, + "loss": 0.8755, + "step": 35878 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842204133521794e-05, + "loss": 0.9303, + "step": 35879 + }, + { + "epoch": 0.04, + "learning_rate": 4.984219532210431e-05, + "loss": 1.0206, + "step": 35880 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842186510441594e-05, + "loss": 0.873, + "step": 35881 + }, + { + "epoch": 0.04, + "learning_rate": 4.984217769853363e-05, + "loss": 1.0413, + "step": 35882 + }, + { + "epoch": 0.04, + "learning_rate": 4.984216888638045e-05, + "loss": 1.5246, + "step": 35883 + }, + { + "epoch": 0.04, + "learning_rate": 4.984216007398204e-05, + "loss": 0.8918, + "step": 35884 + }, + { + "epoch": 0.04, + "learning_rate": 4.984215126133839e-05, + "loss": 0.9504, + "step": 35885 + }, + { + "epoch": 0.04, + "learning_rate": 4.984214244844951e-05, + "loss": 0.1445, + "step": 35886 + }, + { + "epoch": 0.04, + "learning_rate": 4.98421336353154e-05, + "loss": 0.1599, + "step": 35887 + }, + { + "epoch": 0.04, + "learning_rate": 4.984212482193605e-05, + "loss": 0.1363, + "step": 35888 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842116008311486e-05, + "loss": 0.1803, + "step": 35889 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842107194441686e-05, + "loss": 0.2928, + "step": 35890 + }, + { + "epoch": 0.04, + "learning_rate": 4.984209838032665e-05, + "loss": 0.1829, + "step": 35891 + }, + { + "epoch": 0.04, + "learning_rate": 4.984208956596639e-05, + "loss": 0.1328, + "step": 35892 + }, + { + "epoch": 0.04, + "learning_rate": 4.984208075136089e-05, + "loss": 0.2917, + "step": 35893 + }, + { + "epoch": 0.04, + "learning_rate": 4.984207193651016e-05, + "loss": 1.0335, + "step": 35894 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842063121414205e-05, + "loss": 0.9299, + "step": 35895 + }, + { + "epoch": 0.04, + "learning_rate": 4.984205430607302e-05, + "loss": 0.9394, + "step": 35896 + }, + { + "epoch": 0.04, + "learning_rate": 4.98420454904866e-05, + "loss": 0.9896, + "step": 35897 + }, + { + "epoch": 0.04, + "learning_rate": 4.984203667465496e-05, + "loss": 1.0243, + "step": 35898 + }, + { + "epoch": 0.04, + "learning_rate": 4.984202785857808e-05, + "loss": 1.1984, + "step": 35899 + }, + { + "epoch": 0.04, + "learning_rate": 4.9842019042255973e-05, + "loss": 0.9209, + "step": 35900 + }, + { + "epoch": 0.04, + "learning_rate": 4.984201022568864e-05, + "loss": 0.9694, + "step": 35901 + }, + { + "epoch": 0.04, + "learning_rate": 4.984200140887607e-05, + "loss": 0.9917, + "step": 35902 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841992591818274e-05, + "loss": 0.6432, + "step": 35903 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841983774515246e-05, + "loss": 1.0637, + "step": 35904 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841974956967e-05, + "loss": 1.0296, + "step": 35905 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841966139173505e-05, + "loss": 1.1093, + "step": 35906 + }, + { + "epoch": 0.04, + "learning_rate": 4.984195732113479e-05, + "loss": 1.0239, + "step": 35907 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841948502850846e-05, + "loss": 1.014, + "step": 35908 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841939684321673e-05, + "loss": 1.0401, + "step": 35909 + }, + { + "epoch": 0.04, + "learning_rate": 4.984193086554727e-05, + "loss": 1.068, + "step": 35910 + }, + { + "epoch": 0.04, + "learning_rate": 4.984192204652764e-05, + "loss": 0.8777, + "step": 35911 + }, + { + "epoch": 0.04, + "learning_rate": 4.984191322726278e-05, + "loss": 1.1457, + "step": 35912 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841904407752695e-05, + "loss": 0.9329, + "step": 35913 + }, + { + "epoch": 0.04, + "learning_rate": 4.984189558799737e-05, + "loss": 0.8354, + "step": 35914 + }, + { + "epoch": 0.04, + "learning_rate": 4.984188676799683e-05, + "loss": 0.748, + "step": 35915 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841877947751045e-05, + "loss": 1.0405, + "step": 35916 + }, + { + "epoch": 0.04, + "learning_rate": 4.984186912726004e-05, + "loss": 1.3368, + "step": 35917 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841860306523814e-05, + "loss": 0.7639, + "step": 35918 + }, + { + "epoch": 0.04, + "learning_rate": 4.984185148554236e-05, + "loss": 0.8809, + "step": 35919 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841842664315665e-05, + "loss": 0.855, + "step": 35920 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841833842843744e-05, + "loss": 0.6448, + "step": 35921 + }, + { + "epoch": 0.04, + "learning_rate": 4.98418250211266e-05, + "loss": 0.6011, + "step": 35922 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841816199164224e-05, + "loss": 0.4623, + "step": 35923 + }, + { + "epoch": 0.04, + "learning_rate": 4.984180737695663e-05, + "loss": 0.7507, + "step": 35924 + }, + { + "epoch": 0.04, + "learning_rate": 4.984179855450379e-05, + "loss": 0.6548, + "step": 35925 + }, + { + "epoch": 0.04, + "learning_rate": 4.984178973180574e-05, + "loss": 0.7536, + "step": 35926 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841780908862456e-05, + "loss": 0.9634, + "step": 35927 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841772085673945e-05, + "loss": 1.1703, + "step": 35928 + }, + { + "epoch": 0.04, + "learning_rate": 4.98417632622402e-05, + "loss": 0.976, + "step": 35929 + }, + { + "epoch": 0.04, + "learning_rate": 4.984175443856123e-05, + "loss": 1.076, + "step": 35930 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841745614637035e-05, + "loss": 1.0893, + "step": 35931 + }, + { + "epoch": 0.04, + "learning_rate": 4.984173679046762e-05, + "loss": 1.0244, + "step": 35932 + }, + { + "epoch": 0.04, + "learning_rate": 4.984172796605296e-05, + "loss": 0.9607, + "step": 35933 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841719141393095e-05, + "loss": 0.9838, + "step": 35934 + }, + { + "epoch": 0.04, + "learning_rate": 4.984171031648799e-05, + "loss": 0.8608, + "step": 35935 + }, + { + "epoch": 0.04, + "learning_rate": 4.984170149133765e-05, + "loss": 1.1162, + "step": 35936 + }, + { + "epoch": 0.04, + "learning_rate": 4.98416926659421e-05, + "loss": 1.2094, + "step": 35937 + }, + { + "epoch": 0.04, + "learning_rate": 4.984168384030132e-05, + "loss": 1.5595, + "step": 35938 + }, + { + "epoch": 0.04, + "learning_rate": 4.984167501441531e-05, + "loss": 1.677, + "step": 35939 + }, + { + "epoch": 0.04, + "learning_rate": 4.984166618828407e-05, + "loss": 0.8705, + "step": 35940 + }, + { + "epoch": 0.04, + "learning_rate": 4.98416573619076e-05, + "loss": 1.1032, + "step": 35941 + }, + { + "epoch": 0.04, + "learning_rate": 4.984164853528592e-05, + "loss": 0.9135, + "step": 35942 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841639708419e-05, + "loss": 1.0587, + "step": 35943 + }, + { + "epoch": 0.04, + "learning_rate": 4.984163088130686e-05, + "loss": 1.2486, + "step": 35944 + }, + { + "epoch": 0.04, + "learning_rate": 4.984162205394949e-05, + "loss": 0.7723, + "step": 35945 + }, + { + "epoch": 0.04, + "learning_rate": 4.98416132263469e-05, + "loss": 1.0054, + "step": 35946 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841604398499076e-05, + "loss": 0.5999, + "step": 35947 + }, + { + "epoch": 0.04, + "learning_rate": 4.984159557040603e-05, + "loss": 0.7698, + "step": 35948 + }, + { + "epoch": 0.04, + "learning_rate": 4.984158674206776e-05, + "loss": 0.7885, + "step": 35949 + }, + { + "epoch": 0.04, + "learning_rate": 4.984157791348426e-05, + "loss": 0.9145, + "step": 35950 + }, + { + "epoch": 0.04, + "learning_rate": 4.984156908465554e-05, + "loss": 1.3103, + "step": 35951 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841560255581585e-05, + "loss": 1.1283, + "step": 35952 + }, + { + "epoch": 0.04, + "learning_rate": 4.984155142626241e-05, + "loss": 0.934, + "step": 35953 + }, + { + "epoch": 0.04, + "learning_rate": 4.984154259669801e-05, + "loss": 0.9433, + "step": 35954 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841533766888395e-05, + "loss": 1.128, + "step": 35955 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841524936833537e-05, + "loss": 1.1882, + "step": 35956 + }, + { + "epoch": 0.04, + "learning_rate": 4.984151610653347e-05, + "loss": 0.8243, + "step": 35957 + }, + { + "epoch": 0.04, + "learning_rate": 4.984150727598816e-05, + "loss": 1.0738, + "step": 35958 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841498445197646e-05, + "loss": 0.9336, + "step": 35959 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841489614161896e-05, + "loss": 1.2405, + "step": 35960 + }, + { + "epoch": 0.04, + "learning_rate": 4.984148078288092e-05, + "loss": 1.0776, + "step": 35961 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841471951354726e-05, + "loss": 1.0468, + "step": 35962 + }, + { + "epoch": 0.04, + "learning_rate": 4.98414631195833e-05, + "loss": 0.8313, + "step": 35963 + }, + { + "epoch": 0.04, + "learning_rate": 4.984145428756665e-05, + "loss": 1.0999, + "step": 35964 + }, + { + "epoch": 0.04, + "learning_rate": 4.984144545530478e-05, + "loss": 0.847, + "step": 35965 + }, + { + "epoch": 0.04, + "learning_rate": 4.984143662279768e-05, + "loss": 0.6584, + "step": 35966 + }, + { + "epoch": 0.04, + "learning_rate": 4.984142779004536e-05, + "loss": 1.0656, + "step": 35967 + }, + { + "epoch": 0.04, + "learning_rate": 4.984141895704781e-05, + "loss": 1.7565, + "step": 35968 + }, + { + "epoch": 0.04, + "learning_rate": 4.984141012380504e-05, + "loss": 1.947, + "step": 35969 + }, + { + "epoch": 0.04, + "learning_rate": 4.984140129031705e-05, + "loss": 2.0046, + "step": 35970 + }, + { + "epoch": 0.04, + "learning_rate": 4.984139245658384e-05, + "loss": 1.8425, + "step": 35971 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841383622605395e-05, + "loss": 1.564, + "step": 35972 + }, + { + "epoch": 0.04, + "learning_rate": 4.984137478838173e-05, + "loss": 0.5267, + "step": 35973 + }, + { + "epoch": 0.04, + "learning_rate": 4.984136595391284e-05, + "loss": 0.6167, + "step": 35974 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841357119198734e-05, + "loss": 1.3876, + "step": 35975 + }, + { + "epoch": 0.04, + "learning_rate": 4.98413482842394e-05, + "loss": 1.4475, + "step": 35976 + }, + { + "epoch": 0.04, + "learning_rate": 4.984133944903484e-05, + "loss": 0.7751, + "step": 35977 + }, + { + "epoch": 0.04, + "learning_rate": 4.984133061358506e-05, + "loss": 0.9983, + "step": 35978 + }, + { + "epoch": 0.04, + "learning_rate": 4.984132177789006e-05, + "loss": 0.9912, + "step": 35979 + }, + { + "epoch": 0.04, + "learning_rate": 4.984131294194983e-05, + "loss": 1.0256, + "step": 35980 + }, + { + "epoch": 0.04, + "learning_rate": 4.984130410576438e-05, + "loss": 1.1346, + "step": 35981 + }, + { + "epoch": 0.04, + "learning_rate": 4.984129526933371e-05, + "loss": 0.5768, + "step": 35982 + }, + { + "epoch": 0.04, + "learning_rate": 4.984128643265781e-05, + "loss": 0.5961, + "step": 35983 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841277595736694e-05, + "loss": 0.412, + "step": 35984 + }, + { + "epoch": 0.04, + "learning_rate": 4.984126875857035e-05, + "loss": 0.4683, + "step": 35985 + }, + { + "epoch": 0.04, + "learning_rate": 4.984125992115879e-05, + "loss": 0.313, + "step": 35986 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841251083502e-05, + "loss": 0.3498, + "step": 35987 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841242245599996e-05, + "loss": 0.3361, + "step": 35988 + }, + { + "epoch": 0.04, + "learning_rate": 4.984123340745276e-05, + "loss": 0.4745, + "step": 35989 + }, + { + "epoch": 0.04, + "learning_rate": 4.984122456906031e-05, + "loss": 0.4374, + "step": 35990 + }, + { + "epoch": 0.04, + "learning_rate": 4.984121573042263e-05, + "loss": 0.2949, + "step": 35991 + }, + { + "epoch": 0.04, + "learning_rate": 4.984120689153974e-05, + "loss": 0.5101, + "step": 35992 + }, + { + "epoch": 0.04, + "learning_rate": 4.984119805241162e-05, + "loss": 1.1441, + "step": 35993 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841189213038276e-05, + "loss": 1.0594, + "step": 35994 + }, + { + "epoch": 0.04, + "learning_rate": 4.984118037341972e-05, + "loss": 1.1236, + "step": 35995 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841171533555934e-05, + "loss": 0.6291, + "step": 35996 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841162693446916e-05, + "loss": 0.5097, + "step": 35997 + }, + { + "epoch": 0.04, + "learning_rate": 4.984115385309269e-05, + "loss": 0.6067, + "step": 35998 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841145012493244e-05, + "loss": 0.5333, + "step": 35999 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841136171648575e-05, + "loss": 0.6414, + "step": 36000 + }, + { + "epoch": 0.04, + "eval_loss": 1.0994189977645874, + "eval_runtime": 127.0969, + "eval_samples_per_second": 10.897, + "eval_steps_per_second": 5.453, + "step": 36000 + }, + { + "epoch": 0.04, + "learning_rate": 4.984112733055869e-05, + "loss": 0.6357, + "step": 36001 + }, + { + "epoch": 0.04, + "learning_rate": 4.984111848922357e-05, + "loss": 0.9692, + "step": 36002 + }, + { + "epoch": 0.04, + "learning_rate": 4.984110964764324e-05, + "loss": 1.0329, + "step": 36003 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841100805817686e-05, + "loss": 0.863, + "step": 36004 + }, + { + "epoch": 0.04, + "learning_rate": 4.984109196374691e-05, + "loss": 1.1844, + "step": 36005 + }, + { + "epoch": 0.04, + "learning_rate": 4.984108312143091e-05, + "loss": 1.0277, + "step": 36006 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841074278869696e-05, + "loss": 1.3178, + "step": 36007 + }, + { + "epoch": 0.04, + "learning_rate": 4.984106543606326e-05, + "loss": 1.1538, + "step": 36008 + }, + { + "epoch": 0.04, + "learning_rate": 4.98410565930116e-05, + "loss": 0.6218, + "step": 36009 + }, + { + "epoch": 0.04, + "learning_rate": 4.984104774971472e-05, + "loss": 0.9653, + "step": 36010 + }, + { + "epoch": 0.04, + "learning_rate": 4.984103890617262e-05, + "loss": 1.2337, + "step": 36011 + }, + { + "epoch": 0.04, + "learning_rate": 4.98410300623853e-05, + "loss": 0.9217, + "step": 36012 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841021218352765e-05, + "loss": 1.234, + "step": 36013 + }, + { + "epoch": 0.04, + "learning_rate": 4.9841012374075003e-05, + "loss": 0.7716, + "step": 36014 + }, + { + "epoch": 0.04, + "learning_rate": 4.984100352955202e-05, + "loss": 0.9316, + "step": 36015 + }, + { + "epoch": 0.04, + "learning_rate": 4.984099468478382e-05, + "loss": 0.8498, + "step": 36016 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840985839770404e-05, + "loss": 0.7091, + "step": 36017 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840976994511765e-05, + "loss": 1.1869, + "step": 36018 + }, + { + "epoch": 0.04, + "learning_rate": 4.984096814900791e-05, + "loss": 0.8455, + "step": 36019 + }, + { + "epoch": 0.04, + "learning_rate": 4.984095930325883e-05, + "loss": 0.898, + "step": 36020 + }, + { + "epoch": 0.04, + "learning_rate": 4.984095045726453e-05, + "loss": 0.9959, + "step": 36021 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840941611025016e-05, + "loss": 0.9665, + "step": 36022 + }, + { + "epoch": 0.04, + "learning_rate": 4.984093276454028e-05, + "loss": 0.8556, + "step": 36023 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840923917810324e-05, + "loss": 1.0557, + "step": 36024 + }, + { + "epoch": 0.04, + "learning_rate": 4.984091507083515e-05, + "loss": 1.124, + "step": 36025 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840906223614756e-05, + "loss": 1.0641, + "step": 36026 + }, + { + "epoch": 0.04, + "learning_rate": 4.984089737614914e-05, + "loss": 0.8255, + "step": 36027 + }, + { + "epoch": 0.04, + "learning_rate": 4.984088852843831e-05, + "loss": 0.9285, + "step": 36028 + }, + { + "epoch": 0.04, + "learning_rate": 4.984087968048226e-05, + "loss": 0.9995, + "step": 36029 + }, + { + "epoch": 0.04, + "learning_rate": 4.984087083228099e-05, + "loss": 0.8917, + "step": 36030 + }, + { + "epoch": 0.04, + "learning_rate": 4.984086198383451e-05, + "loss": 0.6413, + "step": 36031 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840853135142796e-05, + "loss": 0.49, + "step": 36032 + }, + { + "epoch": 0.04, + "learning_rate": 4.984084428620587e-05, + "loss": 0.5568, + "step": 36033 + }, + { + "epoch": 0.04, + "learning_rate": 4.984083543702373e-05, + "loss": 0.9611, + "step": 36034 + }, + { + "epoch": 0.04, + "learning_rate": 4.984082658759637e-05, + "loss": 1.3466, + "step": 36035 + }, + { + "epoch": 0.04, + "learning_rate": 4.984081773792379e-05, + "loss": 1.2394, + "step": 36036 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840808888006e-05, + "loss": 0.9979, + "step": 36037 + }, + { + "epoch": 0.04, + "learning_rate": 4.984080003784298e-05, + "loss": 0.7268, + "step": 36038 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840791187434745e-05, + "loss": 0.4704, + "step": 36039 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840782336781285e-05, + "loss": 0.768, + "step": 36040 + }, + { + "epoch": 0.04, + "learning_rate": 4.984077348588263e-05, + "loss": 1.8639, + "step": 36041 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840764634738736e-05, + "loss": 1.4552, + "step": 36042 + }, + { + "epoch": 0.04, + "learning_rate": 4.984075578334964e-05, + "loss": 1.4004, + "step": 36043 + }, + { + "epoch": 0.04, + "learning_rate": 4.984074693171531e-05, + "loss": 0.6935, + "step": 36044 + }, + { + "epoch": 0.04, + "learning_rate": 4.984073807983577e-05, + "loss": 0.9069, + "step": 36045 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840729227711016e-05, + "loss": 1.0758, + "step": 36046 + }, + { + "epoch": 0.04, + "learning_rate": 4.984072037534104e-05, + "loss": 0.738, + "step": 36047 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840711522725853e-05, + "loss": 1.1073, + "step": 36048 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840702669865446e-05, + "loss": 1.2305, + "step": 36049 + }, + { + "epoch": 0.04, + "learning_rate": 4.984069381675982e-05, + "loss": 1.3163, + "step": 36050 + }, + { + "epoch": 0.04, + "learning_rate": 4.984068496340898e-05, + "loss": 1.3099, + "step": 36051 + }, + { + "epoch": 0.04, + "learning_rate": 4.984067610981292e-05, + "loss": 0.8676, + "step": 36052 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840667255971644e-05, + "loss": 1.2094, + "step": 36053 + }, + { + "epoch": 0.04, + "learning_rate": 4.984065840188515e-05, + "loss": 0.8381, + "step": 36054 + }, + { + "epoch": 0.04, + "learning_rate": 4.984064954755344e-05, + "loss": 0.3978, + "step": 36055 + }, + { + "epoch": 0.04, + "learning_rate": 4.984064069297652e-05, + "loss": 0.2875, + "step": 36056 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840631838154385e-05, + "loss": 0.7138, + "step": 36057 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840622983087024e-05, + "loss": 0.4342, + "step": 36058 + }, + { + "epoch": 0.04, + "learning_rate": 4.984061412777445e-05, + "loss": 0.1264, + "step": 36059 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840605272216656e-05, + "loss": 0.903, + "step": 36060 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840596416413656e-05, + "loss": 1.0193, + "step": 36061 + }, + { + "epoch": 0.04, + "learning_rate": 4.984058756036544e-05, + "loss": 0.9833, + "step": 36062 + }, + { + "epoch": 0.04, + "learning_rate": 4.984057870407199e-05, + "loss": 1.0391, + "step": 36063 + }, + { + "epoch": 0.04, + "learning_rate": 4.984056984753335e-05, + "loss": 1.2619, + "step": 36064 + }, + { + "epoch": 0.04, + "learning_rate": 4.984056099074948e-05, + "loss": 0.7886, + "step": 36065 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840552133720394e-05, + "loss": 0.9808, + "step": 36066 + }, + { + "epoch": 0.04, + "learning_rate": 4.984054327644609e-05, + "loss": 0.4059, + "step": 36067 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840534418926576e-05, + "loss": 0.8065, + "step": 36068 + }, + { + "epoch": 0.04, + "learning_rate": 4.984052556116184e-05, + "loss": 1.2725, + "step": 36069 + }, + { + "epoch": 0.04, + "learning_rate": 4.98405167031519e-05, + "loss": 1.1076, + "step": 36070 + }, + { + "epoch": 0.04, + "learning_rate": 4.984050784489674e-05, + "loss": 1.0758, + "step": 36071 + }, + { + "epoch": 0.04, + "learning_rate": 4.984049898639637e-05, + "loss": 0.8058, + "step": 36072 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840490127650776e-05, + "loss": 0.9706, + "step": 36073 + }, + { + "epoch": 0.04, + "learning_rate": 4.984048126865997e-05, + "loss": 0.8857, + "step": 36074 + }, + { + "epoch": 0.04, + "learning_rate": 4.984047240942395e-05, + "loss": 1.132, + "step": 36075 + }, + { + "epoch": 0.04, + "learning_rate": 4.984046354994272e-05, + "loss": 1.2272, + "step": 36076 + }, + { + "epoch": 0.04, + "learning_rate": 4.984045469021627e-05, + "loss": 0.9162, + "step": 36077 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840445830244606e-05, + "loss": 0.9859, + "step": 36078 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840436970027725e-05, + "loss": 1.0873, + "step": 36079 + }, + { + "epoch": 0.04, + "learning_rate": 4.984042810956563e-05, + "loss": 1.2033, + "step": 36080 + }, + { + "epoch": 0.04, + "learning_rate": 4.984041924885833e-05, + "loss": 0.9602, + "step": 36081 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840410387905805e-05, + "loss": 1.0285, + "step": 36082 + }, + { + "epoch": 0.04, + "learning_rate": 4.984040152670807e-05, + "loss": 0.9018, + "step": 36083 + }, + { + "epoch": 0.04, + "learning_rate": 4.984039266526512e-05, + "loss": 0.9401, + "step": 36084 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840383803576964e-05, + "loss": 1.018, + "step": 36085 + }, + { + "epoch": 0.04, + "learning_rate": 4.984037494164358e-05, + "loss": 0.8266, + "step": 36086 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840366079465e-05, + "loss": 0.8032, + "step": 36087 + }, + { + "epoch": 0.04, + "learning_rate": 4.984035721704119e-05, + "loss": 0.7684, + "step": 36088 + }, + { + "epoch": 0.04, + "learning_rate": 4.984034835437217e-05, + "loss": 0.9346, + "step": 36089 + }, + { + "epoch": 0.04, + "learning_rate": 4.984033949145794e-05, + "loss": 0.903, + "step": 36090 + }, + { + "epoch": 0.04, + "learning_rate": 4.984033062829849e-05, + "loss": 0.9084, + "step": 36091 + }, + { + "epoch": 0.04, + "learning_rate": 4.984032176489384e-05, + "loss": 0.8013, + "step": 36092 + }, + { + "epoch": 0.04, + "learning_rate": 4.984031290124397e-05, + "loss": 1.2265, + "step": 36093 + }, + { + "epoch": 0.04, + "learning_rate": 4.984030403734888e-05, + "loss": 0.874, + "step": 36094 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840295173208586e-05, + "loss": 1.26, + "step": 36095 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840286308823076e-05, + "loss": 0.7702, + "step": 36096 + }, + { + "epoch": 0.04, + "learning_rate": 4.984027744419235e-05, + "loss": 0.4496, + "step": 36097 + }, + { + "epoch": 0.04, + "learning_rate": 4.984026857931642e-05, + "loss": 0.9595, + "step": 36098 + }, + { + "epoch": 0.04, + "learning_rate": 4.984025971419527e-05, + "loss": 1.0887, + "step": 36099 + }, + { + "epoch": 0.04, + "learning_rate": 4.984025084882891e-05, + "loss": 0.8328, + "step": 36100 + }, + { + "epoch": 0.04, + "learning_rate": 4.984024198321734e-05, + "loss": 0.5845, + "step": 36101 + }, + { + "epoch": 0.04, + "learning_rate": 4.984023311736055e-05, + "loss": 1.1231, + "step": 36102 + }, + { + "epoch": 0.04, + "learning_rate": 4.984022425125855e-05, + "loss": 1.039, + "step": 36103 + }, + { + "epoch": 0.04, + "learning_rate": 4.984021538491134e-05, + "loss": 1.3474, + "step": 36104 + }, + { + "epoch": 0.04, + "learning_rate": 4.984020651831892e-05, + "loss": 0.9911, + "step": 36105 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840197651481286e-05, + "loss": 1.247, + "step": 36106 + }, + { + "epoch": 0.04, + "learning_rate": 4.984018878439844e-05, + "loss": 1.1287, + "step": 36107 + }, + { + "epoch": 0.04, + "learning_rate": 4.984017991707038e-05, + "loss": 1.1348, + "step": 36108 + }, + { + "epoch": 0.04, + "learning_rate": 4.984017104949711e-05, + "loss": 1.0161, + "step": 36109 + }, + { + "epoch": 0.04, + "learning_rate": 4.984016218167863e-05, + "loss": 1.1611, + "step": 36110 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840153313614934e-05, + "loss": 0.9976, + "step": 36111 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840144445306035e-05, + "loss": 1.1252, + "step": 36112 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840135576751916e-05, + "loss": 1.1123, + "step": 36113 + }, + { + "epoch": 0.04, + "learning_rate": 4.984012670795259e-05, + "loss": 1.0918, + "step": 36114 + }, + { + "epoch": 0.04, + "learning_rate": 4.984011783890805e-05, + "loss": 0.9481, + "step": 36115 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840108969618306e-05, + "loss": 1.0242, + "step": 36116 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840100100083344e-05, + "loss": 1.0893, + "step": 36117 + }, + { + "epoch": 0.04, + "learning_rate": 4.984009123030316e-05, + "loss": 1.0855, + "step": 36118 + }, + { + "epoch": 0.04, + "learning_rate": 4.984008236027778e-05, + "loss": 0.8959, + "step": 36119 + }, + { + "epoch": 0.04, + "learning_rate": 4.984007349000719e-05, + "loss": 0.8306, + "step": 36120 + }, + { + "epoch": 0.04, + "learning_rate": 4.984006461949139e-05, + "loss": 0.7968, + "step": 36121 + }, + { + "epoch": 0.04, + "learning_rate": 4.984005574873038e-05, + "loss": 0.8668, + "step": 36122 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840046877724146e-05, + "loss": 0.6239, + "step": 36123 + }, + { + "epoch": 0.04, + "learning_rate": 4.984003800647271e-05, + "loss": 0.7585, + "step": 36124 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840029134976065e-05, + "loss": 1.0639, + "step": 36125 + }, + { + "epoch": 0.04, + "learning_rate": 4.98400202632342e-05, + "loss": 1.1409, + "step": 36126 + }, + { + "epoch": 0.04, + "learning_rate": 4.9840011391247135e-05, + "loss": 0.9677, + "step": 36127 + }, + { + "epoch": 0.04, + "learning_rate": 4.984000251901486e-05, + "loss": 1.9685, + "step": 36128 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839993646537374e-05, + "loss": 1.2319, + "step": 36129 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839984773814674e-05, + "loss": 0.4266, + "step": 36130 + }, + { + "epoch": 0.04, + "learning_rate": 4.983997590084677e-05, + "loss": 0.3192, + "step": 36131 + }, + { + "epoch": 0.04, + "learning_rate": 4.983996702763365e-05, + "loss": 0.2244, + "step": 36132 + }, + { + "epoch": 0.04, + "learning_rate": 4.983995815417533e-05, + "loss": 0.1375, + "step": 36133 + }, + { + "epoch": 0.04, + "learning_rate": 4.983994928047179e-05, + "loss": 0.1324, + "step": 36134 + }, + { + "epoch": 0.04, + "learning_rate": 4.983994040652305e-05, + "loss": 0.5312, + "step": 36135 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839931532329086e-05, + "loss": 0.2103, + "step": 36136 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839922657889925e-05, + "loss": 0.268, + "step": 36137 + }, + { + "epoch": 0.04, + "learning_rate": 4.983991378320555e-05, + "loss": 0.1435, + "step": 36138 + }, + { + "epoch": 0.04, + "learning_rate": 4.983990490827597e-05, + "loss": 0.2541, + "step": 36139 + }, + { + "epoch": 0.04, + "learning_rate": 4.983989603310117e-05, + "loss": 0.2068, + "step": 36140 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839887157681175e-05, + "loss": 0.1039, + "step": 36141 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839878282015964e-05, + "loss": 0.2614, + "step": 36142 + }, + { + "epoch": 0.04, + "learning_rate": 4.983986940610554e-05, + "loss": 0.1407, + "step": 36143 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839860529949925e-05, + "loss": 0.1547, + "step": 36144 + }, + { + "epoch": 0.04, + "learning_rate": 4.983985165354908e-05, + "loss": 0.1354, + "step": 36145 + }, + { + "epoch": 0.04, + "learning_rate": 4.983984277690304e-05, + "loss": 0.1374, + "step": 36146 + }, + { + "epoch": 0.04, + "learning_rate": 4.983983390001179e-05, + "loss": 0.6704, + "step": 36147 + }, + { + "epoch": 0.04, + "learning_rate": 4.983982502287532e-05, + "loss": 1.2066, + "step": 36148 + }, + { + "epoch": 0.04, + "learning_rate": 4.983981614549366e-05, + "loss": 0.8632, + "step": 36149 + }, + { + "epoch": 0.04, + "learning_rate": 4.983980726786678e-05, + "loss": 1.1672, + "step": 36150 + }, + { + "epoch": 0.04, + "learning_rate": 4.983979838999469e-05, + "loss": 1.1825, + "step": 36151 + }, + { + "epoch": 0.04, + "learning_rate": 4.98397895118774e-05, + "loss": 1.1686, + "step": 36152 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839780633514897e-05, + "loss": 1.2156, + "step": 36153 + }, + { + "epoch": 0.04, + "learning_rate": 4.983977175490718e-05, + "loss": 1.1535, + "step": 36154 + }, + { + "epoch": 0.04, + "learning_rate": 4.983976287605427e-05, + "loss": 0.9208, + "step": 36155 + }, + { + "epoch": 0.04, + "learning_rate": 4.983975399695614e-05, + "loss": 0.9759, + "step": 36156 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839745117612815e-05, + "loss": 1.2419, + "step": 36157 + }, + { + "epoch": 0.04, + "learning_rate": 4.983973623802427e-05, + "loss": 0.9781, + "step": 36158 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839727358190525e-05, + "loss": 0.8762, + "step": 36159 + }, + { + "epoch": 0.04, + "learning_rate": 4.983971847811157e-05, + "loss": 0.6648, + "step": 36160 + }, + { + "epoch": 0.04, + "learning_rate": 4.983970959778741e-05, + "loss": 0.9013, + "step": 36161 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839700717218036e-05, + "loss": 0.9752, + "step": 36162 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839691836403455e-05, + "loss": 1.05, + "step": 36163 + }, + { + "epoch": 0.04, + "learning_rate": 4.983968295534368e-05, + "loss": 0.8705, + "step": 36164 + }, + { + "epoch": 0.04, + "learning_rate": 4.983967407403869e-05, + "loss": 1.1399, + "step": 36165 + }, + { + "epoch": 0.04, + "learning_rate": 4.983966519248849e-05, + "loss": 1.0621, + "step": 36166 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839656310693084e-05, + "loss": 1.0423, + "step": 36167 + }, + { + "epoch": 0.04, + "learning_rate": 4.983964742865247e-05, + "loss": 0.9834, + "step": 36168 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839638546366656e-05, + "loss": 1.2641, + "step": 36169 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839629663835634e-05, + "loss": 1.0447, + "step": 36170 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839620781059405e-05, + "loss": 1.2176, + "step": 36171 + }, + { + "epoch": 0.04, + "learning_rate": 4.983961189803797e-05, + "loss": 0.7322, + "step": 36172 + }, + { + "epoch": 0.04, + "learning_rate": 4.983960301477133e-05, + "loss": 0.7928, + "step": 36173 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839594131259485e-05, + "loss": 1.1723, + "step": 36174 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839585247502426e-05, + "loss": 1.0021, + "step": 36175 + }, + { + "epoch": 0.04, + "learning_rate": 4.983957636350016e-05, + "loss": 1.071, + "step": 36176 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839567479252705e-05, + "loss": 1.0746, + "step": 36177 + }, + { + "epoch": 0.04, + "learning_rate": 4.983955859476003e-05, + "loss": 0.7886, + "step": 36178 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839549710022146e-05, + "loss": 0.9904, + "step": 36179 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839540825039065e-05, + "loss": 0.9323, + "step": 36180 + }, + { + "epoch": 0.04, + "learning_rate": 4.983953193981078e-05, + "loss": 1.3339, + "step": 36181 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839523054337285e-05, + "loss": 0.7238, + "step": 36182 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839514168618586e-05, + "loss": 1.0126, + "step": 36183 + }, + { + "epoch": 0.04, + "learning_rate": 4.983950528265468e-05, + "loss": 0.814, + "step": 36184 + }, + { + "epoch": 0.04, + "learning_rate": 4.983949639644557e-05, + "loss": 0.9338, + "step": 36185 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839487509991254e-05, + "loss": 2.0373, + "step": 36186 + }, + { + "epoch": 0.04, + "learning_rate": 4.983947862329174e-05, + "loss": 1.7628, + "step": 36187 + }, + { + "epoch": 0.04, + "learning_rate": 4.983946973634701e-05, + "loss": 1.0126, + "step": 36188 + }, + { + "epoch": 0.04, + "learning_rate": 4.983946084915708e-05, + "loss": 0.7795, + "step": 36189 + }, + { + "epoch": 0.04, + "learning_rate": 4.983945196172195e-05, + "loss": 0.7862, + "step": 36190 + }, + { + "epoch": 0.04, + "learning_rate": 4.983944307404162e-05, + "loss": 0.9197, + "step": 36191 + }, + { + "epoch": 0.04, + "learning_rate": 4.983943418611607e-05, + "loss": 1.0472, + "step": 36192 + }, + { + "epoch": 0.04, + "learning_rate": 4.983942529794532e-05, + "loss": 1.2454, + "step": 36193 + }, + { + "epoch": 0.04, + "learning_rate": 4.983941640952937e-05, + "loss": 1.0829, + "step": 36194 + }, + { + "epoch": 0.04, + "learning_rate": 4.983940752086821e-05, + "loss": 1.0528, + "step": 36195 + }, + { + "epoch": 0.04, + "learning_rate": 4.983939863196185e-05, + "loss": 0.9795, + "step": 36196 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839389742810286e-05, + "loss": 0.8911, + "step": 36197 + }, + { + "epoch": 0.04, + "learning_rate": 4.983938085341352e-05, + "loss": 1.1514, + "step": 36198 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839371963771546e-05, + "loss": 1.1321, + "step": 36199 + }, + { + "epoch": 0.04, + "learning_rate": 4.983936307388437e-05, + "loss": 1.0051, + "step": 36200 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839354183752e-05, + "loss": 1.052, + "step": 36201 + }, + { + "epoch": 0.04, + "learning_rate": 4.983934529337441e-05, + "loss": 0.9704, + "step": 36202 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839336402751624e-05, + "loss": 0.7938, + "step": 36203 + }, + { + "epoch": 0.04, + "learning_rate": 4.983932751188363e-05, + "loss": 0.7989, + "step": 36204 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839318620770435e-05, + "loss": 0.8196, + "step": 36205 + }, + { + "epoch": 0.04, + "learning_rate": 4.983930972941204e-05, + "loss": 0.9134, + "step": 36206 + }, + { + "epoch": 0.04, + "learning_rate": 4.983930083780844e-05, + "loss": 0.8086, + "step": 36207 + }, + { + "epoch": 0.04, + "learning_rate": 4.983929194595964e-05, + "loss": 1.1251, + "step": 36208 + }, + { + "epoch": 0.04, + "learning_rate": 4.983928305386563e-05, + "loss": 1.3068, + "step": 36209 + }, + { + "epoch": 0.04, + "learning_rate": 4.983927416152642e-05, + "loss": 0.9173, + "step": 36210 + }, + { + "epoch": 0.04, + "learning_rate": 4.983926526894201e-05, + "loss": 1.0446, + "step": 36211 + }, + { + "epoch": 0.04, + "learning_rate": 4.98392563761124e-05, + "loss": 0.911, + "step": 36212 + }, + { + "epoch": 0.04, + "learning_rate": 4.983924748303758e-05, + "loss": 1.0577, + "step": 36213 + }, + { + "epoch": 0.04, + "learning_rate": 4.983923858971756e-05, + "loss": 0.7771, + "step": 36214 + }, + { + "epoch": 0.04, + "learning_rate": 4.983922969615233e-05, + "loss": 0.7869, + "step": 36215 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839220802341914e-05, + "loss": 0.5327, + "step": 36216 + }, + { + "epoch": 0.04, + "learning_rate": 4.983921190828628e-05, + "loss": 0.866, + "step": 36217 + }, + { + "epoch": 0.04, + "learning_rate": 4.983920301398545e-05, + "loss": 0.756, + "step": 36218 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839194119439424e-05, + "loss": 0.7593, + "step": 36219 + }, + { + "epoch": 0.04, + "learning_rate": 4.983918522464819e-05, + "loss": 0.8556, + "step": 36220 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839176329611755e-05, + "loss": 1.262, + "step": 36221 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839167434330115e-05, + "loss": 0.6669, + "step": 36222 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839158538803276e-05, + "loss": 0.6324, + "step": 36223 + }, + { + "epoch": 0.04, + "learning_rate": 4.983914964303124e-05, + "loss": 0.9579, + "step": 36224 + }, + { + "epoch": 0.04, + "learning_rate": 4.983914074701399e-05, + "loss": 0.6922, + "step": 36225 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839131850751544e-05, + "loss": 0.6631, + "step": 36226 + }, + { + "epoch": 0.04, + "learning_rate": 4.98391229542439e-05, + "loss": 0.7537, + "step": 36227 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839114057491054e-05, + "loss": 0.771, + "step": 36228 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839105160493007e-05, + "loss": 0.8876, + "step": 36229 + }, + { + "epoch": 0.04, + "learning_rate": 4.983909626324976e-05, + "loss": 0.7129, + "step": 36230 + }, + { + "epoch": 0.04, + "learning_rate": 4.983908736576131e-05, + "loss": 0.9229, + "step": 36231 + }, + { + "epoch": 0.04, + "learning_rate": 4.983907846802766e-05, + "loss": 1.1804, + "step": 36232 + }, + { + "epoch": 0.04, + "learning_rate": 4.98390695700488e-05, + "loss": 1.1319, + "step": 36233 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839060671824744e-05, + "loss": 0.9919, + "step": 36234 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839051773355495e-05, + "loss": 1.1085, + "step": 36235 + }, + { + "epoch": 0.04, + "learning_rate": 4.983904287464104e-05, + "loss": 1.011, + "step": 36236 + }, + { + "epoch": 0.04, + "learning_rate": 4.983903397568138e-05, + "loss": 1.1694, + "step": 36237 + }, + { + "epoch": 0.04, + "learning_rate": 4.983902507647653e-05, + "loss": 0.82, + "step": 36238 + }, + { + "epoch": 0.04, + "learning_rate": 4.9839016177026476e-05, + "loss": 1.0341, + "step": 36239 + }, + { + "epoch": 0.04, + "learning_rate": 4.983900727733122e-05, + "loss": 1.1019, + "step": 36240 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838998377390755e-05, + "loss": 1.6458, + "step": 36241 + }, + { + "epoch": 0.04, + "learning_rate": 4.98389894772051e-05, + "loss": 1.0425, + "step": 36242 + }, + { + "epoch": 0.04, + "learning_rate": 4.983898057677424e-05, + "loss": 1.2461, + "step": 36243 + }, + { + "epoch": 0.04, + "learning_rate": 4.983897167609819e-05, + "loss": 1.036, + "step": 36244 + }, + { + "epoch": 0.04, + "learning_rate": 4.983896277517693e-05, + "loss": 0.6491, + "step": 36245 + }, + { + "epoch": 0.04, + "learning_rate": 4.983895387401047e-05, + "loss": 0.8753, + "step": 36246 + }, + { + "epoch": 0.04, + "learning_rate": 4.983894497259882e-05, + "loss": 1.008, + "step": 36247 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838936070941964e-05, + "loss": 0.582, + "step": 36248 + }, + { + "epoch": 0.04, + "learning_rate": 4.983892716903991e-05, + "loss": 0.8545, + "step": 36249 + }, + { + "epoch": 0.04, + "learning_rate": 4.983891826689265e-05, + "loss": 1.2027, + "step": 36250 + }, + { + "epoch": 0.04, + "learning_rate": 4.98389093645002e-05, + "loss": 0.8049, + "step": 36251 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838900461862544e-05, + "loss": 0.9199, + "step": 36252 + }, + { + "epoch": 0.04, + "learning_rate": 4.983889155897969e-05, + "loss": 0.9855, + "step": 36253 + }, + { + "epoch": 0.04, + "learning_rate": 4.983888265585164e-05, + "loss": 1.1091, + "step": 36254 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838873752478396e-05, + "loss": 0.9902, + "step": 36255 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838864848859946e-05, + "loss": 1.0596, + "step": 36256 + }, + { + "epoch": 0.04, + "learning_rate": 4.98388559449963e-05, + "loss": 1.6448, + "step": 36257 + }, + { + "epoch": 0.04, + "learning_rate": 4.983884704088745e-05, + "loss": 1.1522, + "step": 36258 + }, + { + "epoch": 0.04, + "learning_rate": 4.983883813653341e-05, + "loss": 1.0655, + "step": 36259 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838829231934156e-05, + "loss": 1.077, + "step": 36260 + }, + { + "epoch": 0.04, + "learning_rate": 4.983882032708972e-05, + "loss": 0.8504, + "step": 36261 + }, + { + "epoch": 0.04, + "learning_rate": 4.983881142200007e-05, + "loss": 1.063, + "step": 36262 + }, + { + "epoch": 0.04, + "learning_rate": 4.983880251666524e-05, + "loss": 1.0273, + "step": 36263 + }, + { + "epoch": 0.04, + "learning_rate": 4.98387936110852e-05, + "loss": 0.7588, + "step": 36264 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838784705259965e-05, + "loss": 1.6055, + "step": 36265 + }, + { + "epoch": 0.04, + "learning_rate": 4.983877579918953e-05, + "loss": 0.9031, + "step": 36266 + }, + { + "epoch": 0.04, + "learning_rate": 4.98387668928739e-05, + "loss": 1.0228, + "step": 36267 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838757986313065e-05, + "loss": 1.1927, + "step": 36268 + }, + { + "epoch": 0.04, + "learning_rate": 4.983874907950704e-05, + "loss": 0.9036, + "step": 36269 + }, + { + "epoch": 0.04, + "learning_rate": 4.983874017245582e-05, + "loss": 1.3422, + "step": 36270 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838731265159394e-05, + "loss": 0.9185, + "step": 36271 + }, + { + "epoch": 0.04, + "learning_rate": 4.983872235761777e-05, + "loss": 0.9251, + "step": 36272 + }, + { + "epoch": 0.04, + "learning_rate": 4.983871344983096e-05, + "loss": 1.1865, + "step": 36273 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838704541798945e-05, + "loss": 0.7768, + "step": 36274 + }, + { + "epoch": 0.04, + "learning_rate": 4.983869563352173e-05, + "loss": 1.0039, + "step": 36275 + }, + { + "epoch": 0.04, + "learning_rate": 4.983868672499932e-05, + "loss": 0.9592, + "step": 36276 + }, + { + "epoch": 0.04, + "learning_rate": 4.983867781623172e-05, + "loss": 1.3468, + "step": 36277 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838668907218914e-05, + "loss": 0.5559, + "step": 36278 + }, + { + "epoch": 0.04, + "learning_rate": 4.983865999796091e-05, + "loss": 0.8386, + "step": 36279 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838651088457725e-05, + "loss": 1.0412, + "step": 36280 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838642178709324e-05, + "loss": 0.8396, + "step": 36281 + }, + { + "epoch": 0.04, + "learning_rate": 4.983863326871574e-05, + "loss": 0.9549, + "step": 36282 + }, + { + "epoch": 0.04, + "learning_rate": 4.983862435847695e-05, + "loss": 0.9385, + "step": 36283 + }, + { + "epoch": 0.04, + "learning_rate": 4.983861544799296e-05, + "loss": 0.9189, + "step": 36284 + }, + { + "epoch": 0.04, + "learning_rate": 4.983860653726379e-05, + "loss": 0.9914, + "step": 36285 + }, + { + "epoch": 0.04, + "learning_rate": 4.983859762628941e-05, + "loss": 1.1612, + "step": 36286 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838588715069844e-05, + "loss": 1.3049, + "step": 36287 + }, + { + "epoch": 0.04, + "learning_rate": 4.983857980360507e-05, + "loss": 0.8165, + "step": 36288 + }, + { + "epoch": 0.04, + "learning_rate": 4.983857089189511e-05, + "loss": 1.2028, + "step": 36289 + }, + { + "epoch": 0.04, + "learning_rate": 4.983856197993995e-05, + "loss": 1.2213, + "step": 36290 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838553067739595e-05, + "loss": 1.2799, + "step": 36291 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838544155294046e-05, + "loss": 1.0248, + "step": 36292 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838535242603304e-05, + "loss": 1.1953, + "step": 36293 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838526329667356e-05, + "loss": 1.1197, + "step": 36294 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838517416486216e-05, + "loss": 0.8921, + "step": 36295 + }, + { + "epoch": 0.04, + "learning_rate": 4.983850850305989e-05, + "loss": 0.8819, + "step": 36296 + }, + { + "epoch": 0.04, + "learning_rate": 4.983849958938835e-05, + "loss": 1.1251, + "step": 36297 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838490675471635e-05, + "loss": 1.0817, + "step": 36298 + }, + { + "epoch": 0.04, + "learning_rate": 4.983848176130971e-05, + "loss": 0.7969, + "step": 36299 + }, + { + "epoch": 0.04, + "learning_rate": 4.98384728469026e-05, + "loss": 1.1206, + "step": 36300 + }, + { + "epoch": 0.04, + "learning_rate": 4.983846393225029e-05, + "loss": 1.3017, + "step": 36301 + }, + { + "epoch": 0.04, + "learning_rate": 4.983845501735279e-05, + "loss": 1.1686, + "step": 36302 + }, + { + "epoch": 0.04, + "learning_rate": 4.983844610221009e-05, + "loss": 1.0222, + "step": 36303 + }, + { + "epoch": 0.04, + "learning_rate": 4.98384371868222e-05, + "loss": 1.4083, + "step": 36304 + }, + { + "epoch": 0.04, + "learning_rate": 4.983842827118911e-05, + "loss": 0.8972, + "step": 36305 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838419355310825e-05, + "loss": 1.0247, + "step": 36306 + }, + { + "epoch": 0.04, + "learning_rate": 4.983841043918736e-05, + "loss": 1.0061, + "step": 36307 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838401522818676e-05, + "loss": 0.9431, + "step": 36308 + }, + { + "epoch": 0.04, + "learning_rate": 4.983839260620482e-05, + "loss": 0.9829, + "step": 36309 + }, + { + "epoch": 0.04, + "learning_rate": 4.983838368934576e-05, + "loss": 0.6701, + "step": 36310 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838374772241506e-05, + "loss": 1.2546, + "step": 36311 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838365854892056e-05, + "loss": 1.101, + "step": 36312 + }, + { + "epoch": 0.04, + "learning_rate": 4.983835693729741e-05, + "loss": 0.9267, + "step": 36313 + }, + { + "epoch": 0.04, + "learning_rate": 4.983834801945758e-05, + "loss": 0.9814, + "step": 36314 + }, + { + "epoch": 0.04, + "learning_rate": 4.983833910137256e-05, + "loss": 0.7914, + "step": 36315 + }, + { + "epoch": 0.04, + "learning_rate": 4.983833018304233e-05, + "loss": 0.8881, + "step": 36316 + }, + { + "epoch": 0.04, + "learning_rate": 4.983832126446691e-05, + "loss": 1.0799, + "step": 36317 + }, + { + "epoch": 0.04, + "learning_rate": 4.983831234564631e-05, + "loss": 1.0318, + "step": 36318 + }, + { + "epoch": 0.04, + "learning_rate": 4.98383034265805e-05, + "loss": 1.089, + "step": 36319 + }, + { + "epoch": 0.04, + "learning_rate": 4.98382945072695e-05, + "loss": 1.053, + "step": 36320 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838285587713315e-05, + "loss": 0.9615, + "step": 36321 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838276667911935e-05, + "loss": 1.0193, + "step": 36322 + }, + { + "epoch": 0.04, + "learning_rate": 4.983826774786536e-05, + "loss": 1.1666, + "step": 36323 + }, + { + "epoch": 0.04, + "learning_rate": 4.983825882757359e-05, + "loss": 0.9801, + "step": 36324 + }, + { + "epoch": 0.04, + "learning_rate": 4.983824990703663e-05, + "loss": 1.7853, + "step": 36325 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838240986254484e-05, + "loss": 1.2707, + "step": 36326 + }, + { + "epoch": 0.04, + "learning_rate": 4.983823206522713e-05, + "loss": 0.8186, + "step": 36327 + }, + { + "epoch": 0.04, + "learning_rate": 4.98382231439546e-05, + "loss": 0.7131, + "step": 36328 + }, + { + "epoch": 0.04, + "learning_rate": 4.983821422243686e-05, + "loss": 1.1457, + "step": 36329 + }, + { + "epoch": 0.04, + "learning_rate": 4.983820530067394e-05, + "loss": 1.2189, + "step": 36330 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838196378665824e-05, + "loss": 1.0234, + "step": 36331 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838187456412514e-05, + "loss": 1.3231, + "step": 36332 + }, + { + "epoch": 0.04, + "learning_rate": 4.983817853391402e-05, + "loss": 1.2075, + "step": 36333 + }, + { + "epoch": 0.04, + "learning_rate": 4.983816961117033e-05, + "loss": 1.301, + "step": 36334 + }, + { + "epoch": 0.04, + "learning_rate": 4.983816068818144e-05, + "loss": 1.1963, + "step": 36335 + }, + { + "epoch": 0.04, + "learning_rate": 4.983815176494736e-05, + "loss": 1.1028, + "step": 36336 + }, + { + "epoch": 0.04, + "learning_rate": 4.983814284146809e-05, + "loss": 0.6045, + "step": 36337 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838133917743634e-05, + "loss": 0.6063, + "step": 36338 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838124993773984e-05, + "loss": 1.2274, + "step": 36339 + }, + { + "epoch": 0.04, + "learning_rate": 4.983811606955914e-05, + "loss": 1.1496, + "step": 36340 + }, + { + "epoch": 0.04, + "learning_rate": 4.983810714509911e-05, + "loss": 1.3307, + "step": 36341 + }, + { + "epoch": 0.04, + "learning_rate": 4.983809822039388e-05, + "loss": 0.9699, + "step": 36342 + }, + { + "epoch": 0.04, + "learning_rate": 4.983808929544347e-05, + "loss": 0.8666, + "step": 36343 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838080370247864e-05, + "loss": 1.3348, + "step": 36344 + }, + { + "epoch": 0.04, + "learning_rate": 4.983807144480706e-05, + "loss": 0.97, + "step": 36345 + }, + { + "epoch": 0.04, + "learning_rate": 4.983806251912107e-05, + "loss": 1.6129, + "step": 36346 + }, + { + "epoch": 0.04, + "learning_rate": 4.983805359318989e-05, + "loss": 1.3962, + "step": 36347 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838044667013516e-05, + "loss": 1.0557, + "step": 36348 + }, + { + "epoch": 0.04, + "learning_rate": 4.983803574059196e-05, + "loss": 0.4388, + "step": 36349 + }, + { + "epoch": 0.04, + "learning_rate": 4.9838026813925205e-05, + "loss": 0.8511, + "step": 36350 + }, + { + "epoch": 0.04, + "learning_rate": 4.983801788701326e-05, + "loss": 0.7897, + "step": 36351 + }, + { + "epoch": 0.04, + "learning_rate": 4.983800895985613e-05, + "loss": 0.868, + "step": 36352 + }, + { + "epoch": 0.04, + "learning_rate": 4.98380000324538e-05, + "loss": 0.8487, + "step": 36353 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837991104806295e-05, + "loss": 1.1068, + "step": 36354 + }, + { + "epoch": 0.04, + "learning_rate": 4.983798217691359e-05, + "loss": 1.1727, + "step": 36355 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837973248775697e-05, + "loss": 0.6174, + "step": 36356 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837964320392605e-05, + "loss": 0.9871, + "step": 36357 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837955391764335e-05, + "loss": 0.9151, + "step": 36358 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837946462890866e-05, + "loss": 1.1448, + "step": 36359 + }, + { + "epoch": 0.04, + "learning_rate": 4.983793753377222e-05, + "loss": 0.7535, + "step": 36360 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837928604408365e-05, + "loss": 1.0201, + "step": 36361 + }, + { + "epoch": 0.04, + "learning_rate": 4.983791967479934e-05, + "loss": 1.0167, + "step": 36362 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837910744945114e-05, + "loss": 0.8897, + "step": 36363 + }, + { + "epoch": 0.04, + "learning_rate": 4.98379018148457e-05, + "loss": 0.6107, + "step": 36364 + }, + { + "epoch": 0.04, + "learning_rate": 4.98378928845011e-05, + "loss": 0.4304, + "step": 36365 + }, + { + "epoch": 0.04, + "learning_rate": 4.983788395391131e-05, + "loss": 0.6664, + "step": 36366 + }, + { + "epoch": 0.04, + "learning_rate": 4.983787502307633e-05, + "loss": 0.9567, + "step": 36367 + }, + { + "epoch": 0.04, + "learning_rate": 4.983786609199616e-05, + "loss": 1.0594, + "step": 36368 + }, + { + "epoch": 0.04, + "learning_rate": 4.98378571606708e-05, + "loss": 1.206, + "step": 36369 + }, + { + "epoch": 0.04, + "learning_rate": 4.983784822910026e-05, + "loss": 1.1449, + "step": 36370 + }, + { + "epoch": 0.04, + "learning_rate": 4.983783929728452e-05, + "loss": 1.1474, + "step": 36371 + }, + { + "epoch": 0.04, + "learning_rate": 4.98378303652236e-05, + "loss": 1.2334, + "step": 36372 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837821432917486e-05, + "loss": 0.8691, + "step": 36373 + }, + { + "epoch": 0.04, + "learning_rate": 4.983781250036618e-05, + "loss": 1.1021, + "step": 36374 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837803567569694e-05, + "loss": 0.8225, + "step": 36375 + }, + { + "epoch": 0.04, + "learning_rate": 4.983779463452801e-05, + "loss": 1.1686, + "step": 36376 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837785701241147e-05, + "loss": 1.0152, + "step": 36377 + }, + { + "epoch": 0.04, + "learning_rate": 4.983777676770909e-05, + "loss": 1.1812, + "step": 36378 + }, + { + "epoch": 0.04, + "learning_rate": 4.983776783393185e-05, + "loss": 1.2314, + "step": 36379 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837758899909424e-05, + "loss": 1.1869, + "step": 36380 + }, + { + "epoch": 0.04, + "learning_rate": 4.98377499656418e-05, + "loss": 0.9615, + "step": 36381 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837741031129e-05, + "loss": 1.1581, + "step": 36382 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837732096371004e-05, + "loss": 1.0246, + "step": 36383 + }, + { + "epoch": 0.04, + "learning_rate": 4.983772316136782e-05, + "loss": 1.3187, + "step": 36384 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837714226119454e-05, + "loss": 1.2377, + "step": 36385 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837705290625894e-05, + "loss": 1.0013, + "step": 36386 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837696354887155e-05, + "loss": 1.2781, + "step": 36387 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837687418903224e-05, + "loss": 0.8476, + "step": 36388 + }, + { + "epoch": 0.04, + "learning_rate": 4.983767848267411e-05, + "loss": 0.7404, + "step": 36389 + }, + { + "epoch": 0.04, + "learning_rate": 4.98376695461998e-05, + "loss": 0.8634, + "step": 36390 + }, + { + "epoch": 0.04, + "learning_rate": 4.983766060948031e-05, + "loss": 1.0988, + "step": 36391 + }, + { + "epoch": 0.04, + "learning_rate": 4.983765167251563e-05, + "loss": 1.3475, + "step": 36392 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837642735305765e-05, + "loss": 1.2042, + "step": 36393 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837633797850707e-05, + "loss": 0.8341, + "step": 36394 + }, + { + "epoch": 0.04, + "learning_rate": 4.983762486015047e-05, + "loss": 1.2152, + "step": 36395 + }, + { + "epoch": 0.04, + "learning_rate": 4.983761592220504e-05, + "loss": 0.7966, + "step": 36396 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837606984014426e-05, + "loss": 0.3903, + "step": 36397 + }, + { + "epoch": 0.04, + "learning_rate": 4.983759804557863e-05, + "loss": 0.4893, + "step": 36398 + }, + { + "epoch": 0.04, + "learning_rate": 4.983758910689765e-05, + "loss": 0.8536, + "step": 36399 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837580167971476e-05, + "loss": 1.0659, + "step": 36400 + }, + { + "epoch": 0.04, + "learning_rate": 4.983757122880012e-05, + "loss": 0.5378, + "step": 36401 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837562289383577e-05, + "loss": 1.0712, + "step": 36402 + }, + { + "epoch": 0.04, + "learning_rate": 4.983755334972184e-05, + "loss": 1.2348, + "step": 36403 + }, + { + "epoch": 0.04, + "learning_rate": 4.983754440981493e-05, + "loss": 0.8523, + "step": 36404 + }, + { + "epoch": 0.04, + "learning_rate": 4.983753546966283e-05, + "loss": 0.6551, + "step": 36405 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837526529265545e-05, + "loss": 0.9439, + "step": 36406 + }, + { + "epoch": 0.04, + "learning_rate": 4.983751758862307e-05, + "loss": 1.2621, + "step": 36407 + }, + { + "epoch": 0.04, + "learning_rate": 4.983750864773541e-05, + "loss": 1.1314, + "step": 36408 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837499706602565e-05, + "loss": 1.015, + "step": 36409 + }, + { + "epoch": 0.04, + "learning_rate": 4.983749076522454e-05, + "loss": 0.9599, + "step": 36410 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837481823601326e-05, + "loss": 0.9688, + "step": 36411 + }, + { + "epoch": 0.04, + "learning_rate": 4.983747288173293e-05, + "loss": 1.2966, + "step": 36412 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837463939619344e-05, + "loss": 0.9723, + "step": 36413 + }, + { + "epoch": 0.04, + "learning_rate": 4.983745499726057e-05, + "loss": 1.5354, + "step": 36414 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837446054656614e-05, + "loss": 0.9238, + "step": 36415 + }, + { + "epoch": 0.04, + "learning_rate": 4.983743711180748e-05, + "loss": 1.1238, + "step": 36416 + }, + { + "epoch": 0.04, + "learning_rate": 4.983742816871315e-05, + "loss": 0.5705, + "step": 36417 + }, + { + "epoch": 0.04, + "learning_rate": 4.983741922537365e-05, + "loss": 0.9367, + "step": 36418 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837410281788955e-05, + "loss": 1.14, + "step": 36419 + }, + { + "epoch": 0.04, + "learning_rate": 4.983740133795908e-05, + "loss": 0.6573, + "step": 36420 + }, + { + "epoch": 0.04, + "learning_rate": 4.983739239388402e-05, + "loss": 0.9054, + "step": 36421 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837383449563765e-05, + "loss": 0.9611, + "step": 36422 + }, + { + "epoch": 0.04, + "learning_rate": 4.983737450499834e-05, + "loss": 0.5259, + "step": 36423 + }, + { + "epoch": 0.04, + "learning_rate": 4.983736556018773e-05, + "loss": 0.5966, + "step": 36424 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837356615131926e-05, + "loss": 0.5382, + "step": 36425 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837347669830944e-05, + "loss": 0.2278, + "step": 36426 + }, + { + "epoch": 0.04, + "learning_rate": 4.983733872428478e-05, + "loss": 0.1921, + "step": 36427 + }, + { + "epoch": 0.04, + "learning_rate": 4.983732977849343e-05, + "loss": 0.2317, + "step": 36428 + }, + { + "epoch": 0.04, + "learning_rate": 4.98373208324569e-05, + "loss": 0.3406, + "step": 36429 + }, + { + "epoch": 0.04, + "learning_rate": 4.983731188617518e-05, + "loss": 0.4127, + "step": 36430 + }, + { + "epoch": 0.04, + "learning_rate": 4.983730293964828e-05, + "loss": 1.0662, + "step": 36431 + }, + { + "epoch": 0.04, + "learning_rate": 4.98372939928762e-05, + "loss": 1.645, + "step": 36432 + }, + { + "epoch": 0.04, + "learning_rate": 4.983728504585893e-05, + "loss": 1.646, + "step": 36433 + }, + { + "epoch": 0.04, + "learning_rate": 4.983727609859648e-05, + "loss": 1.601, + "step": 36434 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837267151088846e-05, + "loss": 1.3013, + "step": 36435 + }, + { + "epoch": 0.04, + "learning_rate": 4.983725820333603e-05, + "loss": 1.3478, + "step": 36436 + }, + { + "epoch": 0.04, + "learning_rate": 4.983724925533803e-05, + "loss": 1.2019, + "step": 36437 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837240307094845e-05, + "loss": 0.9599, + "step": 36438 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837231358606485e-05, + "loss": 1.0109, + "step": 36439 + }, + { + "epoch": 0.04, + "learning_rate": 4.983722240987293e-05, + "loss": 0.796, + "step": 36440 + }, + { + "epoch": 0.04, + "learning_rate": 4.983721346089421e-05, + "loss": 1.2218, + "step": 36441 + }, + { + "epoch": 0.04, + "learning_rate": 4.983720451167029e-05, + "loss": 1.2724, + "step": 36442 + }, + { + "epoch": 0.04, + "learning_rate": 4.98371955622012e-05, + "loss": 1.2245, + "step": 36443 + }, + { + "epoch": 0.04, + "learning_rate": 4.983718661248692e-05, + "loss": 1.6282, + "step": 36444 + }, + { + "epoch": 0.04, + "learning_rate": 4.983717766252746e-05, + "loss": 1.6637, + "step": 36445 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837168712322816e-05, + "loss": 1.2517, + "step": 36446 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837159761873e-05, + "loss": 1.1001, + "step": 36447 + }, + { + "epoch": 0.04, + "learning_rate": 4.983715081117799e-05, + "loss": 1.3622, + "step": 36448 + }, + { + "epoch": 0.04, + "learning_rate": 4.98371418602378e-05, + "loss": 0.8839, + "step": 36449 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837132909052434e-05, + "loss": 0.824, + "step": 36450 + }, + { + "epoch": 0.04, + "learning_rate": 4.983712395762188e-05, + "loss": 0.7723, + "step": 36451 + }, + { + "epoch": 0.04, + "learning_rate": 4.983711500594614e-05, + "loss": 0.8147, + "step": 36452 + }, + { + "epoch": 0.04, + "learning_rate": 4.983710605402523e-05, + "loss": 1.0763, + "step": 36453 + }, + { + "epoch": 0.04, + "learning_rate": 4.983709710185914e-05, + "loss": 1.1142, + "step": 36454 + }, + { + "epoch": 0.04, + "learning_rate": 4.983708814944786e-05, + "loss": 0.9714, + "step": 36455 + }, + { + "epoch": 0.04, + "learning_rate": 4.98370791967914e-05, + "loss": 1.142, + "step": 36456 + }, + { + "epoch": 0.04, + "learning_rate": 4.983707024388976e-05, + "loss": 0.5532, + "step": 36457 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837061290742945e-05, + "loss": 1.1034, + "step": 36458 + }, + { + "epoch": 0.04, + "learning_rate": 4.983705233735094e-05, + "loss": 0.9163, + "step": 36459 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837043383713753e-05, + "loss": 0.756, + "step": 36460 + }, + { + "epoch": 0.04, + "learning_rate": 4.98370344298314e-05, + "loss": 1.165, + "step": 36461 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837025475703846e-05, + "loss": 1.0016, + "step": 36462 + }, + { + "epoch": 0.04, + "learning_rate": 4.983701652133113e-05, + "loss": 1.2015, + "step": 36463 + }, + { + "epoch": 0.04, + "learning_rate": 4.9837007566713224e-05, + "loss": 0.7688, + "step": 36464 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836998611850135e-05, + "loss": 1.1314, + "step": 36465 + }, + { + "epoch": 0.04, + "learning_rate": 4.983698965674187e-05, + "loss": 1.0174, + "step": 36466 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836980701388427e-05, + "loss": 1.095, + "step": 36467 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836971745789794e-05, + "loss": 1.0393, + "step": 36468 + }, + { + "epoch": 0.04, + "learning_rate": 4.983696278994598e-05, + "loss": 0.7831, + "step": 36469 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836953833857006e-05, + "loss": 0.7903, + "step": 36470 + }, + { + "epoch": 0.04, + "learning_rate": 4.983694487752284e-05, + "loss": 1.4346, + "step": 36471 + }, + { + "epoch": 0.04, + "learning_rate": 4.983693592094348e-05, + "loss": 0.8633, + "step": 36472 + }, + { + "epoch": 0.04, + "learning_rate": 4.983692696411896e-05, + "loss": 0.8533, + "step": 36473 + }, + { + "epoch": 0.04, + "learning_rate": 4.983691800704925e-05, + "loss": 1.0254, + "step": 36474 + }, + { + "epoch": 0.04, + "learning_rate": 4.983690904973437e-05, + "loss": 1.3079, + "step": 36475 + }, + { + "epoch": 0.04, + "learning_rate": 4.98369000921743e-05, + "loss": 1.1198, + "step": 36476 + }, + { + "epoch": 0.04, + "learning_rate": 4.983689113436906e-05, + "loss": 0.9986, + "step": 36477 + }, + { + "epoch": 0.04, + "learning_rate": 4.983688217631863e-05, + "loss": 0.9721, + "step": 36478 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836873218023025e-05, + "loss": 1.1015, + "step": 36479 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836864259482254e-05, + "loss": 1.0201, + "step": 36480 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836855300696284e-05, + "loss": 0.9897, + "step": 36481 + }, + { + "epoch": 0.04, + "learning_rate": 4.983684634166514e-05, + "loss": 0.9728, + "step": 36482 + }, + { + "epoch": 0.04, + "learning_rate": 4.983683738238882e-05, + "loss": 1.0963, + "step": 36483 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836828422867335e-05, + "loss": 0.8765, + "step": 36484 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836819463100657e-05, + "loss": 0.9878, + "step": 36485 + }, + { + "epoch": 0.04, + "learning_rate": 4.983681050308879e-05, + "loss": 0.828, + "step": 36486 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836801542831764e-05, + "loss": 1.0672, + "step": 36487 + }, + { + "epoch": 0.04, + "learning_rate": 4.983679258232955e-05, + "loss": 1.7245, + "step": 36488 + }, + { + "epoch": 0.04, + "learning_rate": 4.983678362158216e-05, + "loss": 2.4195, + "step": 36489 + }, + { + "epoch": 0.04, + "learning_rate": 4.983677466058959e-05, + "loss": 2.2616, + "step": 36490 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836765699351846e-05, + "loss": 1.5746, + "step": 36491 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836756737868916e-05, + "loss": 1.5618, + "step": 36492 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836747776140815e-05, + "loss": 1.708, + "step": 36493 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836738814167534e-05, + "loss": 1.379, + "step": 36494 + }, + { + "epoch": 0.04, + "learning_rate": 4.983672985194907e-05, + "loss": 1.0986, + "step": 36495 + }, + { + "epoch": 0.04, + "learning_rate": 4.983672088948544e-05, + "loss": 0.7218, + "step": 36496 + }, + { + "epoch": 0.04, + "learning_rate": 4.983671192677663e-05, + "loss": 1.1069, + "step": 36497 + }, + { + "epoch": 0.04, + "learning_rate": 4.983670296382263e-05, + "loss": 1.2957, + "step": 36498 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836694000623464e-05, + "loss": 1.4501, + "step": 36499 + }, + { + "epoch": 0.04, + "learning_rate": 4.983668503717912e-05, + "loss": 1.1574, + "step": 36500 + }, + { + "epoch": 0.04, + "eval_loss": 1.0801715850830078, + "eval_runtime": 126.5217, + "eval_samples_per_second": 10.947, + "eval_steps_per_second": 5.477, + "step": 36500 + }, + { + "epoch": 0.04, + "learning_rate": 4.983667607348959e-05, + "loss": 1.1581, + "step": 36501 + }, + { + "epoch": 0.04, + "learning_rate": 4.983666710955489e-05, + "loss": 0.7902, + "step": 36502 + }, + { + "epoch": 0.04, + "learning_rate": 4.983665814537501e-05, + "loss": 0.7313, + "step": 36503 + }, + { + "epoch": 0.04, + "learning_rate": 4.983664918094996e-05, + "loss": 0.9981, + "step": 36504 + }, + { + "epoch": 0.04, + "learning_rate": 4.983664021627973e-05, + "loss": 0.5329, + "step": 36505 + }, + { + "epoch": 0.04, + "learning_rate": 4.983663125136432e-05, + "loss": 0.6306, + "step": 36506 + }, + { + "epoch": 0.04, + "learning_rate": 4.983662228620373e-05, + "loss": 0.5505, + "step": 36507 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836613320797966e-05, + "loss": 1.2046, + "step": 36508 + }, + { + "epoch": 0.04, + "learning_rate": 4.983660435514703e-05, + "loss": 1.0974, + "step": 36509 + }, + { + "epoch": 0.04, + "learning_rate": 4.983659538925091e-05, + "loss": 0.8238, + "step": 36510 + }, + { + "epoch": 0.04, + "learning_rate": 4.983658642310962e-05, + "loss": 1.0088, + "step": 36511 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836577456723154e-05, + "loss": 1.1824, + "step": 36512 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836568490091504e-05, + "loss": 1.3212, + "step": 36513 + }, + { + "epoch": 0.04, + "learning_rate": 4.983655952321469e-05, + "loss": 1.2092, + "step": 36514 + }, + { + "epoch": 0.04, + "learning_rate": 4.983655055609269e-05, + "loss": 0.8786, + "step": 36515 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836541588725515e-05, + "loss": 0.6624, + "step": 36516 + }, + { + "epoch": 0.04, + "learning_rate": 4.983653262111317e-05, + "loss": 0.851, + "step": 36517 + }, + { + "epoch": 0.04, + "learning_rate": 4.983652365325564e-05, + "loss": 1.3885, + "step": 36518 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836514685152945e-05, + "loss": 1.282, + "step": 36519 + }, + { + "epoch": 0.04, + "learning_rate": 4.983650571680507e-05, + "loss": 1.0518, + "step": 36520 + }, + { + "epoch": 0.04, + "learning_rate": 4.983649674821201e-05, + "loss": 0.7823, + "step": 36521 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836487779373786e-05, + "loss": 0.5314, + "step": 36522 + }, + { + "epoch": 0.04, + "learning_rate": 4.983647881029039e-05, + "loss": 0.5989, + "step": 36523 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836469840961806e-05, + "loss": 1.0711, + "step": 36524 + }, + { + "epoch": 0.04, + "learning_rate": 4.983646087138806e-05, + "loss": 1.4122, + "step": 36525 + }, + { + "epoch": 0.04, + "learning_rate": 4.983645190156913e-05, + "loss": 0.9583, + "step": 36526 + }, + { + "epoch": 0.04, + "learning_rate": 4.983644293150503e-05, + "loss": 0.7795, + "step": 36527 + }, + { + "epoch": 0.04, + "learning_rate": 4.983643396119575e-05, + "loss": 1.0108, + "step": 36528 + }, + { + "epoch": 0.04, + "learning_rate": 4.983642499064129e-05, + "loss": 0.87, + "step": 36529 + }, + { + "epoch": 0.04, + "learning_rate": 4.983641601984167e-05, + "loss": 1.055, + "step": 36530 + }, + { + "epoch": 0.04, + "learning_rate": 4.983640704879687e-05, + "loss": 1.1491, + "step": 36531 + }, + { + "epoch": 0.04, + "learning_rate": 4.983639807750689e-05, + "loss": 0.6437, + "step": 36532 + }, + { + "epoch": 0.04, + "learning_rate": 4.983638910597174e-05, + "loss": 1.0763, + "step": 36533 + }, + { + "epoch": 0.04, + "learning_rate": 4.983638013419142e-05, + "loss": 1.0292, + "step": 36534 + }, + { + "epoch": 0.04, + "learning_rate": 4.983637116216592e-05, + "loss": 1.0523, + "step": 36535 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836362189895245e-05, + "loss": 0.9211, + "step": 36536 + }, + { + "epoch": 0.04, + "learning_rate": 4.98363532173794e-05, + "loss": 0.9905, + "step": 36537 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836344244618375e-05, + "loss": 0.9577, + "step": 36538 + }, + { + "epoch": 0.04, + "learning_rate": 4.983633527161218e-05, + "loss": 0.8783, + "step": 36539 + }, + { + "epoch": 0.04, + "learning_rate": 4.983632629836082e-05, + "loss": 1.3294, + "step": 36540 + }, + { + "epoch": 0.04, + "learning_rate": 4.983631732486427e-05, + "loss": 0.9756, + "step": 36541 + }, + { + "epoch": 0.04, + "learning_rate": 4.983630835112255e-05, + "loss": 1.099, + "step": 36542 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836299377135667e-05, + "loss": 1.1711, + "step": 36543 + }, + { + "epoch": 0.04, + "learning_rate": 4.98362904029036e-05, + "loss": 0.8608, + "step": 36544 + }, + { + "epoch": 0.04, + "learning_rate": 4.983628142842636e-05, + "loss": 1.1201, + "step": 36545 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836272453703955e-05, + "loss": 1.1674, + "step": 36546 + }, + { + "epoch": 0.04, + "learning_rate": 4.983626347873637e-05, + "loss": 0.8879, + "step": 36547 + }, + { + "epoch": 0.04, + "learning_rate": 4.983625450352361e-05, + "loss": 1.0956, + "step": 36548 + }, + { + "epoch": 0.04, + "learning_rate": 4.983624552806568e-05, + "loss": 1.0042, + "step": 36549 + }, + { + "epoch": 0.04, + "learning_rate": 4.983623655236258e-05, + "loss": 1.1405, + "step": 36550 + }, + { + "epoch": 0.04, + "learning_rate": 4.98362275764143e-05, + "loss": 0.8372, + "step": 36551 + }, + { + "epoch": 0.04, + "learning_rate": 4.983621860022085e-05, + "loss": 1.0878, + "step": 36552 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836209623782235e-05, + "loss": 0.9614, + "step": 36553 + }, + { + "epoch": 0.04, + "learning_rate": 4.983620064709844e-05, + "loss": 0.7617, + "step": 36554 + }, + { + "epoch": 0.04, + "learning_rate": 4.983619167016948e-05, + "loss": 0.4594, + "step": 36555 + }, + { + "epoch": 0.04, + "learning_rate": 4.983618269299534e-05, + "loss": 0.7125, + "step": 36556 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836173715576024e-05, + "loss": 1.2592, + "step": 36557 + }, + { + "epoch": 0.04, + "learning_rate": 4.983616473791154e-05, + "loss": 0.8533, + "step": 36558 + }, + { + "epoch": 0.04, + "learning_rate": 4.983615576000188e-05, + "loss": 0.7256, + "step": 36559 + }, + { + "epoch": 0.04, + "learning_rate": 4.983614678184706e-05, + "loss": 0.6465, + "step": 36560 + }, + { + "epoch": 0.04, + "learning_rate": 4.983613780344706e-05, + "loss": 0.5999, + "step": 36561 + }, + { + "epoch": 0.04, + "learning_rate": 4.983612882480189e-05, + "loss": 0.8945, + "step": 36562 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836119845911544e-05, + "loss": 1.1469, + "step": 36563 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836110866776034e-05, + "loss": 1.3692, + "step": 36564 + }, + { + "epoch": 0.04, + "learning_rate": 4.983610188739534e-05, + "loss": 0.4626, + "step": 36565 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836092907769486e-05, + "loss": 0.3686, + "step": 36566 + }, + { + "epoch": 0.04, + "learning_rate": 4.983608392789846e-05, + "loss": 0.3258, + "step": 36567 + }, + { + "epoch": 0.04, + "learning_rate": 4.983607494778225e-05, + "loss": 1.0006, + "step": 36568 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836065967420886e-05, + "loss": 0.5486, + "step": 36569 + }, + { + "epoch": 0.04, + "learning_rate": 4.983605698681434e-05, + "loss": 1.0318, + "step": 36570 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836048005962624e-05, + "loss": 0.9734, + "step": 36571 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836039024865745e-05, + "loss": 0.7297, + "step": 36572 + }, + { + "epoch": 0.04, + "learning_rate": 4.983603004352369e-05, + "loss": 1.1062, + "step": 36573 + }, + { + "epoch": 0.04, + "learning_rate": 4.983602106193646e-05, + "loss": 0.8046, + "step": 36574 + }, + { + "epoch": 0.04, + "learning_rate": 4.983601208010406e-05, + "loss": 0.9771, + "step": 36575 + }, + { + "epoch": 0.04, + "learning_rate": 4.983600309802649e-05, + "loss": 0.9632, + "step": 36576 + }, + { + "epoch": 0.04, + "learning_rate": 4.983599411570376e-05, + "loss": 0.7491, + "step": 36577 + }, + { + "epoch": 0.04, + "learning_rate": 4.983598513313584e-05, + "loss": 1.4442, + "step": 36578 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835976150322764e-05, + "loss": 1.2506, + "step": 36579 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835967167264516e-05, + "loss": 1.0025, + "step": 36580 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835958183961096e-05, + "loss": 1.2585, + "step": 36581 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835949200412504e-05, + "loss": 1.1767, + "step": 36582 + }, + { + "epoch": 0.04, + "learning_rate": 4.983594021661874e-05, + "loss": 1.0739, + "step": 36583 + }, + { + "epoch": 0.04, + "learning_rate": 4.983593123257981e-05, + "loss": 1.0516, + "step": 36584 + }, + { + "epoch": 0.04, + "learning_rate": 4.983592224829571e-05, + "loss": 0.9998, + "step": 36585 + }, + { + "epoch": 0.04, + "learning_rate": 4.983591326376644e-05, + "loss": 1.1904, + "step": 36586 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835904278992005e-05, + "loss": 0.9756, + "step": 36587 + }, + { + "epoch": 0.04, + "learning_rate": 4.983589529397239e-05, + "loss": 1.3235, + "step": 36588 + }, + { + "epoch": 0.04, + "learning_rate": 4.983588630870761e-05, + "loss": 1.221, + "step": 36589 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835877323197664e-05, + "loss": 0.8106, + "step": 36590 + }, + { + "epoch": 0.04, + "learning_rate": 4.983586833744255e-05, + "loss": 1.0327, + "step": 36591 + }, + { + "epoch": 0.04, + "learning_rate": 4.983585935144226e-05, + "loss": 1.2364, + "step": 36592 + }, + { + "epoch": 0.04, + "learning_rate": 4.983585036519681e-05, + "loss": 1.0057, + "step": 36593 + }, + { + "epoch": 0.04, + "learning_rate": 4.983584137870618e-05, + "loss": 0.9911, + "step": 36594 + }, + { + "epoch": 0.04, + "learning_rate": 4.983583239197039e-05, + "loss": 1.1683, + "step": 36595 + }, + { + "epoch": 0.04, + "learning_rate": 4.983582340498942e-05, + "loss": 1.255, + "step": 36596 + }, + { + "epoch": 0.04, + "learning_rate": 4.98358144177633e-05, + "loss": 1.1747, + "step": 36597 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835805430291994e-05, + "loss": 0.7462, + "step": 36598 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835796442575525e-05, + "loss": 0.898, + "step": 36599 + }, + { + "epoch": 0.04, + "learning_rate": 4.983578745461389e-05, + "loss": 0.9625, + "step": 36600 + }, + { + "epoch": 0.04, + "learning_rate": 4.983577846640708e-05, + "loss": 0.8622, + "step": 36601 + }, + { + "epoch": 0.04, + "learning_rate": 4.983576947795511e-05, + "loss": 1.0915, + "step": 36602 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835760489257966e-05, + "loss": 0.2939, + "step": 36603 + }, + { + "epoch": 0.04, + "learning_rate": 4.983575150031565e-05, + "loss": 0.2163, + "step": 36604 + }, + { + "epoch": 0.04, + "learning_rate": 4.983574251112818e-05, + "loss": 0.1621, + "step": 36605 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835733521695525e-05, + "loss": 0.1711, + "step": 36606 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835724532017715e-05, + "loss": 0.1762, + "step": 36607 + }, + { + "epoch": 0.04, + "learning_rate": 4.983571554209473e-05, + "loss": 0.1318, + "step": 36608 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835706551926585e-05, + "loss": 0.1487, + "step": 36609 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835697561513265e-05, + "loss": 0.1226, + "step": 36610 + }, + { + "epoch": 0.04, + "learning_rate": 4.983568857085478e-05, + "loss": 0.0732, + "step": 36611 + }, + { + "epoch": 0.04, + "learning_rate": 4.983567957995113e-05, + "loss": 0.1173, + "step": 36612 + }, + { + "epoch": 0.04, + "learning_rate": 4.983567058880231e-05, + "loss": 0.1369, + "step": 36613 + }, + { + "epoch": 0.04, + "learning_rate": 4.983566159740832e-05, + "loss": 0.142, + "step": 36614 + }, + { + "epoch": 0.04, + "learning_rate": 4.983565260576917e-05, + "loss": 0.1397, + "step": 36615 + }, + { + "epoch": 0.04, + "learning_rate": 4.983564361388485e-05, + "loss": 0.1351, + "step": 36616 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835634621755354e-05, + "loss": 0.1188, + "step": 36617 + }, + { + "epoch": 0.04, + "learning_rate": 4.98356256293807e-05, + "loss": 0.1446, + "step": 36618 + }, + { + "epoch": 0.04, + "learning_rate": 4.983561663676088e-05, + "loss": 0.1566, + "step": 36619 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835607643895887e-05, + "loss": 0.094, + "step": 36620 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835598650785736e-05, + "loss": 0.0982, + "step": 36621 + }, + { + "epoch": 0.04, + "learning_rate": 4.983558965743041e-05, + "loss": 0.1192, + "step": 36622 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835580663829925e-05, + "loss": 0.1242, + "step": 36623 + }, + { + "epoch": 0.04, + "learning_rate": 4.983557166998427e-05, + "loss": 0.1149, + "step": 36624 + }, + { + "epoch": 0.04, + "learning_rate": 4.983556267589345e-05, + "loss": 0.1604, + "step": 36625 + }, + { + "epoch": 0.04, + "learning_rate": 4.983555368155746e-05, + "loss": 0.0998, + "step": 36626 + }, + { + "epoch": 0.04, + "learning_rate": 4.983554468697631e-05, + "loss": 0.1117, + "step": 36627 + }, + { + "epoch": 0.04, + "learning_rate": 4.983553569214999e-05, + "loss": 0.1402, + "step": 36628 + }, + { + "epoch": 0.04, + "learning_rate": 4.98355266970785e-05, + "loss": 0.1217, + "step": 36629 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835517701761844e-05, + "loss": 0.103, + "step": 36630 + }, + { + "epoch": 0.04, + "learning_rate": 4.983550870620003e-05, + "loss": 0.1668, + "step": 36631 + }, + { + "epoch": 0.04, + "learning_rate": 4.983549971039305e-05, + "loss": 0.1757, + "step": 36632 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835490714340906e-05, + "loss": 0.2533, + "step": 36633 + }, + { + "epoch": 0.04, + "learning_rate": 4.983548171804359e-05, + "loss": 1.0659, + "step": 36634 + }, + { + "epoch": 0.04, + "learning_rate": 4.983547272150111e-05, + "loss": 0.9119, + "step": 36635 + }, + { + "epoch": 0.04, + "learning_rate": 4.983546372471346e-05, + "loss": 0.9229, + "step": 36636 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835454727680653e-05, + "loss": 1.197, + "step": 36637 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835445730402674e-05, + "loss": 0.9739, + "step": 36638 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835436732879536e-05, + "loss": 1.2517, + "step": 36639 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835427735111226e-05, + "loss": 1.0737, + "step": 36640 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835418737097764e-05, + "loss": 1.0102, + "step": 36641 + }, + { + "epoch": 0.04, + "learning_rate": 4.983540973883913e-05, + "loss": 0.8781, + "step": 36642 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835400740335325e-05, + "loss": 0.8746, + "step": 36643 + }, + { + "epoch": 0.04, + "learning_rate": 4.983539174158637e-05, + "loss": 0.7693, + "step": 36644 + }, + { + "epoch": 0.04, + "learning_rate": 4.983538274259224e-05, + "loss": 1.1705, + "step": 36645 + }, + { + "epoch": 0.04, + "learning_rate": 4.983537374335294e-05, + "loss": 1.1494, + "step": 36646 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835364743868484e-05, + "loss": 0.9667, + "step": 36647 + }, + { + "epoch": 0.04, + "learning_rate": 4.983535574413886e-05, + "loss": 0.7656, + "step": 36648 + }, + { + "epoch": 0.04, + "learning_rate": 4.983534674416408e-05, + "loss": 1.0816, + "step": 36649 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835337743944126e-05, + "loss": 1.1325, + "step": 36650 + }, + { + "epoch": 0.04, + "learning_rate": 4.983532874347901e-05, + "loss": 1.318, + "step": 36651 + }, + { + "epoch": 0.04, + "learning_rate": 4.983531974276874e-05, + "loss": 1.1953, + "step": 36652 + }, + { + "epoch": 0.04, + "learning_rate": 4.983531074181329e-05, + "loss": 1.1058, + "step": 36653 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835301740612686e-05, + "loss": 0.7405, + "step": 36654 + }, + { + "epoch": 0.04, + "learning_rate": 4.983529273916692e-05, + "loss": 0.8232, + "step": 36655 + }, + { + "epoch": 0.04, + "learning_rate": 4.983528373747599e-05, + "loss": 1.0461, + "step": 36656 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835274735539894e-05, + "loss": 0.9282, + "step": 36657 + }, + { + "epoch": 0.04, + "learning_rate": 4.983526573335863e-05, + "loss": 1.0718, + "step": 36658 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835256730932215e-05, + "loss": 1.1952, + "step": 36659 + }, + { + "epoch": 0.04, + "learning_rate": 4.983524772826063e-05, + "loss": 0.9948, + "step": 36660 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835238725343875e-05, + "loss": 0.9008, + "step": 36661 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835229722181964e-05, + "loss": 1.1112, + "step": 36662 + }, + { + "epoch": 0.04, + "learning_rate": 4.983522071877489e-05, + "loss": 0.8711, + "step": 36663 + }, + { + "epoch": 0.04, + "learning_rate": 4.983521171512265e-05, + "loss": 0.968, + "step": 36664 + }, + { + "epoch": 0.04, + "learning_rate": 4.983520271122525e-05, + "loss": 1.0797, + "step": 36665 + }, + { + "epoch": 0.04, + "learning_rate": 4.983519370708268e-05, + "loss": 1.3646, + "step": 36666 + }, + { + "epoch": 0.04, + "learning_rate": 4.983518470269496e-05, + "loss": 0.5502, + "step": 36667 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835175698062076e-05, + "loss": 0.9814, + "step": 36668 + }, + { + "epoch": 0.04, + "learning_rate": 4.983516669318402e-05, + "loss": 0.9783, + "step": 36669 + }, + { + "epoch": 0.04, + "learning_rate": 4.983515768806081e-05, + "loss": 1.0796, + "step": 36670 + }, + { + "epoch": 0.04, + "learning_rate": 4.983514868269243e-05, + "loss": 0.8082, + "step": 36671 + }, + { + "epoch": 0.04, + "learning_rate": 4.98351396770789e-05, + "loss": 0.8245, + "step": 36672 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835130671220195e-05, + "loss": 1.0604, + "step": 36673 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835121665116336e-05, + "loss": 0.8059, + "step": 36674 + }, + { + "epoch": 0.04, + "learning_rate": 4.983511265876731e-05, + "loss": 0.5609, + "step": 36675 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835103652173134e-05, + "loss": 0.5552, + "step": 36676 + }, + { + "epoch": 0.04, + "learning_rate": 4.983509464533378e-05, + "loss": 1.3113, + "step": 36677 + }, + { + "epoch": 0.04, + "learning_rate": 4.983508563824927e-05, + "loss": 1.4318, + "step": 36678 + }, + { + "epoch": 0.04, + "learning_rate": 4.983507663091961e-05, + "loss": 1.0463, + "step": 36679 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835067623344776e-05, + "loss": 0.9647, + "step": 36680 + }, + { + "epoch": 0.04, + "learning_rate": 4.983505861552478e-05, + "loss": 0.7846, + "step": 36681 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835049607459626e-05, + "loss": 0.7959, + "step": 36682 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835040599149313e-05, + "loss": 0.6848, + "step": 36683 + }, + { + "epoch": 0.04, + "learning_rate": 4.983503159059384e-05, + "loss": 0.7586, + "step": 36684 + }, + { + "epoch": 0.04, + "learning_rate": 4.98350225817932e-05, + "loss": 1.0414, + "step": 36685 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835013572747404e-05, + "loss": 1.1165, + "step": 36686 + }, + { + "epoch": 0.04, + "learning_rate": 4.9835004563456444e-05, + "loss": 0.8255, + "step": 36687 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834995553920326e-05, + "loss": 0.9242, + "step": 36688 + }, + { + "epoch": 0.04, + "learning_rate": 4.983498654413905e-05, + "loss": 0.6501, + "step": 36689 + }, + { + "epoch": 0.04, + "learning_rate": 4.983497753411261e-05, + "loss": 1.0977, + "step": 36690 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834968523841006e-05, + "loss": 0.9076, + "step": 36691 + }, + { + "epoch": 0.04, + "learning_rate": 4.983495951332424e-05, + "loss": 1.1035, + "step": 36692 + }, + { + "epoch": 0.04, + "learning_rate": 4.983495050256233e-05, + "loss": 0.8694, + "step": 36693 + }, + { + "epoch": 0.04, + "learning_rate": 4.983494149155524e-05, + "loss": 0.9792, + "step": 36694 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834932480303e-05, + "loss": 0.9852, + "step": 36695 + }, + { + "epoch": 0.04, + "learning_rate": 4.98349234688056e-05, + "loss": 1.1988, + "step": 36696 + }, + { + "epoch": 0.04, + "learning_rate": 4.983491445706304e-05, + "loss": 1.1708, + "step": 36697 + }, + { + "epoch": 0.04, + "learning_rate": 4.983490544507532e-05, + "loss": 1.2852, + "step": 36698 + }, + { + "epoch": 0.04, + "learning_rate": 4.983489643284244e-05, + "loss": 1.1419, + "step": 36699 + }, + { + "epoch": 0.04, + "learning_rate": 4.98348874203644e-05, + "loss": 1.1139, + "step": 36700 + }, + { + "epoch": 0.04, + "learning_rate": 4.983487840764119e-05, + "loss": 1.1473, + "step": 36701 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834869394672835e-05, + "loss": 1.074, + "step": 36702 + }, + { + "epoch": 0.04, + "learning_rate": 4.983486038145931e-05, + "loss": 0.9082, + "step": 36703 + }, + { + "epoch": 0.04, + "learning_rate": 4.983485136800064e-05, + "loss": 0.9913, + "step": 36704 + }, + { + "epoch": 0.04, + "learning_rate": 4.98348423542968e-05, + "loss": 1.2458, + "step": 36705 + }, + { + "epoch": 0.04, + "learning_rate": 4.983483334034781e-05, + "loss": 1.1115, + "step": 36706 + }, + { + "epoch": 0.04, + "learning_rate": 4.983482432615365e-05, + "loss": 1.083, + "step": 36707 + }, + { + "epoch": 0.04, + "learning_rate": 4.983481531171433e-05, + "loss": 1.0694, + "step": 36708 + }, + { + "epoch": 0.04, + "learning_rate": 4.983480629702986e-05, + "loss": 1.1429, + "step": 36709 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834797282100235e-05, + "loss": 1.0356, + "step": 36710 + }, + { + "epoch": 0.04, + "learning_rate": 4.983478826692544e-05, + "loss": 1.1572, + "step": 36711 + }, + { + "epoch": 0.04, + "learning_rate": 4.983477925150549e-05, + "loss": 0.7691, + "step": 36712 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834770235840384e-05, + "loss": 0.9285, + "step": 36713 + }, + { + "epoch": 0.04, + "learning_rate": 4.983476121993011e-05, + "loss": 1.139, + "step": 36714 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834752203774695e-05, + "loss": 1.2964, + "step": 36715 + }, + { + "epoch": 0.04, + "learning_rate": 4.983474318737411e-05, + "loss": 1.0772, + "step": 36716 + }, + { + "epoch": 0.04, + "learning_rate": 4.983473417072837e-05, + "loss": 1.051, + "step": 36717 + }, + { + "epoch": 0.04, + "learning_rate": 4.983472515383747e-05, + "loss": 1.0587, + "step": 36718 + }, + { + "epoch": 0.04, + "learning_rate": 4.983471613670142e-05, + "loss": 0.8612, + "step": 36719 + }, + { + "epoch": 0.04, + "learning_rate": 4.98347071193202e-05, + "loss": 0.9058, + "step": 36720 + }, + { + "epoch": 0.04, + "learning_rate": 4.983469810169383e-05, + "loss": 0.8373, + "step": 36721 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834689083822295e-05, + "loss": 0.8507, + "step": 36722 + }, + { + "epoch": 0.04, + "learning_rate": 4.983468006570561e-05, + "loss": 0.9668, + "step": 36723 + }, + { + "epoch": 0.04, + "learning_rate": 4.983467104734376e-05, + "loss": 0.8768, + "step": 36724 + }, + { + "epoch": 0.04, + "learning_rate": 4.983466202873677e-05, + "loss": 1.0059, + "step": 36725 + }, + { + "epoch": 0.04, + "learning_rate": 4.98346530098846e-05, + "loss": 0.9779, + "step": 36726 + }, + { + "epoch": 0.04, + "learning_rate": 4.983464399078729e-05, + "loss": 1.1633, + "step": 36727 + }, + { + "epoch": 0.04, + "learning_rate": 4.983463497144481e-05, + "loss": 1.1685, + "step": 36728 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834625951857185e-05, + "loss": 1.1379, + "step": 36729 + }, + { + "epoch": 0.04, + "learning_rate": 4.983461693202439e-05, + "loss": 1.0121, + "step": 36730 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834607911946454e-05, + "loss": 1.3027, + "step": 36731 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834598891623343e-05, + "loss": 1.0348, + "step": 36732 + }, + { + "epoch": 0.04, + "learning_rate": 4.983458987105509e-05, + "loss": 0.8856, + "step": 36733 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834580850241675e-05, + "loss": 1.0344, + "step": 36734 + }, + { + "epoch": 0.04, + "learning_rate": 4.98345718291831e-05, + "loss": 1.0276, + "step": 36735 + }, + { + "epoch": 0.04, + "learning_rate": 4.983456280787937e-05, + "loss": 0.8174, + "step": 36736 + }, + { + "epoch": 0.04, + "learning_rate": 4.983455378633049e-05, + "loss": 0.6851, + "step": 36737 + }, + { + "epoch": 0.04, + "learning_rate": 4.983454476453645e-05, + "loss": 0.9232, + "step": 36738 + }, + { + "epoch": 0.04, + "learning_rate": 4.983453574249725e-05, + "loss": 0.9866, + "step": 36739 + }, + { + "epoch": 0.04, + "learning_rate": 4.98345267202129e-05, + "loss": 1.1603, + "step": 36740 + }, + { + "epoch": 0.04, + "learning_rate": 4.983451769768339e-05, + "loss": 0.9041, + "step": 36741 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834508674908724e-05, + "loss": 0.9886, + "step": 36742 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834499651888905e-05, + "loss": 1.3068, + "step": 36743 + }, + { + "epoch": 0.04, + "learning_rate": 4.983449062862393e-05, + "loss": 1.3399, + "step": 36744 + }, + { + "epoch": 0.04, + "learning_rate": 4.98344816051138e-05, + "loss": 1.4593, + "step": 36745 + }, + { + "epoch": 0.04, + "learning_rate": 4.983447258135851e-05, + "loss": 0.9628, + "step": 36746 + }, + { + "epoch": 0.04, + "learning_rate": 4.983446355735807e-05, + "loss": 1.1232, + "step": 36747 + }, + { + "epoch": 0.04, + "learning_rate": 4.983445453311247e-05, + "loss": 1.0046, + "step": 36748 + }, + { + "epoch": 0.04, + "learning_rate": 4.983444550862172e-05, + "loss": 1.1928, + "step": 36749 + }, + { + "epoch": 0.04, + "learning_rate": 4.98344364838858e-05, + "loss": 1.1138, + "step": 36750 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834427458904743e-05, + "loss": 0.5842, + "step": 36751 + }, + { + "epoch": 0.04, + "learning_rate": 4.983441843367852e-05, + "loss": 1.3033, + "step": 36752 + }, + { + "epoch": 0.04, + "learning_rate": 4.983440940820715e-05, + "loss": 0.8558, + "step": 36753 + }, + { + "epoch": 0.04, + "learning_rate": 4.983440038249062e-05, + "loss": 0.8939, + "step": 36754 + }, + { + "epoch": 0.04, + "learning_rate": 4.983439135652894e-05, + "loss": 0.7753, + "step": 36755 + }, + { + "epoch": 0.04, + "learning_rate": 4.98343823303221e-05, + "loss": 1.3038, + "step": 36756 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834373303870106e-05, + "loss": 1.045, + "step": 36757 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834364277172965e-05, + "loss": 0.8739, + "step": 36758 + }, + { + "epoch": 0.04, + "learning_rate": 4.983435525023066e-05, + "loss": 0.7775, + "step": 36759 + }, + { + "epoch": 0.04, + "learning_rate": 4.983434622304321e-05, + "loss": 1.0601, + "step": 36760 + }, + { + "epoch": 0.04, + "learning_rate": 4.983433719561059e-05, + "loss": 0.8737, + "step": 36761 + }, + { + "epoch": 0.04, + "learning_rate": 4.983432816793283e-05, + "loss": 0.952, + "step": 36762 + }, + { + "epoch": 0.04, + "learning_rate": 4.983431914000991e-05, + "loss": 0.2886, + "step": 36763 + }, + { + "epoch": 0.04, + "learning_rate": 4.983431011184184e-05, + "loss": 0.0911, + "step": 36764 + }, + { + "epoch": 0.04, + "learning_rate": 4.983430108342861e-05, + "loss": 0.7786, + "step": 36765 + }, + { + "epoch": 0.04, + "learning_rate": 4.983429205477024e-05, + "loss": 1.0432, + "step": 36766 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834283025866704e-05, + "loss": 0.9589, + "step": 36767 + }, + { + "epoch": 0.04, + "learning_rate": 4.983427399671802e-05, + "loss": 1.1058, + "step": 36768 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834264967324175e-05, + "loss": 1.0817, + "step": 36769 + }, + { + "epoch": 0.04, + "learning_rate": 4.983425593768518e-05, + "loss": 0.8909, + "step": 36770 + }, + { + "epoch": 0.04, + "learning_rate": 4.983424690780103e-05, + "loss": 1.3273, + "step": 36771 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834237877671734e-05, + "loss": 1.2326, + "step": 36772 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834228847297284e-05, + "loss": 0.8338, + "step": 36773 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834219816677675e-05, + "loss": 0.899, + "step": 36774 + }, + { + "epoch": 0.04, + "learning_rate": 4.983421078581292e-05, + "loss": 1.1268, + "step": 36775 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834201754703e-05, + "loss": 0.979, + "step": 36776 + }, + { + "epoch": 0.04, + "learning_rate": 4.983419272334794e-05, + "loss": 0.8565, + "step": 36777 + }, + { + "epoch": 0.04, + "learning_rate": 4.983418369174772e-05, + "loss": 0.8901, + "step": 36778 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834174659902356e-05, + "loss": 0.9932, + "step": 36779 + }, + { + "epoch": 0.04, + "learning_rate": 4.983416562781183e-05, + "loss": 0.8369, + "step": 36780 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834156595476154e-05, + "loss": 0.9724, + "step": 36781 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834147562895325e-05, + "loss": 1.4079, + "step": 36782 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834138530069344e-05, + "loss": 1.0093, + "step": 36783 + }, + { + "epoch": 0.04, + "learning_rate": 4.983412949699822e-05, + "loss": 1.0201, + "step": 36784 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834120463681935e-05, + "loss": 1.2304, + "step": 36785 + }, + { + "epoch": 0.04, + "learning_rate": 4.983411143012049e-05, + "loss": 0.9665, + "step": 36786 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834102396313905e-05, + "loss": 1.4841, + "step": 36787 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834093362262167e-05, + "loss": 1.146, + "step": 36788 + }, + { + "epoch": 0.04, + "learning_rate": 4.983408432796528e-05, + "loss": 0.9318, + "step": 36789 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834075293423234e-05, + "loss": 1.0801, + "step": 36790 + }, + { + "epoch": 0.04, + "learning_rate": 4.983406625863604e-05, + "loss": 1.1343, + "step": 36791 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834057223603695e-05, + "loss": 1.0616, + "step": 36792 + }, + { + "epoch": 0.04, + "learning_rate": 4.98340481883262e-05, + "loss": 1.2639, + "step": 36793 + }, + { + "epoch": 0.04, + "learning_rate": 4.983403915280355e-05, + "loss": 1.0968, + "step": 36794 + }, + { + "epoch": 0.04, + "learning_rate": 4.983403011703575e-05, + "loss": 0.9372, + "step": 36795 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834021081022796e-05, + "loss": 1.0039, + "step": 36796 + }, + { + "epoch": 0.04, + "learning_rate": 4.98340120447647e-05, + "loss": 0.3631, + "step": 36797 + }, + { + "epoch": 0.04, + "learning_rate": 4.9834003008261443e-05, + "loss": 0.3807, + "step": 36798 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833993971513036e-05, + "loss": 0.9153, + "step": 36799 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833984934519484e-05, + "loss": 1.0009, + "step": 36800 + }, + { + "epoch": 0.04, + "learning_rate": 4.983397589728078e-05, + "loss": 0.9898, + "step": 36801 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833966859796925e-05, + "loss": 1.1514, + "step": 36802 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833957822067925e-05, + "loss": 1.0338, + "step": 36803 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833948784093766e-05, + "loss": 1.3844, + "step": 36804 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833939745874456e-05, + "loss": 1.1031, + "step": 36805 + }, + { + "epoch": 0.04, + "learning_rate": 4.983393070741e-05, + "loss": 1.0982, + "step": 36806 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833921668700394e-05, + "loss": 1.0535, + "step": 36807 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833912629745635e-05, + "loss": 1.204, + "step": 36808 + }, + { + "epoch": 0.04, + "learning_rate": 4.983390359054573e-05, + "loss": 1.0828, + "step": 36809 + }, + { + "epoch": 0.04, + "learning_rate": 4.983389455110068e-05, + "loss": 1.0073, + "step": 36810 + }, + { + "epoch": 0.04, + "learning_rate": 4.983388551141047e-05, + "loss": 1.033, + "step": 36811 + }, + { + "epoch": 0.04, + "learning_rate": 4.983387647147512e-05, + "loss": 1.4636, + "step": 36812 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833867431294615e-05, + "loss": 1.0949, + "step": 36813 + }, + { + "epoch": 0.04, + "learning_rate": 4.983385839086895e-05, + "loss": 1.151, + "step": 36814 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833849350198147e-05, + "loss": 1.1209, + "step": 36815 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833840309282195e-05, + "loss": 1.1683, + "step": 36816 + }, + { + "epoch": 0.04, + "learning_rate": 4.983383126812109e-05, + "loss": 0.6928, + "step": 36817 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833822226714844e-05, + "loss": 0.7399, + "step": 36818 + }, + { + "epoch": 0.04, + "learning_rate": 4.983381318506344e-05, + "loss": 1.0921, + "step": 36819 + }, + { + "epoch": 0.04, + "learning_rate": 4.983380414316689e-05, + "loss": 0.9695, + "step": 36820 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833795101025196e-05, + "loss": 1.1091, + "step": 36821 + }, + { + "epoch": 0.04, + "learning_rate": 4.983378605863834e-05, + "loss": 1.1981, + "step": 36822 + }, + { + "epoch": 0.04, + "learning_rate": 4.983377701600635e-05, + "loss": 0.8559, + "step": 36823 + }, + { + "epoch": 0.04, + "learning_rate": 4.983376797312921e-05, + "loss": 1.0491, + "step": 36824 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833758930006915e-05, + "loss": 1.1326, + "step": 36825 + }, + { + "epoch": 0.04, + "learning_rate": 4.983374988663947e-05, + "loss": 0.9639, + "step": 36826 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833740843026874e-05, + "loss": 1.2676, + "step": 36827 + }, + { + "epoch": 0.04, + "learning_rate": 4.983373179916914e-05, + "loss": 0.9993, + "step": 36828 + }, + { + "epoch": 0.04, + "learning_rate": 4.983372275506626e-05, + "loss": 0.6418, + "step": 36829 + }, + { + "epoch": 0.04, + "learning_rate": 4.983371371071822e-05, + "loss": 1.2203, + "step": 36830 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833704666125034e-05, + "loss": 0.9394, + "step": 36831 + }, + { + "epoch": 0.04, + "learning_rate": 4.983369562128671e-05, + "loss": 1.1914, + "step": 36832 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833686576203234e-05, + "loss": 1.031, + "step": 36833 + }, + { + "epoch": 0.04, + "learning_rate": 4.983367753087461e-05, + "loss": 1.0077, + "step": 36834 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833668485300835e-05, + "loss": 1.0008, + "step": 36835 + }, + { + "epoch": 0.04, + "learning_rate": 4.983365943948191e-05, + "loss": 0.9195, + "step": 36836 + }, + { + "epoch": 0.04, + "learning_rate": 4.983365039341784e-05, + "loss": 1.0188, + "step": 36837 + }, + { + "epoch": 0.04, + "learning_rate": 4.983364134710863e-05, + "loss": 0.9124, + "step": 36838 + }, + { + "epoch": 0.04, + "learning_rate": 4.983363230055427e-05, + "loss": 0.8233, + "step": 36839 + }, + { + "epoch": 0.04, + "learning_rate": 4.983362325375476e-05, + "loss": 0.9263, + "step": 36840 + }, + { + "epoch": 0.04, + "learning_rate": 4.98336142067101e-05, + "loss": 1.1354, + "step": 36841 + }, + { + "epoch": 0.04, + "learning_rate": 4.983360515942029e-05, + "loss": 1.0681, + "step": 36842 + }, + { + "epoch": 0.04, + "learning_rate": 4.983359611188535e-05, + "loss": 1.1032, + "step": 36843 + }, + { + "epoch": 0.04, + "learning_rate": 4.983358706410525e-05, + "loss": 1.4875, + "step": 36844 + }, + { + "epoch": 0.04, + "learning_rate": 4.983357801608e-05, + "loss": 1.8018, + "step": 36845 + }, + { + "epoch": 0.04, + "learning_rate": 4.983356896780961e-05, + "loss": 0.6858, + "step": 36846 + }, + { + "epoch": 0.04, + "learning_rate": 4.983355991929407e-05, + "loss": 1.0381, + "step": 36847 + }, + { + "epoch": 0.04, + "learning_rate": 4.983355087053339e-05, + "loss": 1.0104, + "step": 36848 + }, + { + "epoch": 0.04, + "learning_rate": 4.983354182152756e-05, + "loss": 0.9331, + "step": 36849 + }, + { + "epoch": 0.04, + "learning_rate": 4.983353277227658e-05, + "loss": 0.6825, + "step": 36850 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833523722780464e-05, + "loss": 1.1063, + "step": 36851 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833514673039195e-05, + "loss": 4.6928, + "step": 36852 + }, + { + "epoch": 0.04, + "learning_rate": 4.983350562305278e-05, + "loss": 5.9102, + "step": 36853 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833496572821215e-05, + "loss": 5.8578, + "step": 36854 + }, + { + "epoch": 0.04, + "learning_rate": 4.983348752234451e-05, + "loss": 3.2318, + "step": 36855 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833478471622656e-05, + "loss": 1.2916, + "step": 36856 + }, + { + "epoch": 0.04, + "learning_rate": 4.983346942065566e-05, + "loss": 1.0646, + "step": 36857 + }, + { + "epoch": 0.04, + "learning_rate": 4.983346036944352e-05, + "loss": 0.6109, + "step": 36858 + }, + { + "epoch": 0.04, + "learning_rate": 4.983345131798623e-05, + "loss": 0.9308, + "step": 36859 + }, + { + "epoch": 0.04, + "learning_rate": 4.983344226628379e-05, + "loss": 1.3508, + "step": 36860 + }, + { + "epoch": 0.04, + "learning_rate": 4.983343321433621e-05, + "loss": 0.9147, + "step": 36861 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833424162143494e-05, + "loss": 0.8364, + "step": 36862 + }, + { + "epoch": 0.04, + "learning_rate": 4.983341510970562e-05, + "loss": 0.9622, + "step": 36863 + }, + { + "epoch": 0.04, + "learning_rate": 4.98334060570226e-05, + "loss": 1.1907, + "step": 36864 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833397004094444e-05, + "loss": 1.0463, + "step": 36865 + }, + { + "epoch": 0.04, + "learning_rate": 4.983338795092114e-05, + "loss": 1.2072, + "step": 36866 + }, + { + "epoch": 0.04, + "learning_rate": 4.983337889750269e-05, + "loss": 0.9537, + "step": 36867 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833369843839096e-05, + "loss": 0.867, + "step": 36868 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833360789930364e-05, + "loss": 1.0726, + "step": 36869 + }, + { + "epoch": 0.04, + "learning_rate": 4.983335173577648e-05, + "loss": 1.4943, + "step": 36870 + }, + { + "epoch": 0.04, + "learning_rate": 4.983334268137745e-05, + "loss": 1.1519, + "step": 36871 + }, + { + "epoch": 0.04, + "learning_rate": 4.983333362673328e-05, + "loss": 1.2276, + "step": 36872 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833324571843966e-05, + "loss": 0.9812, + "step": 36873 + }, + { + "epoch": 0.04, + "learning_rate": 4.98333155167095e-05, + "loss": 0.6455, + "step": 36874 + }, + { + "epoch": 0.04, + "learning_rate": 4.98333064613299e-05, + "loss": 1.1467, + "step": 36875 + }, + { + "epoch": 0.04, + "learning_rate": 4.983329740570515e-05, + "loss": 1.1002, + "step": 36876 + }, + { + "epoch": 0.04, + "learning_rate": 4.983328834983526e-05, + "loss": 1.1585, + "step": 36877 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833279293720227e-05, + "loss": 1.4414, + "step": 36878 + }, + { + "epoch": 0.04, + "learning_rate": 4.983327023736004e-05, + "loss": 1.0959, + "step": 36879 + }, + { + "epoch": 0.04, + "learning_rate": 4.983326118075472e-05, + "loss": 1.1214, + "step": 36880 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833252123904255e-05, + "loss": 1.2223, + "step": 36881 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833243066808646e-05, + "loss": 1.3191, + "step": 36882 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833234009467886e-05, + "loss": 1.0163, + "step": 36883 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833224951881994e-05, + "loss": 0.8561, + "step": 36884 + }, + { + "epoch": 0.04, + "learning_rate": 4.983321589405096e-05, + "loss": 0.7964, + "step": 36885 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833206835974775e-05, + "loss": 0.8973, + "step": 36886 + }, + { + "epoch": 0.04, + "learning_rate": 4.983319777765345e-05, + "loss": 0.5044, + "step": 36887 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833188719086984e-05, + "loss": 1.0611, + "step": 36888 + }, + { + "epoch": 0.04, + "learning_rate": 4.983317966027537e-05, + "loss": 1.3026, + "step": 36889 + }, + { + "epoch": 0.04, + "learning_rate": 4.983317060121861e-05, + "loss": 1.4119, + "step": 36890 + }, + { + "epoch": 0.04, + "learning_rate": 4.983316154191672e-05, + "loss": 1.0772, + "step": 36891 + }, + { + "epoch": 0.04, + "learning_rate": 4.983315248236968e-05, + "loss": 1.1458, + "step": 36892 + }, + { + "epoch": 0.04, + "learning_rate": 4.98331434225775e-05, + "loss": 1.1477, + "step": 36893 + }, + { + "epoch": 0.04, + "learning_rate": 4.983313436254018e-05, + "loss": 0.8542, + "step": 36894 + }, + { + "epoch": 0.04, + "learning_rate": 4.983312530225771e-05, + "loss": 1.92, + "step": 36895 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833116241730104e-05, + "loss": 1.778, + "step": 36896 + }, + { + "epoch": 0.04, + "learning_rate": 4.983310718095735e-05, + "loss": 1.0704, + "step": 36897 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833098119939456e-05, + "loss": 0.7534, + "step": 36898 + }, + { + "epoch": 0.04, + "learning_rate": 4.983308905867643e-05, + "loss": 0.7985, + "step": 36899 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833079997168255e-05, + "loss": 0.663, + "step": 36900 + }, + { + "epoch": 0.04, + "learning_rate": 4.983307093541494e-05, + "loss": 0.6427, + "step": 36901 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833061873416474e-05, + "loss": 0.5918, + "step": 36902 + }, + { + "epoch": 0.04, + "learning_rate": 4.983305281117288e-05, + "loss": 0.5693, + "step": 36903 + }, + { + "epoch": 0.04, + "learning_rate": 4.983304374868414e-05, + "loss": 0.5037, + "step": 36904 + }, + { + "epoch": 0.04, + "learning_rate": 4.9833034685950254e-05, + "loss": 1.0081, + "step": 36905 + }, + { + "epoch": 0.04, + "learning_rate": 4.983302562297123e-05, + "loss": 1.3745, + "step": 36906 + }, + { + "epoch": 0.04, + "learning_rate": 4.983301655974706e-05, + "loss": 0.9751, + "step": 36907 + }, + { + "epoch": 0.04, + "learning_rate": 4.983300749627775e-05, + "loss": 1.1809, + "step": 36908 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832998432563304e-05, + "loss": 1.0226, + "step": 36909 + }, + { + "epoch": 0.04, + "learning_rate": 4.983298936860372e-05, + "loss": 1.1364, + "step": 36910 + }, + { + "epoch": 0.04, + "learning_rate": 4.983298030439899e-05, + "loss": 1.1925, + "step": 36911 + }, + { + "epoch": 0.04, + "learning_rate": 4.983297123994912e-05, + "loss": 1.3589, + "step": 36912 + }, + { + "epoch": 0.04, + "learning_rate": 4.98329621752541e-05, + "loss": 1.0044, + "step": 36913 + }, + { + "epoch": 0.04, + "learning_rate": 4.983295311031396e-05, + "loss": 0.6888, + "step": 36914 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832944045128664e-05, + "loss": 1.6713, + "step": 36915 + }, + { + "epoch": 0.04, + "learning_rate": 4.983293497969823e-05, + "loss": 1.2617, + "step": 36916 + }, + { + "epoch": 0.04, + "learning_rate": 4.983292591402266e-05, + "loss": 0.9379, + "step": 36917 + }, + { + "epoch": 0.04, + "learning_rate": 4.983291684810195e-05, + "loss": 0.8377, + "step": 36918 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832907781936095e-05, + "loss": 0.8497, + "step": 36919 + }, + { + "epoch": 0.04, + "learning_rate": 4.98328987155251e-05, + "loss": 0.6882, + "step": 36920 + }, + { + "epoch": 0.04, + "learning_rate": 4.983288964886898e-05, + "loss": 0.8182, + "step": 36921 + }, + { + "epoch": 0.04, + "learning_rate": 4.98328805819677e-05, + "loss": 0.9593, + "step": 36922 + }, + { + "epoch": 0.04, + "learning_rate": 4.983287151482129e-05, + "loss": 1.0801, + "step": 36923 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832862447429736e-05, + "loss": 1.3052, + "step": 36924 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832853379793046e-05, + "loss": 0.9283, + "step": 36925 + }, + { + "epoch": 0.04, + "learning_rate": 4.983284431191121e-05, + "loss": 1.0073, + "step": 36926 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832835243784245e-05, + "loss": 0.9043, + "step": 36927 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832826175412134e-05, + "loss": 0.8761, + "step": 36928 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832817106794885e-05, + "loss": 0.7138, + "step": 36929 + }, + { + "epoch": 0.04, + "learning_rate": 4.98328080379325e-05, + "loss": 1.1774, + "step": 36930 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832798968824965e-05, + "loss": 1.4419, + "step": 36931 + }, + { + "epoch": 0.04, + "learning_rate": 4.98327898994723e-05, + "loss": 1.2212, + "step": 36932 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832780829874506e-05, + "loss": 0.7972, + "step": 36933 + }, + { + "epoch": 0.04, + "learning_rate": 4.983277176003156e-05, + "loss": 1.0358, + "step": 36934 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832762689943475e-05, + "loss": 1.2322, + "step": 36935 + }, + { + "epoch": 0.04, + "learning_rate": 4.983275361961026e-05, + "loss": 1.1425, + "step": 36936 + }, + { + "epoch": 0.04, + "learning_rate": 4.98327445490319e-05, + "loss": 1.0652, + "step": 36937 + }, + { + "epoch": 0.04, + "learning_rate": 4.98327354782084e-05, + "loss": 1.1425, + "step": 36938 + }, + { + "epoch": 0.04, + "learning_rate": 4.983272640713976e-05, + "loss": 0.7006, + "step": 36939 + }, + { + "epoch": 0.04, + "learning_rate": 4.983271733582599e-05, + "loss": 0.8883, + "step": 36940 + }, + { + "epoch": 0.04, + "learning_rate": 4.983270826426708e-05, + "loss": 0.8184, + "step": 36941 + }, + { + "epoch": 0.04, + "learning_rate": 4.983269919246303e-05, + "loss": 0.9861, + "step": 36942 + }, + { + "epoch": 0.04, + "learning_rate": 4.983269012041384e-05, + "loss": 0.9146, + "step": 36943 + }, + { + "epoch": 0.04, + "learning_rate": 4.983268104811951e-05, + "loss": 0.9482, + "step": 36944 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832671975580045e-05, + "loss": 1.0169, + "step": 36945 + }, + { + "epoch": 0.04, + "learning_rate": 4.983266290279545e-05, + "loss": 0.9256, + "step": 36946 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832653829765706e-05, + "loss": 0.8774, + "step": 36947 + }, + { + "epoch": 0.04, + "learning_rate": 4.983264475649083e-05, + "loss": 1.2414, + "step": 36948 + }, + { + "epoch": 0.04, + "learning_rate": 4.983263568297082e-05, + "loss": 1.1182, + "step": 36949 + }, + { + "epoch": 0.04, + "learning_rate": 4.983262660920567e-05, + "loss": 0.9541, + "step": 36950 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832617535195384e-05, + "loss": 0.9535, + "step": 36951 + }, + { + "epoch": 0.04, + "learning_rate": 4.983260846093996e-05, + "loss": 1.0735, + "step": 36952 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832599386439394e-05, + "loss": 1.4105, + "step": 36953 + }, + { + "epoch": 0.04, + "learning_rate": 4.983259031169369e-05, + "loss": 1.0232, + "step": 36954 + }, + { + "epoch": 0.04, + "learning_rate": 4.983258123670286e-05, + "loss": 0.8404, + "step": 36955 + }, + { + "epoch": 0.04, + "learning_rate": 4.983257216146689e-05, + "loss": 0.8381, + "step": 36956 + }, + { + "epoch": 0.04, + "learning_rate": 4.983256308598577e-05, + "loss": 1.0837, + "step": 36957 + }, + { + "epoch": 0.04, + "learning_rate": 4.983255401025953e-05, + "loss": 1.1877, + "step": 36958 + }, + { + "epoch": 0.04, + "learning_rate": 4.983254493428815e-05, + "loss": 1.0531, + "step": 36959 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832535858071626e-05, + "loss": 1.0451, + "step": 36960 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832526781609964e-05, + "loss": 1.2923, + "step": 36961 + }, + { + "epoch": 0.04, + "learning_rate": 4.983251770490318e-05, + "loss": 0.9204, + "step": 36962 + }, + { + "epoch": 0.04, + "learning_rate": 4.983250862795125e-05, + "loss": 0.6064, + "step": 36963 + }, + { + "epoch": 0.04, + "learning_rate": 4.983249955075418e-05, + "loss": 1.2597, + "step": 36964 + }, + { + "epoch": 0.04, + "learning_rate": 4.983249047331199e-05, + "loss": 1.1686, + "step": 36965 + }, + { + "epoch": 0.04, + "learning_rate": 4.983248139562465e-05, + "loss": 1.2909, + "step": 36966 + }, + { + "epoch": 0.04, + "learning_rate": 4.983247231769218e-05, + "loss": 1.306, + "step": 36967 + }, + { + "epoch": 0.04, + "learning_rate": 4.983246323951457e-05, + "loss": 1.1465, + "step": 36968 + }, + { + "epoch": 0.04, + "learning_rate": 4.983245416109183e-05, + "loss": 1.083, + "step": 36969 + }, + { + "epoch": 0.04, + "learning_rate": 4.983244508242395e-05, + "loss": 0.9662, + "step": 36970 + }, + { + "epoch": 0.04, + "learning_rate": 4.983243600351094e-05, + "loss": 0.706, + "step": 36971 + }, + { + "epoch": 0.04, + "learning_rate": 4.983242692435279e-05, + "loss": 0.8063, + "step": 36972 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832417844949504e-05, + "loss": 0.7978, + "step": 36973 + }, + { + "epoch": 0.04, + "learning_rate": 4.983240876530109e-05, + "loss": 1.019, + "step": 36974 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832399685407526e-05, + "loss": 1.0451, + "step": 36975 + }, + { + "epoch": 0.04, + "learning_rate": 4.983239060526884e-05, + "loss": 1.0261, + "step": 36976 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832381524885015e-05, + "loss": 1.1023, + "step": 36977 + }, + { + "epoch": 0.04, + "learning_rate": 4.983237244425606e-05, + "loss": 0.9134, + "step": 36978 + }, + { + "epoch": 0.04, + "learning_rate": 4.983236336338196e-05, + "loss": 0.9541, + "step": 36979 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832354282262733e-05, + "loss": 1.0336, + "step": 36980 + }, + { + "epoch": 0.04, + "learning_rate": 4.983234520089837e-05, + "loss": 0.8777, + "step": 36981 + }, + { + "epoch": 0.04, + "learning_rate": 4.983233611928887e-05, + "loss": 0.8228, + "step": 36982 + }, + { + "epoch": 0.04, + "learning_rate": 4.983232703743424e-05, + "loss": 1.0182, + "step": 36983 + }, + { + "epoch": 0.04, + "learning_rate": 4.983231795533447e-05, + "loss": 1.0287, + "step": 36984 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832308872989576e-05, + "loss": 0.8631, + "step": 36985 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832299790399536e-05, + "loss": 0.8082, + "step": 36986 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832290707564365e-05, + "loss": 1.0037, + "step": 36987 + }, + { + "epoch": 0.04, + "learning_rate": 4.983228162448407e-05, + "loss": 0.9391, + "step": 36988 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832272541158634e-05, + "loss": 0.8964, + "step": 36989 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832263457588055e-05, + "loss": 1.101, + "step": 36990 + }, + { + "epoch": 0.04, + "learning_rate": 4.983225437377236e-05, + "loss": 1.066, + "step": 36991 + }, + { + "epoch": 0.04, + "learning_rate": 4.983224528971152e-05, + "loss": 0.9306, + "step": 36992 + }, + { + "epoch": 0.04, + "learning_rate": 4.983223620540555e-05, + "loss": 0.7544, + "step": 36993 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832227120854445e-05, + "loss": 1.0045, + "step": 36994 + }, + { + "epoch": 0.04, + "learning_rate": 4.983221803605821e-05, + "loss": 0.7731, + "step": 36995 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832208951016835e-05, + "loss": 1.1674, + "step": 36996 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832199865730337e-05, + "loss": 0.8587, + "step": 36997 + }, + { + "epoch": 0.04, + "learning_rate": 4.983219078019869e-05, + "loss": 0.9451, + "step": 36998 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832181694421925e-05, + "loss": 1.1191, + "step": 36999 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832172608400026e-05, + "loss": 0.9969, + "step": 37000 + }, + { + "epoch": 0.04, + "eval_loss": 1.0708471536636353, + "eval_runtime": 125.1415, + "eval_samples_per_second": 11.067, + "eval_steps_per_second": 5.538, + "step": 37000 + }, + { + "epoch": 0.04, + "learning_rate": 4.983216352213299e-05, + "loss": 0.9737, + "step": 37001 + }, + { + "epoch": 0.04, + "learning_rate": 4.983215443562082e-05, + "loss": 0.9251, + "step": 37002 + }, + { + "epoch": 0.04, + "learning_rate": 4.983214534886352e-05, + "loss": 1.1195, + "step": 37003 + }, + { + "epoch": 0.04, + "learning_rate": 4.983213626186108e-05, + "loss": 0.9834, + "step": 37004 + }, + { + "epoch": 0.04, + "learning_rate": 4.983212717461352e-05, + "loss": 0.7544, + "step": 37005 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832118087120824e-05, + "loss": 0.797, + "step": 37006 + }, + { + "epoch": 0.04, + "learning_rate": 4.983210899938299e-05, + "loss": 0.9263, + "step": 37007 + }, + { + "epoch": 0.04, + "learning_rate": 4.983209991140003e-05, + "loss": 0.8319, + "step": 37008 + }, + { + "epoch": 0.04, + "learning_rate": 4.983209082317194e-05, + "loss": 1.1437, + "step": 37009 + }, + { + "epoch": 0.04, + "learning_rate": 4.983208173469871e-05, + "loss": 0.979, + "step": 37010 + }, + { + "epoch": 0.04, + "learning_rate": 4.983207264598036e-05, + "loss": 0.94, + "step": 37011 + }, + { + "epoch": 0.04, + "learning_rate": 4.983206355701686e-05, + "loss": 1.0042, + "step": 37012 + }, + { + "epoch": 0.04, + "learning_rate": 4.983205446780824e-05, + "loss": 0.7373, + "step": 37013 + }, + { + "epoch": 0.04, + "learning_rate": 4.983204537835449e-05, + "loss": 1.4939, + "step": 37014 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832036288655606e-05, + "loss": 0.854, + "step": 37015 + }, + { + "epoch": 0.04, + "learning_rate": 4.983202719871159e-05, + "loss": 1.0409, + "step": 37016 + }, + { + "epoch": 0.04, + "learning_rate": 4.9832018108522447e-05, + "loss": 1.3098, + "step": 37017 + }, + { + "epoch": 0.04, + "learning_rate": 4.983200901808816e-05, + "loss": 1.0637, + "step": 37018 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831999927408755e-05, + "loss": 0.5199, + "step": 37019 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831990836484215e-05, + "loss": 0.509, + "step": 37020 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831981745314544e-05, + "loss": 0.9498, + "step": 37021 + }, + { + "epoch": 0.04, + "learning_rate": 4.983197265389974e-05, + "loss": 0.9969, + "step": 37022 + }, + { + "epoch": 0.04, + "learning_rate": 4.983196356223981e-05, + "loss": 1.0809, + "step": 37023 + }, + { + "epoch": 0.04, + "learning_rate": 4.983195447033474e-05, + "loss": 1.1254, + "step": 37024 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831945378184554e-05, + "loss": 1.185, + "step": 37025 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831936285789226e-05, + "loss": 1.4095, + "step": 37026 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831927193148774e-05, + "loss": 1.1762, + "step": 37027 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831918100263184e-05, + "loss": 0.9402, + "step": 37028 + }, + { + "epoch": 0.04, + "learning_rate": 4.983190900713247e-05, + "loss": 0.99, + "step": 37029 + }, + { + "epoch": 0.04, + "learning_rate": 4.983189991375662e-05, + "loss": 1.0937, + "step": 37030 + }, + { + "epoch": 0.04, + "learning_rate": 4.983189082013565e-05, + "loss": 1.0125, + "step": 37031 + }, + { + "epoch": 0.04, + "learning_rate": 4.983188172626955e-05, + "loss": 0.8702, + "step": 37032 + }, + { + "epoch": 0.04, + "learning_rate": 4.98318726321583e-05, + "loss": 0.8111, + "step": 37033 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831863537801945e-05, + "loss": 1.0178, + "step": 37034 + }, + { + "epoch": 0.04, + "learning_rate": 4.983185444320044e-05, + "loss": 0.7566, + "step": 37035 + }, + { + "epoch": 0.04, + "learning_rate": 4.983184534835382e-05, + "loss": 0.8191, + "step": 37036 + }, + { + "epoch": 0.04, + "learning_rate": 4.983183625326206e-05, + "loss": 1.0536, + "step": 37037 + }, + { + "epoch": 0.04, + "learning_rate": 4.983182715792518e-05, + "loss": 0.9071, + "step": 37038 + }, + { + "epoch": 0.04, + "learning_rate": 4.983181806234317e-05, + "loss": 1.1715, + "step": 37039 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831808966516026e-05, + "loss": 0.7357, + "step": 37040 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831799870443756e-05, + "loss": 0.5577, + "step": 37041 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831790774126354e-05, + "loss": 0.602, + "step": 37042 + }, + { + "epoch": 0.04, + "learning_rate": 4.983178167756383e-05, + "loss": 0.8586, + "step": 37043 + }, + { + "epoch": 0.04, + "learning_rate": 4.983177258075617e-05, + "loss": 0.7452, + "step": 37044 + }, + { + "epoch": 0.04, + "learning_rate": 4.983176348370338e-05, + "loss": 0.5757, + "step": 37045 + }, + { + "epoch": 0.04, + "learning_rate": 4.983175438640547e-05, + "loss": 1.0904, + "step": 37046 + }, + { + "epoch": 0.04, + "learning_rate": 4.983174528886242e-05, + "loss": 1.215, + "step": 37047 + }, + { + "epoch": 0.04, + "learning_rate": 4.983173619107425e-05, + "loss": 0.9451, + "step": 37048 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831727093040945e-05, + "loss": 1.0658, + "step": 37049 + }, + { + "epoch": 0.04, + "learning_rate": 4.983171799476252e-05, + "loss": 0.9416, + "step": 37050 + }, + { + "epoch": 0.04, + "learning_rate": 4.983170889623896e-05, + "loss": 0.8319, + "step": 37051 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831699797470275e-05, + "loss": 0.9028, + "step": 37052 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831690698456455e-05, + "loss": 1.1375, + "step": 37053 + }, + { + "epoch": 0.04, + "learning_rate": 4.983168159919752e-05, + "loss": 1.2942, + "step": 37054 + }, + { + "epoch": 0.04, + "learning_rate": 4.983167249969345e-05, + "loss": 1.0182, + "step": 37055 + }, + { + "epoch": 0.04, + "learning_rate": 4.983166339994425e-05, + "loss": 1.2371, + "step": 37056 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831654299949925e-05, + "loss": 1.449, + "step": 37057 + }, + { + "epoch": 0.04, + "learning_rate": 4.983164519971047e-05, + "loss": 1.0411, + "step": 37058 + }, + { + "epoch": 0.04, + "learning_rate": 4.983163609922589e-05, + "loss": 1.0434, + "step": 37059 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831626998496184e-05, + "loss": 0.8422, + "step": 37060 + }, + { + "epoch": 0.04, + "learning_rate": 4.983161789752134e-05, + "loss": 0.5488, + "step": 37061 + }, + { + "epoch": 0.04, + "learning_rate": 4.983160879630138e-05, + "loss": 0.3564, + "step": 37062 + }, + { + "epoch": 0.04, + "learning_rate": 4.983159969483629e-05, + "loss": 0.1234, + "step": 37063 + }, + { + "epoch": 0.04, + "learning_rate": 4.983159059312608e-05, + "loss": 0.7602, + "step": 37064 + }, + { + "epoch": 0.04, + "learning_rate": 4.983158149117073e-05, + "loss": 0.7228, + "step": 37065 + }, + { + "epoch": 0.04, + "learning_rate": 4.983157238897026e-05, + "loss": 0.8569, + "step": 37066 + }, + { + "epoch": 0.04, + "learning_rate": 4.983156328652466e-05, + "loss": 1.1943, + "step": 37067 + }, + { + "epoch": 0.04, + "learning_rate": 4.983155418383394e-05, + "loss": 0.6971, + "step": 37068 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831545080898086e-05, + "loss": 1.0632, + "step": 37069 + }, + { + "epoch": 0.04, + "learning_rate": 4.98315359777171e-05, + "loss": 0.8471, + "step": 37070 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831526874291e-05, + "loss": 1.4911, + "step": 37071 + }, + { + "epoch": 0.04, + "learning_rate": 4.983151777061977e-05, + "loss": 1.2158, + "step": 37072 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831508666703415e-05, + "loss": 1.0548, + "step": 37073 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831499562541925e-05, + "loss": 1.1568, + "step": 37074 + }, + { + "epoch": 0.04, + "learning_rate": 4.983149045813532e-05, + "loss": 0.9809, + "step": 37075 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831481353483586e-05, + "loss": 1.1139, + "step": 37076 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831472248586723e-05, + "loss": 0.6102, + "step": 37077 + }, + { + "epoch": 0.04, + "learning_rate": 4.983146314344474e-05, + "loss": 0.6285, + "step": 37078 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831454038057624e-05, + "loss": 0.4838, + "step": 37079 + }, + { + "epoch": 0.04, + "learning_rate": 4.983144493242539e-05, + "loss": 0.7148, + "step": 37080 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831435826548026e-05, + "loss": 0.7937, + "step": 37081 + }, + { + "epoch": 0.04, + "learning_rate": 4.983142672042553e-05, + "loss": 0.6947, + "step": 37082 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831417614057916e-05, + "loss": 1.1308, + "step": 37083 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831408507445174e-05, + "loss": 0.9788, + "step": 37084 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831399400587315e-05, + "loss": 1.1711, + "step": 37085 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831390293484324e-05, + "loss": 1.0112, + "step": 37086 + }, + { + "epoch": 0.04, + "learning_rate": 4.983138118613621e-05, + "loss": 1.0968, + "step": 37087 + }, + { + "epoch": 0.04, + "learning_rate": 4.983137207854297e-05, + "loss": 1.1584, + "step": 37088 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831362970704604e-05, + "loss": 1.7219, + "step": 37089 + }, + { + "epoch": 0.04, + "learning_rate": 4.983135386262111e-05, + "loss": 1.6138, + "step": 37090 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831344754292494e-05, + "loss": 1.2035, + "step": 37091 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831335645718756e-05, + "loss": 1.1442, + "step": 37092 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831326536899894e-05, + "loss": 1.0054, + "step": 37093 + }, + { + "epoch": 0.04, + "learning_rate": 4.983131742783591e-05, + "loss": 0.9216, + "step": 37094 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831308318526794e-05, + "loss": 0.9446, + "step": 37095 + }, + { + "epoch": 0.04, + "learning_rate": 4.983129920897256e-05, + "loss": 0.9727, + "step": 37096 + }, + { + "epoch": 0.04, + "learning_rate": 4.98312900991732e-05, + "loss": 0.6697, + "step": 37097 + }, + { + "epoch": 0.04, + "learning_rate": 4.983128098912871e-05, + "loss": 0.1486, + "step": 37098 + }, + { + "epoch": 0.04, + "learning_rate": 4.98312718788391e-05, + "loss": 0.1014, + "step": 37099 + }, + { + "epoch": 0.04, + "learning_rate": 4.983126276830437e-05, + "loss": 0.1094, + "step": 37100 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831253657524514e-05, + "loss": 0.1058, + "step": 37101 + }, + { + "epoch": 0.04, + "learning_rate": 4.983124454649953e-05, + "loss": 0.1476, + "step": 37102 + }, + { + "epoch": 0.04, + "learning_rate": 4.983123543522943e-05, + "loss": 0.7706, + "step": 37103 + }, + { + "epoch": 0.04, + "learning_rate": 4.98312263237142e-05, + "loss": 1.3213, + "step": 37104 + }, + { + "epoch": 0.04, + "learning_rate": 4.983121721195385e-05, + "loss": 0.886, + "step": 37105 + }, + { + "epoch": 0.04, + "learning_rate": 4.983120809994838e-05, + "loss": 1.0118, + "step": 37106 + }, + { + "epoch": 0.04, + "learning_rate": 4.983119898769778e-05, + "loss": 1.0911, + "step": 37107 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831189875202066e-05, + "loss": 0.7217, + "step": 37108 + }, + { + "epoch": 0.04, + "learning_rate": 4.983118076246122e-05, + "loss": 1.1211, + "step": 37109 + }, + { + "epoch": 0.04, + "learning_rate": 4.983117164947525e-05, + "loss": 1.1343, + "step": 37110 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831162536244166e-05, + "loss": 1.0611, + "step": 37111 + }, + { + "epoch": 0.04, + "learning_rate": 4.983115342276795e-05, + "loss": 0.7942, + "step": 37112 + }, + { + "epoch": 0.04, + "learning_rate": 4.983114430904662e-05, + "loss": 1.0665, + "step": 37113 + }, + { + "epoch": 0.04, + "learning_rate": 4.983113519508016e-05, + "loss": 1.018, + "step": 37114 + }, + { + "epoch": 0.04, + "learning_rate": 4.983112608086858e-05, + "loss": 1.096, + "step": 37115 + }, + { + "epoch": 0.04, + "learning_rate": 4.983111696641188e-05, + "loss": 0.9788, + "step": 37116 + }, + { + "epoch": 0.04, + "learning_rate": 4.983110785171006e-05, + "loss": 1.5928, + "step": 37117 + }, + { + "epoch": 0.04, + "learning_rate": 4.983109873676311e-05, + "loss": 0.7687, + "step": 37118 + }, + { + "epoch": 0.04, + "learning_rate": 4.983108962157103e-05, + "loss": 1.072, + "step": 37119 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831080506133855e-05, + "loss": 1.1903, + "step": 37120 + }, + { + "epoch": 0.04, + "learning_rate": 4.983107139045153e-05, + "loss": 1.0856, + "step": 37121 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831062274524104e-05, + "loss": 1.7782, + "step": 37122 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831053158351545e-05, + "loss": 1.4931, + "step": 37123 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831044041933875e-05, + "loss": 1.3828, + "step": 37124 + }, + { + "epoch": 0.04, + "learning_rate": 4.983103492527107e-05, + "loss": 1.0567, + "step": 37125 + }, + { + "epoch": 0.04, + "learning_rate": 4.983102580836315e-05, + "loss": 0.9978, + "step": 37126 + }, + { + "epoch": 0.04, + "learning_rate": 4.983101669121011e-05, + "loss": 0.9263, + "step": 37127 + }, + { + "epoch": 0.04, + "learning_rate": 4.983100757381194e-05, + "loss": 0.8946, + "step": 37128 + }, + { + "epoch": 0.04, + "learning_rate": 4.983099845616865e-05, + "loss": 1.2699, + "step": 37129 + }, + { + "epoch": 0.04, + "learning_rate": 4.983098933828025e-05, + "loss": 1.3854, + "step": 37130 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830980220146726e-05, + "loss": 1.3102, + "step": 37131 + }, + { + "epoch": 0.04, + "learning_rate": 4.983097110176807e-05, + "loss": 1.2847, + "step": 37132 + }, + { + "epoch": 0.04, + "learning_rate": 4.983096198314431e-05, + "loss": 0.9375, + "step": 37133 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830952864275414e-05, + "loss": 1.0293, + "step": 37134 + }, + { + "epoch": 0.04, + "learning_rate": 4.983094374516141e-05, + "loss": 1.0099, + "step": 37135 + }, + { + "epoch": 0.04, + "learning_rate": 4.983093462580227e-05, + "loss": 0.8977, + "step": 37136 + }, + { + "epoch": 0.04, + "learning_rate": 4.983092550619802e-05, + "loss": 1.1213, + "step": 37137 + }, + { + "epoch": 0.04, + "learning_rate": 4.983091638634865e-05, + "loss": 0.3391, + "step": 37138 + }, + { + "epoch": 0.04, + "learning_rate": 4.983090726625416e-05, + "loss": 1.1347, + "step": 37139 + }, + { + "epoch": 0.04, + "learning_rate": 4.983089814591454e-05, + "loss": 0.9314, + "step": 37140 + }, + { + "epoch": 0.04, + "learning_rate": 4.983088902532981e-05, + "loss": 1.0865, + "step": 37141 + }, + { + "epoch": 0.04, + "learning_rate": 4.983087990449996e-05, + "loss": 1.0551, + "step": 37142 + }, + { + "epoch": 0.04, + "learning_rate": 4.983087078342499e-05, + "loss": 0.7133, + "step": 37143 + }, + { + "epoch": 0.04, + "learning_rate": 4.983086166210489e-05, + "loss": 1.228, + "step": 37144 + }, + { + "epoch": 0.04, + "learning_rate": 4.983085254053967e-05, + "loss": 0.8772, + "step": 37145 + }, + { + "epoch": 0.04, + "learning_rate": 4.983084341872934e-05, + "loss": 1.0035, + "step": 37146 + }, + { + "epoch": 0.04, + "learning_rate": 4.983083429667389e-05, + "loss": 1.0922, + "step": 37147 + }, + { + "epoch": 0.04, + "learning_rate": 4.983082517437332e-05, + "loss": 0.9889, + "step": 37148 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830816051827636e-05, + "loss": 1.1327, + "step": 37149 + }, + { + "epoch": 0.04, + "learning_rate": 4.983080692903682e-05, + "loss": 0.969, + "step": 37150 + }, + { + "epoch": 0.04, + "learning_rate": 4.983079780600089e-05, + "loss": 0.9858, + "step": 37151 + }, + { + "epoch": 0.04, + "learning_rate": 4.983078868271984e-05, + "loss": 1.025, + "step": 37152 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830779559193676e-05, + "loss": 1.0544, + "step": 37153 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830770435422384e-05, + "loss": 1.3377, + "step": 37154 + }, + { + "epoch": 0.04, + "learning_rate": 4.983076131140598e-05, + "loss": 0.9999, + "step": 37155 + }, + { + "epoch": 0.04, + "learning_rate": 4.983075218714446e-05, + "loss": 1.7338, + "step": 37156 + }, + { + "epoch": 0.04, + "learning_rate": 4.983074306263782e-05, + "loss": 1.3804, + "step": 37157 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830733937886054e-05, + "loss": 0.9534, + "step": 37158 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830724812889174e-05, + "loss": 0.8779, + "step": 37159 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830715687647176e-05, + "loss": 0.7322, + "step": 37160 + }, + { + "epoch": 0.04, + "learning_rate": 4.983070656216006e-05, + "loss": 1.4618, + "step": 37161 + }, + { + "epoch": 0.04, + "learning_rate": 4.983069743642782e-05, + "loss": 0.8874, + "step": 37162 + }, + { + "epoch": 0.04, + "learning_rate": 4.983068831045047e-05, + "loss": 0.8696, + "step": 37163 + }, + { + "epoch": 0.04, + "learning_rate": 4.983067918422799e-05, + "loss": 0.8591, + "step": 37164 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830670057760405e-05, + "loss": 1.1636, + "step": 37165 + }, + { + "epoch": 0.04, + "learning_rate": 4.983066093104769e-05, + "loss": 1.0825, + "step": 37166 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830651804089864e-05, + "loss": 0.9028, + "step": 37167 + }, + { + "epoch": 0.04, + "learning_rate": 4.983064267688693e-05, + "loss": 1.2529, + "step": 37168 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830633549438865e-05, + "loss": 0.8118, + "step": 37169 + }, + { + "epoch": 0.04, + "learning_rate": 4.983062442174569e-05, + "loss": 0.7312, + "step": 37170 + }, + { + "epoch": 0.04, + "learning_rate": 4.983061529380739e-05, + "loss": 0.9203, + "step": 37171 + }, + { + "epoch": 0.04, + "learning_rate": 4.983060616562398e-05, + "loss": 0.9994, + "step": 37172 + }, + { + "epoch": 0.04, + "learning_rate": 4.983059703719545e-05, + "loss": 1.006, + "step": 37173 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830587908521794e-05, + "loss": 2.2062, + "step": 37174 + }, + { + "epoch": 0.04, + "learning_rate": 4.983057877960303e-05, + "loss": 1.5178, + "step": 37175 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830569650439154e-05, + "loss": 1.0704, + "step": 37176 + }, + { + "epoch": 0.04, + "learning_rate": 4.983056052103015e-05, + "loss": 1.0759, + "step": 37177 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830551391376035e-05, + "loss": 0.8348, + "step": 37178 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830542261476806e-05, + "loss": 0.6089, + "step": 37179 + }, + { + "epoch": 0.04, + "learning_rate": 4.983053313133246e-05, + "loss": 0.9969, + "step": 37180 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830524000942994e-05, + "loss": 0.8839, + "step": 37181 + }, + { + "epoch": 0.04, + "learning_rate": 4.983051487030841e-05, + "loss": 0.8493, + "step": 37182 + }, + { + "epoch": 0.04, + "learning_rate": 4.983050573942871e-05, + "loss": 1.1578, + "step": 37183 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830496608303886e-05, + "loss": 1.3786, + "step": 37184 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830487476933964e-05, + "loss": 1.5479, + "step": 37185 + }, + { + "epoch": 0.04, + "learning_rate": 4.983047834531892e-05, + "loss": 1.3355, + "step": 37186 + }, + { + "epoch": 0.04, + "learning_rate": 4.983046921345875e-05, + "loss": 0.9387, + "step": 37187 + }, + { + "epoch": 0.04, + "learning_rate": 4.983046008135347e-05, + "loss": 1.2502, + "step": 37188 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830450949003076e-05, + "loss": 1.2744, + "step": 37189 + }, + { + "epoch": 0.04, + "learning_rate": 4.983044181640757e-05, + "loss": 1.1792, + "step": 37190 + }, + { + "epoch": 0.04, + "learning_rate": 4.983043268356694e-05, + "loss": 1.1053, + "step": 37191 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830423550481196e-05, + "loss": 1.372, + "step": 37192 + }, + { + "epoch": 0.04, + "learning_rate": 4.983041441715034e-05, + "loss": 0.9316, + "step": 37193 + }, + { + "epoch": 0.04, + "learning_rate": 4.983040528357437e-05, + "loss": 1.0529, + "step": 37194 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830396149753276e-05, + "loss": 0.9536, + "step": 37195 + }, + { + "epoch": 0.04, + "learning_rate": 4.983038701568707e-05, + "loss": 0.8556, + "step": 37196 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830377881375756e-05, + "loss": 0.9784, + "step": 37197 + }, + { + "epoch": 0.04, + "learning_rate": 4.983036874681932e-05, + "loss": 0.9089, + "step": 37198 + }, + { + "epoch": 0.04, + "learning_rate": 4.983035961201777e-05, + "loss": 1.1029, + "step": 37199 + }, + { + "epoch": 0.04, + "learning_rate": 4.983035047697111e-05, + "loss": 0.4404, + "step": 37200 + }, + { + "epoch": 0.04, + "learning_rate": 4.983034134167933e-05, + "loss": 0.2576, + "step": 37201 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830332206142446e-05, + "loss": 0.9426, + "step": 37202 + }, + { + "epoch": 0.04, + "learning_rate": 4.983032307036044e-05, + "loss": 0.8975, + "step": 37203 + }, + { + "epoch": 0.04, + "learning_rate": 4.983031393433332e-05, + "loss": 1.0942, + "step": 37204 + }, + { + "epoch": 0.04, + "learning_rate": 4.983030479806108e-05, + "loss": 0.8694, + "step": 37205 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830295661543726e-05, + "loss": 0.9178, + "step": 37206 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830286524781266e-05, + "loss": 0.9669, + "step": 37207 + }, + { + "epoch": 0.04, + "learning_rate": 4.983027738777368e-05, + "loss": 1.4771, + "step": 37208 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830268250520994e-05, + "loss": 1.108, + "step": 37209 + }, + { + "epoch": 0.04, + "learning_rate": 4.983025911302318e-05, + "loss": 0.8844, + "step": 37210 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830249975280264e-05, + "loss": 1.2477, + "step": 37211 + }, + { + "epoch": 0.04, + "learning_rate": 4.983024083729223e-05, + "loss": 1.6851, + "step": 37212 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830231699059084e-05, + "loss": 1.2402, + "step": 37213 + }, + { + "epoch": 0.04, + "learning_rate": 4.983022256058082e-05, + "loss": 0.7785, + "step": 37214 + }, + { + "epoch": 0.04, + "learning_rate": 4.983021342185744e-05, + "loss": 1.0528, + "step": 37215 + }, + { + "epoch": 0.04, + "learning_rate": 4.983020428288896e-05, + "loss": 1.101, + "step": 37216 + }, + { + "epoch": 0.04, + "learning_rate": 4.983019514367535e-05, + "loss": 1.1284, + "step": 37217 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830186004216636e-05, + "loss": 1.0795, + "step": 37218 + }, + { + "epoch": 0.04, + "learning_rate": 4.983017686451281e-05, + "loss": 1.0727, + "step": 37219 + }, + { + "epoch": 0.04, + "learning_rate": 4.983016772456387e-05, + "loss": 1.2518, + "step": 37220 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830158584369814e-05, + "loss": 1.0589, + "step": 37221 + }, + { + "epoch": 0.04, + "learning_rate": 4.983014944393065e-05, + "loss": 1.0621, + "step": 37222 + }, + { + "epoch": 0.04, + "learning_rate": 4.983014030324637e-05, + "loss": 1.0302, + "step": 37223 + }, + { + "epoch": 0.04, + "learning_rate": 4.983013116231697e-05, + "loss": 1.0525, + "step": 37224 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830122021142474e-05, + "loss": 0.9058, + "step": 37225 + }, + { + "epoch": 0.04, + "learning_rate": 4.983011287972286e-05, + "loss": 1.0538, + "step": 37226 + }, + { + "epoch": 0.04, + "learning_rate": 4.983010373805812e-05, + "loss": 1.0014, + "step": 37227 + }, + { + "epoch": 0.04, + "learning_rate": 4.983009459614828e-05, + "loss": 0.7622, + "step": 37228 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830085453993324e-05, + "loss": 0.514, + "step": 37229 + }, + { + "epoch": 0.04, + "learning_rate": 4.983007631159326e-05, + "loss": 0.8012, + "step": 37230 + }, + { + "epoch": 0.04, + "learning_rate": 4.983006716894808e-05, + "loss": 0.9157, + "step": 37231 + }, + { + "epoch": 0.04, + "learning_rate": 4.983005802605779e-05, + "loss": 1.0883, + "step": 37232 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830048882922384e-05, + "loss": 1.037, + "step": 37233 + }, + { + "epoch": 0.04, + "learning_rate": 4.983003973954188e-05, + "loss": 0.9878, + "step": 37234 + }, + { + "epoch": 0.04, + "learning_rate": 4.983003059591625e-05, + "loss": 1.2869, + "step": 37235 + }, + { + "epoch": 0.04, + "learning_rate": 4.983002145204551e-05, + "loss": 1.3243, + "step": 37236 + }, + { + "epoch": 0.04, + "learning_rate": 4.983001230792966e-05, + "loss": 1.076, + "step": 37237 + }, + { + "epoch": 0.04, + "learning_rate": 4.98300031635687e-05, + "loss": 1.2077, + "step": 37238 + }, + { + "epoch": 0.04, + "learning_rate": 4.982999401896263e-05, + "loss": 0.9647, + "step": 37239 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829984874111444e-05, + "loss": 0.4348, + "step": 37240 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829975729015156e-05, + "loss": 0.8925, + "step": 37241 + }, + { + "epoch": 0.04, + "learning_rate": 4.982996658367375e-05, + "loss": 1.0116, + "step": 37242 + }, + { + "epoch": 0.04, + "learning_rate": 4.982995743808723e-05, + "loss": 0.9373, + "step": 37243 + }, + { + "epoch": 0.04, + "learning_rate": 4.98299482922556e-05, + "loss": 1.0082, + "step": 37244 + }, + { + "epoch": 0.04, + "learning_rate": 4.982993914617886e-05, + "loss": 0.9786, + "step": 37245 + }, + { + "epoch": 0.04, + "learning_rate": 4.982992999985702e-05, + "loss": 0.8296, + "step": 37246 + }, + { + "epoch": 0.04, + "learning_rate": 4.982992085329006e-05, + "loss": 0.9245, + "step": 37247 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829911706477986e-05, + "loss": 0.9429, + "step": 37248 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829902559420803e-05, + "loss": 1.1214, + "step": 37249 + }, + { + "epoch": 0.04, + "learning_rate": 4.982989341211852e-05, + "loss": 1.2272, + "step": 37250 + }, + { + "epoch": 0.04, + "learning_rate": 4.982988426457111e-05, + "loss": 1.1335, + "step": 37251 + }, + { + "epoch": 0.04, + "learning_rate": 4.98298751167786e-05, + "loss": 0.9392, + "step": 37252 + }, + { + "epoch": 0.04, + "learning_rate": 4.982986596874098e-05, + "loss": 1.0155, + "step": 37253 + }, + { + "epoch": 0.04, + "learning_rate": 4.982985682045825e-05, + "loss": 1.2835, + "step": 37254 + }, + { + "epoch": 0.04, + "learning_rate": 4.982984767193041e-05, + "loss": 1.077, + "step": 37255 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829838523157465e-05, + "loss": 1.2109, + "step": 37256 + }, + { + "epoch": 0.04, + "learning_rate": 4.98298293741394e-05, + "loss": 1.1714, + "step": 37257 + }, + { + "epoch": 0.04, + "learning_rate": 4.982982022487623e-05, + "loss": 1.1337, + "step": 37258 + }, + { + "epoch": 0.04, + "learning_rate": 4.982981107536795e-05, + "loss": 1.0369, + "step": 37259 + }, + { + "epoch": 0.04, + "learning_rate": 4.982980192561456e-05, + "loss": 1.4234, + "step": 37260 + }, + { + "epoch": 0.04, + "learning_rate": 4.982979277561606e-05, + "loss": 1.0189, + "step": 37261 + }, + { + "epoch": 0.04, + "learning_rate": 4.982978362537246e-05, + "loss": 0.8157, + "step": 37262 + }, + { + "epoch": 0.04, + "learning_rate": 4.982977447488374e-05, + "loss": 1.0641, + "step": 37263 + }, + { + "epoch": 0.04, + "learning_rate": 4.982976532414991e-05, + "loss": 1.0226, + "step": 37264 + }, + { + "epoch": 0.04, + "learning_rate": 4.982975617317098e-05, + "loss": 0.8365, + "step": 37265 + }, + { + "epoch": 0.04, + "learning_rate": 4.982974702194694e-05, + "loss": 0.6841, + "step": 37266 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829737870477785e-05, + "loss": 1.0007, + "step": 37267 + }, + { + "epoch": 0.04, + "learning_rate": 4.982972871876353e-05, + "loss": 0.8509, + "step": 37268 + }, + { + "epoch": 0.04, + "learning_rate": 4.982971956680416e-05, + "loss": 0.8774, + "step": 37269 + }, + { + "epoch": 0.04, + "learning_rate": 4.982971041459967e-05, + "loss": 0.8619, + "step": 37270 + }, + { + "epoch": 0.04, + "learning_rate": 4.982970126215009e-05, + "loss": 0.9694, + "step": 37271 + }, + { + "epoch": 0.04, + "learning_rate": 4.982969210945539e-05, + "loss": 1.9738, + "step": 37272 + }, + { + "epoch": 0.04, + "learning_rate": 4.982968295651559e-05, + "loss": 1.1949, + "step": 37273 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829673803330677e-05, + "loss": 1.0747, + "step": 37274 + }, + { + "epoch": 0.04, + "learning_rate": 4.982966464990066e-05, + "loss": 1.0532, + "step": 37275 + }, + { + "epoch": 0.04, + "learning_rate": 4.982965549622553e-05, + "loss": 1.1967, + "step": 37276 + }, + { + "epoch": 0.04, + "learning_rate": 4.982964634230529e-05, + "loss": 1.1589, + "step": 37277 + }, + { + "epoch": 0.04, + "learning_rate": 4.982963718813995e-05, + "loss": 0.9855, + "step": 37278 + }, + { + "epoch": 0.04, + "learning_rate": 4.98296280337295e-05, + "loss": 2.5648, + "step": 37279 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829618879073944e-05, + "loss": 1.0919, + "step": 37280 + }, + { + "epoch": 0.04, + "learning_rate": 4.982960972417328e-05, + "loss": 0.8846, + "step": 37281 + }, + { + "epoch": 0.04, + "learning_rate": 4.98296005690275e-05, + "loss": 0.909, + "step": 37282 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829591413636624e-05, + "loss": 1.0312, + "step": 37283 + }, + { + "epoch": 0.04, + "learning_rate": 4.982958225800064e-05, + "loss": 0.9721, + "step": 37284 + }, + { + "epoch": 0.04, + "learning_rate": 4.982957310211954e-05, + "loss": 0.9649, + "step": 37285 + }, + { + "epoch": 0.04, + "learning_rate": 4.982956394599334e-05, + "loss": 0.8168, + "step": 37286 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829554789622025e-05, + "loss": 0.907, + "step": 37287 + }, + { + "epoch": 0.04, + "learning_rate": 4.982954563300561e-05, + "loss": 0.9205, + "step": 37288 + }, + { + "epoch": 0.04, + "learning_rate": 4.982953647614409e-05, + "loss": 1.0218, + "step": 37289 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829527319037455e-05, + "loss": 1.1242, + "step": 37290 + }, + { + "epoch": 0.04, + "learning_rate": 4.982951816168572e-05, + "loss": 1.0537, + "step": 37291 + }, + { + "epoch": 0.04, + "learning_rate": 4.982950900408888e-05, + "loss": 0.7965, + "step": 37292 + }, + { + "epoch": 0.04, + "learning_rate": 4.982949984624693e-05, + "loss": 1.0696, + "step": 37293 + }, + { + "epoch": 0.04, + "learning_rate": 4.982949068815988e-05, + "loss": 1.0365, + "step": 37294 + }, + { + "epoch": 0.04, + "learning_rate": 4.982948152982771e-05, + "loss": 0.9073, + "step": 37295 + }, + { + "epoch": 0.04, + "learning_rate": 4.982947237125044e-05, + "loss": 0.9954, + "step": 37296 + }, + { + "epoch": 0.04, + "learning_rate": 4.982946321242807e-05, + "loss": 0.8947, + "step": 37297 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829454053360584e-05, + "loss": 1.082, + "step": 37298 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829444894048e-05, + "loss": 1.3861, + "step": 37299 + }, + { + "epoch": 0.04, + "learning_rate": 4.982943573449031e-05, + "loss": 0.7791, + "step": 37300 + }, + { + "epoch": 0.04, + "learning_rate": 4.982942657468751e-05, + "loss": 0.616, + "step": 37301 + }, + { + "epoch": 0.04, + "learning_rate": 4.98294174146396e-05, + "loss": 0.5533, + "step": 37302 + }, + { + "epoch": 0.04, + "learning_rate": 4.98294082543466e-05, + "loss": 0.673, + "step": 37303 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829399093808484e-05, + "loss": 0.6749, + "step": 37304 + }, + { + "epoch": 0.04, + "learning_rate": 4.982938993302526e-05, + "loss": 0.8762, + "step": 37305 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829380771996934e-05, + "loss": 0.8843, + "step": 37306 + }, + { + "epoch": 0.04, + "learning_rate": 4.982937161072351e-05, + "loss": 0.8328, + "step": 37307 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829362449204974e-05, + "loss": 0.8002, + "step": 37308 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829353287441334e-05, + "loss": 1.073, + "step": 37309 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829344125432584e-05, + "loss": 1.0972, + "step": 37310 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829334963178736e-05, + "loss": 1.1166, + "step": 37311 + }, + { + "epoch": 0.04, + "learning_rate": 4.982932580067978e-05, + "loss": 1.0947, + "step": 37312 + }, + { + "epoch": 0.04, + "learning_rate": 4.982931663793572e-05, + "loss": 1.0949, + "step": 37313 + }, + { + "epoch": 0.04, + "learning_rate": 4.982930747494656e-05, + "loss": 0.9524, + "step": 37314 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829298311712294e-05, + "loss": 1.0455, + "step": 37315 + }, + { + "epoch": 0.04, + "learning_rate": 4.982928914823292e-05, + "loss": 0.2604, + "step": 37316 + }, + { + "epoch": 0.04, + "learning_rate": 4.982927998450844e-05, + "loss": 1.0347, + "step": 37317 + }, + { + "epoch": 0.04, + "learning_rate": 4.982927082053886e-05, + "loss": 0.9017, + "step": 37318 + }, + { + "epoch": 0.04, + "learning_rate": 4.982926165632417e-05, + "loss": 0.5978, + "step": 37319 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829252491864385e-05, + "loss": 0.5941, + "step": 37320 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829243327159494e-05, + "loss": 0.782, + "step": 37321 + }, + { + "epoch": 0.04, + "learning_rate": 4.982923416220949e-05, + "loss": 0.9892, + "step": 37322 + }, + { + "epoch": 0.04, + "learning_rate": 4.982922499701439e-05, + "loss": 1.1659, + "step": 37323 + }, + { + "epoch": 0.04, + "learning_rate": 4.982921583157419e-05, + "loss": 1.0593, + "step": 37324 + }, + { + "epoch": 0.04, + "learning_rate": 4.982920666588888e-05, + "loss": 0.8747, + "step": 37325 + }, + { + "epoch": 0.04, + "learning_rate": 4.982919749995847e-05, + "loss": 0.6159, + "step": 37326 + }, + { + "epoch": 0.04, + "learning_rate": 4.982918833378295e-05, + "loss": 1.0445, + "step": 37327 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829179167362336e-05, + "loss": 0.8682, + "step": 37328 + }, + { + "epoch": 0.04, + "learning_rate": 4.982917000069661e-05, + "loss": 0.8788, + "step": 37329 + }, + { + "epoch": 0.04, + "learning_rate": 4.982916083378579e-05, + "loss": 0.9568, + "step": 37330 + }, + { + "epoch": 0.04, + "learning_rate": 4.982915166662986e-05, + "loss": 0.7741, + "step": 37331 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829142499228834e-05, + "loss": 1.2552, + "step": 37332 + }, + { + "epoch": 0.04, + "learning_rate": 4.98291333315827e-05, + "loss": 1.221, + "step": 37333 + }, + { + "epoch": 0.04, + "learning_rate": 4.982912416369146e-05, + "loss": 1.1357, + "step": 37334 + }, + { + "epoch": 0.04, + "learning_rate": 4.982911499555512e-05, + "loss": 0.8158, + "step": 37335 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829105827173685e-05, + "loss": 0.6852, + "step": 37336 + }, + { + "epoch": 0.04, + "learning_rate": 4.982909665854713e-05, + "loss": 0.9538, + "step": 37337 + }, + { + "epoch": 0.04, + "learning_rate": 4.982908748967549e-05, + "loss": 1.119, + "step": 37338 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829078320558745e-05, + "loss": 1.0769, + "step": 37339 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829069151196894e-05, + "loss": 1.1174, + "step": 37340 + }, + { + "epoch": 0.04, + "learning_rate": 4.982905998158994e-05, + "loss": 0.8614, + "step": 37341 + }, + { + "epoch": 0.04, + "learning_rate": 4.982905081173789e-05, + "loss": 0.8377, + "step": 37342 + }, + { + "epoch": 0.04, + "learning_rate": 4.982904164164073e-05, + "loss": 0.9851, + "step": 37343 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829032471298464e-05, + "loss": 1.5006, + "step": 37344 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829023300711106e-05, + "loss": 1.2153, + "step": 37345 + }, + { + "epoch": 0.04, + "learning_rate": 4.982901412987865e-05, + "loss": 1.0373, + "step": 37346 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829004958801084e-05, + "loss": 1.1634, + "step": 37347 + }, + { + "epoch": 0.04, + "learning_rate": 4.982899578747842e-05, + "loss": 0.9354, + "step": 37348 + }, + { + "epoch": 0.04, + "learning_rate": 4.982898661591066e-05, + "loss": 0.9677, + "step": 37349 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828977444097786e-05, + "loss": 1.0071, + "step": 37350 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828968272039816e-05, + "loss": 1.0833, + "step": 37351 + }, + { + "epoch": 0.04, + "learning_rate": 4.982895909973674e-05, + "loss": 0.8937, + "step": 37352 + }, + { + "epoch": 0.04, + "learning_rate": 4.982894992718857e-05, + "loss": 0.417, + "step": 37353 + }, + { + "epoch": 0.04, + "learning_rate": 4.98289407543953e-05, + "loss": 0.9609, + "step": 37354 + }, + { + "epoch": 0.04, + "learning_rate": 4.982893158135693e-05, + "loss": 0.9911, + "step": 37355 + }, + { + "epoch": 0.04, + "learning_rate": 4.982892240807345e-05, + "loss": 1.1674, + "step": 37356 + }, + { + "epoch": 0.04, + "learning_rate": 4.982891323454488e-05, + "loss": 0.7699, + "step": 37357 + }, + { + "epoch": 0.04, + "learning_rate": 4.982890406077121e-05, + "loss": 0.7248, + "step": 37358 + }, + { + "epoch": 0.04, + "learning_rate": 4.982889488675243e-05, + "loss": 1.0522, + "step": 37359 + }, + { + "epoch": 0.04, + "learning_rate": 4.982888571248856e-05, + "loss": 1.0011, + "step": 37360 + }, + { + "epoch": 0.04, + "learning_rate": 4.982887653797958e-05, + "loss": 1.1132, + "step": 37361 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828867363225505e-05, + "loss": 1.1456, + "step": 37362 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828858188226325e-05, + "loss": 1.199, + "step": 37363 + }, + { + "epoch": 0.04, + "learning_rate": 4.982884901298205e-05, + "loss": 1.0583, + "step": 37364 + }, + { + "epoch": 0.04, + "learning_rate": 4.982883983749267e-05, + "loss": 0.8987, + "step": 37365 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828830661758193e-05, + "loss": 1.3758, + "step": 37366 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828821485778624e-05, + "loss": 1.0459, + "step": 37367 + }, + { + "epoch": 0.04, + "learning_rate": 4.982881230955395e-05, + "loss": 1.116, + "step": 37368 + }, + { + "epoch": 0.04, + "learning_rate": 4.982880313308417e-05, + "loss": 1.1657, + "step": 37369 + }, + { + "epoch": 0.04, + "learning_rate": 4.98287939563693e-05, + "loss": 0.8983, + "step": 37370 + }, + { + "epoch": 0.04, + "learning_rate": 4.982878477940932e-05, + "loss": 0.6178, + "step": 37371 + }, + { + "epoch": 0.04, + "learning_rate": 4.982877560220425e-05, + "loss": 1.0327, + "step": 37372 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828766424754084e-05, + "loss": 1.1051, + "step": 37373 + }, + { + "epoch": 0.04, + "learning_rate": 4.982875724705881e-05, + "loss": 0.9129, + "step": 37374 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828748069118435e-05, + "loss": 0.9454, + "step": 37375 + }, + { + "epoch": 0.04, + "learning_rate": 4.982873889093297e-05, + "loss": 1.2836, + "step": 37376 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828729712502396e-05, + "loss": 1.2955, + "step": 37377 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828720533826734e-05, + "loss": 1.0645, + "step": 37378 + }, + { + "epoch": 0.04, + "learning_rate": 4.982871135490597e-05, + "loss": 1.1577, + "step": 37379 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828702175740104e-05, + "loss": 0.9552, + "step": 37380 + }, + { + "epoch": 0.04, + "learning_rate": 4.982869299632914e-05, + "loss": 1.3038, + "step": 37381 + }, + { + "epoch": 0.04, + "learning_rate": 4.982868381667308e-05, + "loss": 1.3183, + "step": 37382 + }, + { + "epoch": 0.04, + "learning_rate": 4.982867463677192e-05, + "loss": 1.1463, + "step": 37383 + }, + { + "epoch": 0.04, + "learning_rate": 4.982866545662566e-05, + "loss": 1.2771, + "step": 37384 + }, + { + "epoch": 0.04, + "learning_rate": 4.982865627623431e-05, + "loss": 0.603, + "step": 37385 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828647095597854e-05, + "loss": 0.409, + "step": 37386 + }, + { + "epoch": 0.04, + "learning_rate": 4.98286379147163e-05, + "loss": 0.9849, + "step": 37387 + }, + { + "epoch": 0.04, + "learning_rate": 4.982862873358965e-05, + "loss": 1.1076, + "step": 37388 + }, + { + "epoch": 0.04, + "learning_rate": 4.98286195522179e-05, + "loss": 1.2038, + "step": 37389 + }, + { + "epoch": 0.04, + "learning_rate": 4.982861037060106e-05, + "loss": 0.7608, + "step": 37390 + }, + { + "epoch": 0.04, + "learning_rate": 4.982860118873911e-05, + "loss": 1.1362, + "step": 37391 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828592006632076e-05, + "loss": 1.3803, + "step": 37392 + }, + { + "epoch": 0.04, + "learning_rate": 4.982858282427993e-05, + "loss": 0.9596, + "step": 37393 + }, + { + "epoch": 0.04, + "learning_rate": 4.98285736416827e-05, + "loss": 0.7998, + "step": 37394 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828564458840364e-05, + "loss": 1.0349, + "step": 37395 + }, + { + "epoch": 0.04, + "learning_rate": 4.982855527575294e-05, + "loss": 1.0931, + "step": 37396 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828546092420414e-05, + "loss": 1.2423, + "step": 37397 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828536908842796e-05, + "loss": 1.1727, + "step": 37398 + }, + { + "epoch": 0.04, + "learning_rate": 4.982852772502007e-05, + "loss": 0.6069, + "step": 37399 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828518540952254e-05, + "loss": 0.6873, + "step": 37400 + }, + { + "epoch": 0.04, + "learning_rate": 4.982850935663934e-05, + "loss": 1.0047, + "step": 37401 + }, + { + "epoch": 0.04, + "learning_rate": 4.982850017208133e-05, + "loss": 1.2878, + "step": 37402 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828490987278224e-05, + "loss": 1.0193, + "step": 37403 + }, + { + "epoch": 0.04, + "learning_rate": 4.982848180223002e-05, + "loss": 0.9599, + "step": 37404 + }, + { + "epoch": 0.04, + "learning_rate": 4.982847261693672e-05, + "loss": 1.0089, + "step": 37405 + }, + { + "epoch": 0.04, + "learning_rate": 4.982846343139832e-05, + "loss": 1.2323, + "step": 37406 + }, + { + "epoch": 0.04, + "learning_rate": 4.982845424561483e-05, + "loss": 0.9148, + "step": 37407 + }, + { + "epoch": 0.04, + "learning_rate": 4.982844505958625e-05, + "loss": 0.379, + "step": 37408 + }, + { + "epoch": 0.04, + "learning_rate": 4.982843587331256e-05, + "loss": 0.8886, + "step": 37409 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828426686793785e-05, + "loss": 1.2476, + "step": 37410 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828417500029914e-05, + "loss": 0.9793, + "step": 37411 + }, + { + "epoch": 0.04, + "learning_rate": 4.982840831302094e-05, + "loss": 0.7201, + "step": 37412 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828399125766865e-05, + "loss": 1.0182, + "step": 37413 + }, + { + "epoch": 0.04, + "learning_rate": 4.982838993826771e-05, + "loss": 1.0478, + "step": 37414 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828380750523454e-05, + "loss": 0.8948, + "step": 37415 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828371562534095e-05, + "loss": 0.8877, + "step": 37416 + }, + { + "epoch": 0.04, + "learning_rate": 4.982836237429965e-05, + "loss": 0.9808, + "step": 37417 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828353185820106e-05, + "loss": 0.8657, + "step": 37418 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828343997095476e-05, + "loss": 0.9924, + "step": 37419 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828334808125734e-05, + "loss": 1.0091, + "step": 37420 + }, + { + "epoch": 0.04, + "learning_rate": 4.982832561891091e-05, + "loss": 0.7487, + "step": 37421 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828316429450985e-05, + "loss": 1.2267, + "step": 37422 + }, + { + "epoch": 0.04, + "learning_rate": 4.982830723974597e-05, + "loss": 1.2121, + "step": 37423 + }, + { + "epoch": 0.04, + "learning_rate": 4.982829804979586e-05, + "loss": 1.1644, + "step": 37424 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828288859600645e-05, + "loss": 0.877, + "step": 37425 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828279669160346e-05, + "loss": 0.9697, + "step": 37426 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828270478474956e-05, + "loss": 1.0626, + "step": 37427 + }, + { + "epoch": 0.04, + "learning_rate": 4.982826128754446e-05, + "loss": 0.9484, + "step": 37428 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828252096368884e-05, + "loss": 0.7933, + "step": 37429 + }, + { + "epoch": 0.04, + "learning_rate": 4.98282429049482e-05, + "loss": 0.8295, + "step": 37430 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828233713282435e-05, + "loss": 0.9083, + "step": 37431 + }, + { + "epoch": 0.04, + "learning_rate": 4.982822452137157e-05, + "loss": 1.0233, + "step": 37432 + }, + { + "epoch": 0.04, + "learning_rate": 4.982821532921561e-05, + "loss": 0.9121, + "step": 37433 + }, + { + "epoch": 0.04, + "learning_rate": 4.982820613681455e-05, + "loss": 1.0966, + "step": 37434 + }, + { + "epoch": 0.04, + "learning_rate": 4.982819694416841e-05, + "loss": 1.1314, + "step": 37435 + }, + { + "epoch": 0.04, + "learning_rate": 4.982818775127717e-05, + "loss": 0.9794, + "step": 37436 + }, + { + "epoch": 0.04, + "learning_rate": 4.982817855814084e-05, + "loss": 1.3281, + "step": 37437 + }, + { + "epoch": 0.04, + "learning_rate": 4.982816936475941e-05, + "loss": 2.2194, + "step": 37438 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828160171132886e-05, + "loss": 4.7924, + "step": 37439 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828150977261276e-05, + "loss": 4.7663, + "step": 37440 + }, + { + "epoch": 0.04, + "learning_rate": 4.982814178314457e-05, + "loss": 2.8603, + "step": 37441 + }, + { + "epoch": 0.04, + "learning_rate": 4.982813258878277e-05, + "loss": 1.088, + "step": 37442 + }, + { + "epoch": 0.04, + "learning_rate": 4.982812339417588e-05, + "loss": 0.9302, + "step": 37443 + }, + { + "epoch": 0.04, + "learning_rate": 4.982811419932389e-05, + "loss": 0.9564, + "step": 37444 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828105004226815e-05, + "loss": 0.8171, + "step": 37445 + }, + { + "epoch": 0.04, + "learning_rate": 4.982809580888465e-05, + "loss": 0.6664, + "step": 37446 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828086613297383e-05, + "loss": 1.2954, + "step": 37447 + }, + { + "epoch": 0.04, + "learning_rate": 4.982807741746503e-05, + "loss": 0.9012, + "step": 37448 + }, + { + "epoch": 0.04, + "learning_rate": 4.982806822138758e-05, + "loss": 1.0894, + "step": 37449 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828059025065046e-05, + "loss": 1.0658, + "step": 37450 + }, + { + "epoch": 0.04, + "learning_rate": 4.982804982849741e-05, + "loss": 0.9781, + "step": 37451 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828040631684694e-05, + "loss": 1.0285, + "step": 37452 + }, + { + "epoch": 0.04, + "learning_rate": 4.982803143462688e-05, + "loss": 1.1707, + "step": 37453 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828022237323966e-05, + "loss": 1.3529, + "step": 37454 + }, + { + "epoch": 0.04, + "learning_rate": 4.982801303977597e-05, + "loss": 1.1045, + "step": 37455 + }, + { + "epoch": 0.04, + "learning_rate": 4.9828003841982875e-05, + "loss": 0.9579, + "step": 37456 + }, + { + "epoch": 0.04, + "learning_rate": 4.982799464394469e-05, + "loss": 1.0527, + "step": 37457 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827985445661415e-05, + "loss": 1.0128, + "step": 37458 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827976247133056e-05, + "loss": 1.0753, + "step": 37459 + }, + { + "epoch": 0.04, + "learning_rate": 4.98279670483596e-05, + "loss": 1.1842, + "step": 37460 + }, + { + "epoch": 0.04, + "learning_rate": 4.982795784934105e-05, + "loss": 0.5967, + "step": 37461 + }, + { + "epoch": 0.04, + "learning_rate": 4.982794865007741e-05, + "loss": 0.4156, + "step": 37462 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827939450568684e-05, + "loss": 0.2667, + "step": 37463 + }, + { + "epoch": 0.04, + "learning_rate": 4.982793025081486e-05, + "loss": 0.7829, + "step": 37464 + }, + { + "epoch": 0.04, + "learning_rate": 4.982792105081595e-05, + "loss": 0.9547, + "step": 37465 + }, + { + "epoch": 0.04, + "learning_rate": 4.982791185057195e-05, + "loss": 1.0089, + "step": 37466 + }, + { + "epoch": 0.04, + "learning_rate": 4.982790265008285e-05, + "loss": 1.4048, + "step": 37467 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827893449348666e-05, + "loss": 1.1296, + "step": 37468 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827884248369393e-05, + "loss": 0.746, + "step": 37469 + }, + { + "epoch": 0.04, + "learning_rate": 4.982787504714503e-05, + "loss": 1.0201, + "step": 37470 + }, + { + "epoch": 0.04, + "learning_rate": 4.982786584567557e-05, + "loss": 1.1548, + "step": 37471 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827856643961024e-05, + "loss": 1.0498, + "step": 37472 + }, + { + "epoch": 0.04, + "learning_rate": 4.982784744200139e-05, + "loss": 1.0413, + "step": 37473 + }, + { + "epoch": 0.04, + "learning_rate": 4.982783823979666e-05, + "loss": 0.9483, + "step": 37474 + }, + { + "epoch": 0.04, + "learning_rate": 4.982782903734685e-05, + "loss": 1.2854, + "step": 37475 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827819834651944e-05, + "loss": 1.0285, + "step": 37476 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827810631711946e-05, + "loss": 0.9625, + "step": 37477 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827801428526864e-05, + "loss": 1.1914, + "step": 37478 + }, + { + "epoch": 0.04, + "learning_rate": 4.982779222509669e-05, + "loss": 1.1203, + "step": 37479 + }, + { + "epoch": 0.04, + "learning_rate": 4.982778302142143e-05, + "loss": 1.2318, + "step": 37480 + }, + { + "epoch": 0.04, + "learning_rate": 4.982777381750107e-05, + "loss": 0.7368, + "step": 37481 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827764613335635e-05, + "loss": 1.1756, + "step": 37482 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827755408925106e-05, + "loss": 1.015, + "step": 37483 + }, + { + "epoch": 0.04, + "learning_rate": 4.982774620426948e-05, + "loss": 0.9292, + "step": 37484 + }, + { + "epoch": 0.04, + "learning_rate": 4.982773699936877e-05, + "loss": 1.0634, + "step": 37485 + }, + { + "epoch": 0.04, + "learning_rate": 4.982772779422297e-05, + "loss": 0.7604, + "step": 37486 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827718588832084e-05, + "loss": 0.9644, + "step": 37487 + }, + { + "epoch": 0.04, + "learning_rate": 4.98277093831961e-05, + "loss": 0.7888, + "step": 37488 + }, + { + "epoch": 0.04, + "learning_rate": 4.982770017731504e-05, + "loss": 1.0689, + "step": 37489 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827690971188885e-05, + "loss": 0.9213, + "step": 37490 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827681764817644e-05, + "loss": 1.1504, + "step": 37491 + }, + { + "epoch": 0.04, + "learning_rate": 4.982767255820131e-05, + "loss": 1.0361, + "step": 37492 + }, + { + "epoch": 0.04, + "learning_rate": 4.982766335133989e-05, + "loss": 1.0419, + "step": 37493 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827654144233385e-05, + "loss": 0.8787, + "step": 37494 + }, + { + "epoch": 0.04, + "learning_rate": 4.982764493688179e-05, + "loss": 0.8551, + "step": 37495 + }, + { + "epoch": 0.04, + "learning_rate": 4.982763572928511e-05, + "loss": 1.1008, + "step": 37496 + }, + { + "epoch": 0.04, + "learning_rate": 4.982762652144333e-05, + "loss": 0.6015, + "step": 37497 + }, + { + "epoch": 0.04, + "learning_rate": 4.982761731335648e-05, + "loss": 0.7811, + "step": 37498 + }, + { + "epoch": 0.04, + "learning_rate": 4.982760810502453e-05, + "loss": 0.8312, + "step": 37499 + }, + { + "epoch": 0.04, + "learning_rate": 4.982759889644749e-05, + "loss": 0.8835, + "step": 37500 + }, + { + "epoch": 0.04, + "eval_loss": 1.0715769529342651, + "eval_runtime": 128.7146, + "eval_samples_per_second": 10.76, + "eval_steps_per_second": 5.384, + "step": 37500 + }, + { + "epoch": 0.04, + "learning_rate": 4.982758968762537e-05, + "loss": 0.9383, + "step": 37501 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827580478558154e-05, + "loss": 0.9524, + "step": 37502 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827571269245866e-05, + "loss": 0.899, + "step": 37503 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827562059688474e-05, + "loss": 1.2275, + "step": 37504 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827552849886004e-05, + "loss": 1.1184, + "step": 37505 + }, + { + "epoch": 0.04, + "learning_rate": 4.982754363983845e-05, + "loss": 0.8637, + "step": 37506 + }, + { + "epoch": 0.04, + "learning_rate": 4.98275344295458e-05, + "loss": 0.9897, + "step": 37507 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827525219008065e-05, + "loss": 0.9372, + "step": 37508 + }, + { + "epoch": 0.04, + "learning_rate": 4.982751600822525e-05, + "loss": 1.4189, + "step": 37509 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827506797197344e-05, + "loss": 1.1145, + "step": 37510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827497585924344e-05, + "loss": 0.9853, + "step": 37511 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827488374406266e-05, + "loss": 1.1735, + "step": 37512 + }, + { + "epoch": 0.04, + "learning_rate": 4.98274791626431e-05, + "loss": 1.1175, + "step": 37513 + }, + { + "epoch": 0.04, + "learning_rate": 4.982746995063485e-05, + "loss": 1.1464, + "step": 37514 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827460738381504e-05, + "loss": 1.2004, + "step": 37515 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827451525883085e-05, + "loss": 1.0329, + "step": 37516 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827442313139575e-05, + "loss": 1.1149, + "step": 37517 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827433100150974e-05, + "loss": 0.8091, + "step": 37518 + }, + { + "epoch": 0.04, + "learning_rate": 4.982742388691729e-05, + "loss": 0.6117, + "step": 37519 + }, + { + "epoch": 0.04, + "learning_rate": 4.982741467343852e-05, + "loss": 0.8948, + "step": 37520 + }, + { + "epoch": 0.04, + "learning_rate": 4.982740545971467e-05, + "loss": 1.0885, + "step": 37521 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827396245745724e-05, + "loss": 0.9918, + "step": 37522 + }, + { + "epoch": 0.04, + "learning_rate": 4.98273870315317e-05, + "loss": 1.0615, + "step": 37523 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827377817072586e-05, + "loss": 0.8391, + "step": 37524 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827368602368384e-05, + "loss": 0.947, + "step": 37525 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827359387419105e-05, + "loss": 1.3187, + "step": 37526 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827350172224736e-05, + "loss": 1.3164, + "step": 37527 + }, + { + "epoch": 0.04, + "learning_rate": 4.982734095678528e-05, + "loss": 0.8011, + "step": 37528 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827331741100745e-05, + "loss": 0.922, + "step": 37529 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827322525171124e-05, + "loss": 1.0546, + "step": 37530 + }, + { + "epoch": 0.04, + "learning_rate": 4.982731330899641e-05, + "loss": 0.7481, + "step": 37531 + }, + { + "epoch": 0.04, + "learning_rate": 4.982730409257662e-05, + "loss": 0.9419, + "step": 37532 + }, + { + "epoch": 0.04, + "learning_rate": 4.982729487591174e-05, + "loss": 0.9485, + "step": 37533 + }, + { + "epoch": 0.04, + "learning_rate": 4.982728565900178e-05, + "loss": 0.8413, + "step": 37534 + }, + { + "epoch": 0.04, + "learning_rate": 4.982727644184674e-05, + "loss": 0.8795, + "step": 37535 + }, + { + "epoch": 0.04, + "learning_rate": 4.982726722444661e-05, + "loss": 1.2722, + "step": 37536 + }, + { + "epoch": 0.04, + "learning_rate": 4.982725800680139e-05, + "loss": 1.1379, + "step": 37537 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827248788911086e-05, + "loss": 1.1052, + "step": 37538 + }, + { + "epoch": 0.04, + "learning_rate": 4.982723957077571e-05, + "loss": 1.2486, + "step": 37539 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827230352395235e-05, + "loss": 1.8608, + "step": 37540 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827221133769684e-05, + "loss": 1.0146, + "step": 37541 + }, + { + "epoch": 0.04, + "learning_rate": 4.982721191489905e-05, + "loss": 0.8628, + "step": 37542 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827202695783324e-05, + "loss": 1.0994, + "step": 37543 + }, + { + "epoch": 0.04, + "learning_rate": 4.982719347642253e-05, + "loss": 1.05, + "step": 37544 + }, + { + "epoch": 0.04, + "learning_rate": 4.982718425681664e-05, + "loss": 0.8406, + "step": 37545 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827175036965664e-05, + "loss": 0.7722, + "step": 37546 + }, + { + "epoch": 0.04, + "learning_rate": 4.982716581686962e-05, + "loss": 0.7984, + "step": 37547 + }, + { + "epoch": 0.04, + "learning_rate": 4.982715659652848e-05, + "loss": 0.9945, + "step": 37548 + }, + { + "epoch": 0.04, + "learning_rate": 4.982714737594226e-05, + "loss": 0.9952, + "step": 37549 + }, + { + "epoch": 0.04, + "learning_rate": 4.982713815511095e-05, + "loss": 1.0038, + "step": 37550 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827128934034574e-05, + "loss": 0.9206, + "step": 37551 + }, + { + "epoch": 0.04, + "learning_rate": 4.98271197127131e-05, + "loss": 0.7225, + "step": 37552 + }, + { + "epoch": 0.04, + "learning_rate": 4.982711049114654e-05, + "loss": 1.2702, + "step": 37553 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827101269334914e-05, + "loss": 1.0824, + "step": 37554 + }, + { + "epoch": 0.04, + "learning_rate": 4.9827092047278195e-05, + "loss": 0.6195, + "step": 37555 + }, + { + "epoch": 0.04, + "learning_rate": 4.982708282497639e-05, + "loss": 0.8551, + "step": 37556 + }, + { + "epoch": 0.04, + "learning_rate": 4.982707360242951e-05, + "loss": 0.512, + "step": 37557 + }, + { + "epoch": 0.04, + "learning_rate": 4.982706437963754e-05, + "loss": 0.7669, + "step": 37558 + }, + { + "epoch": 0.04, + "learning_rate": 4.98270551566005e-05, + "loss": 1.1311, + "step": 37559 + }, + { + "epoch": 0.04, + "learning_rate": 4.982704593331837e-05, + "loss": 0.951, + "step": 37560 + }, + { + "epoch": 0.04, + "learning_rate": 4.982703670979116e-05, + "loss": 0.8504, + "step": 37561 + }, + { + "epoch": 0.04, + "learning_rate": 4.982702748601886e-05, + "loss": 1.3428, + "step": 37562 + }, + { + "epoch": 0.04, + "learning_rate": 4.982701826200149e-05, + "loss": 1.0657, + "step": 37563 + }, + { + "epoch": 0.04, + "learning_rate": 4.982700903773904e-05, + "loss": 0.83, + "step": 37564 + }, + { + "epoch": 0.04, + "learning_rate": 4.982699981323149e-05, + "loss": 0.9078, + "step": 37565 + }, + { + "epoch": 0.04, + "learning_rate": 4.982699058847887e-05, + "loss": 1.1336, + "step": 37566 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826981363481174e-05, + "loss": 0.5179, + "step": 37567 + }, + { + "epoch": 0.04, + "learning_rate": 4.982697213823838e-05, + "loss": 0.5586, + "step": 37568 + }, + { + "epoch": 0.04, + "learning_rate": 4.982696291275052e-05, + "loss": 1.2557, + "step": 37569 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826953687017575e-05, + "loss": 1.2038, + "step": 37570 + }, + { + "epoch": 0.04, + "learning_rate": 4.982694446103955e-05, + "loss": 1.3674, + "step": 37571 + }, + { + "epoch": 0.04, + "learning_rate": 4.982693523481644e-05, + "loss": 0.8918, + "step": 37572 + }, + { + "epoch": 0.04, + "learning_rate": 4.982692600834825e-05, + "loss": 1.249, + "step": 37573 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826916781634976e-05, + "loss": 1.6282, + "step": 37574 + }, + { + "epoch": 0.04, + "learning_rate": 4.982690755467662e-05, + "loss": 1.0107, + "step": 37575 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826898327473193e-05, + "loss": 1.1451, + "step": 37576 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826889100024676e-05, + "loss": 1.0426, + "step": 37577 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826879872331075e-05, + "loss": 1.0873, + "step": 37578 + }, + { + "epoch": 0.04, + "learning_rate": 4.982687064439241e-05, + "loss": 1.0867, + "step": 37579 + }, + { + "epoch": 0.04, + "learning_rate": 4.982686141620865e-05, + "loss": 0.8743, + "step": 37580 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826852187779816e-05, + "loss": 0.9376, + "step": 37581 + }, + { + "epoch": 0.04, + "learning_rate": 4.98268429591059e-05, + "loss": 0.6439, + "step": 37582 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826833730186906e-05, + "loss": 0.8197, + "step": 37583 + }, + { + "epoch": 0.04, + "learning_rate": 4.982682450102283e-05, + "loss": 1.2205, + "step": 37584 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826815271613674e-05, + "loss": 1.1206, + "step": 37585 + }, + { + "epoch": 0.04, + "learning_rate": 4.982680604195944e-05, + "loss": 1.0552, + "step": 37586 + }, + { + "epoch": 0.04, + "learning_rate": 4.982679681206012e-05, + "loss": 0.848, + "step": 37587 + }, + { + "epoch": 0.04, + "learning_rate": 4.982678758191573e-05, + "loss": 1.7233, + "step": 37588 + }, + { + "epoch": 0.04, + "learning_rate": 4.982677835152626e-05, + "loss": 1.2058, + "step": 37589 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826769120891704e-05, + "loss": 1.2495, + "step": 37590 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826759890012066e-05, + "loss": 1.2395, + "step": 37591 + }, + { + "epoch": 0.04, + "learning_rate": 4.982675065888736e-05, + "loss": 0.8732, + "step": 37592 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826741427517567e-05, + "loss": 0.9617, + "step": 37593 + }, + { + "epoch": 0.04, + "learning_rate": 4.982673219590269e-05, + "loss": 1.2338, + "step": 37594 + }, + { + "epoch": 0.04, + "learning_rate": 4.982672296404274e-05, + "loss": 0.6072, + "step": 37595 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826713731937715e-05, + "loss": 0.9944, + "step": 37596 + }, + { + "epoch": 0.04, + "learning_rate": 4.982670449958761e-05, + "loss": 1.2119, + "step": 37597 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826695266992417e-05, + "loss": 1.0546, + "step": 37598 + }, + { + "epoch": 0.04, + "learning_rate": 4.982668603415215e-05, + "loss": 0.5288, + "step": 37599 + }, + { + "epoch": 0.04, + "learning_rate": 4.982667680106681e-05, + "loss": 0.7271, + "step": 37600 + }, + { + "epoch": 0.04, + "learning_rate": 4.982666756773639e-05, + "loss": 1.155, + "step": 37601 + }, + { + "epoch": 0.04, + "learning_rate": 4.982665833416088e-05, + "loss": 1.2068, + "step": 37602 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826649100340305e-05, + "loss": 1.0057, + "step": 37603 + }, + { + "epoch": 0.04, + "learning_rate": 4.982663986627465e-05, + "loss": 1.1644, + "step": 37604 + }, + { + "epoch": 0.04, + "learning_rate": 4.982663063196391e-05, + "loss": 2.231, + "step": 37605 + }, + { + "epoch": 0.04, + "learning_rate": 4.98266213974081e-05, + "loss": 1.8357, + "step": 37606 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826612162607206e-05, + "loss": 0.6903, + "step": 37607 + }, + { + "epoch": 0.04, + "learning_rate": 4.982660292756124e-05, + "loss": 1.0285, + "step": 37608 + }, + { + "epoch": 0.04, + "learning_rate": 4.982659369227019e-05, + "loss": 1.0583, + "step": 37609 + }, + { + "epoch": 0.04, + "learning_rate": 4.982658445673407e-05, + "loss": 1.2284, + "step": 37610 + }, + { + "epoch": 0.04, + "learning_rate": 4.982657522095287e-05, + "loss": 1.0853, + "step": 37611 + }, + { + "epoch": 0.04, + "learning_rate": 4.982656598492658e-05, + "loss": 0.8913, + "step": 37612 + }, + { + "epoch": 0.04, + "learning_rate": 4.982655674865523e-05, + "loss": 1.2321, + "step": 37613 + }, + { + "epoch": 0.04, + "learning_rate": 4.982654751213879e-05, + "loss": 0.9982, + "step": 37614 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826538275377274e-05, + "loss": 0.8917, + "step": 37615 + }, + { + "epoch": 0.04, + "learning_rate": 4.982652903837069e-05, + "loss": 0.8834, + "step": 37616 + }, + { + "epoch": 0.04, + "learning_rate": 4.982651980111902e-05, + "loss": 1.0217, + "step": 37617 + }, + { + "epoch": 0.04, + "learning_rate": 4.982651056362228e-05, + "loss": 0.8874, + "step": 37618 + }, + { + "epoch": 0.04, + "learning_rate": 4.982650132588046e-05, + "loss": 1.0315, + "step": 37619 + }, + { + "epoch": 0.04, + "learning_rate": 4.982649208789356e-05, + "loss": 1.0172, + "step": 37620 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826482849661594e-05, + "loss": 0.8903, + "step": 37621 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826473611184545e-05, + "loss": 1.1619, + "step": 37622 + }, + { + "epoch": 0.04, + "learning_rate": 4.982646437246241e-05, + "loss": 1.2993, + "step": 37623 + }, + { + "epoch": 0.04, + "learning_rate": 4.982645513349521e-05, + "loss": 1.0269, + "step": 37624 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826445894282936e-05, + "loss": 0.9336, + "step": 37625 + }, + { + "epoch": 0.04, + "learning_rate": 4.982643665482558e-05, + "loss": 0.5989, + "step": 37626 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826427415123145e-05, + "loss": 0.5602, + "step": 37627 + }, + { + "epoch": 0.04, + "learning_rate": 4.982641817517564e-05, + "loss": 0.3217, + "step": 37628 + }, + { + "epoch": 0.04, + "learning_rate": 4.982640893498306e-05, + "loss": 1.0108, + "step": 37629 + }, + { + "epoch": 0.04, + "learning_rate": 4.98263996945454e-05, + "loss": 0.8134, + "step": 37630 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826390453862656e-05, + "loss": 0.4018, + "step": 37631 + }, + { + "epoch": 0.04, + "learning_rate": 4.982638121293485e-05, + "loss": 0.3629, + "step": 37632 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826371971761966e-05, + "loss": 0.3606, + "step": 37633 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826362730344e-05, + "loss": 0.9593, + "step": 37634 + }, + { + "epoch": 0.04, + "learning_rate": 4.982635348868097e-05, + "loss": 0.9633, + "step": 37635 + }, + { + "epoch": 0.04, + "learning_rate": 4.982634424677285e-05, + "loss": 0.9922, + "step": 37636 + }, + { + "epoch": 0.04, + "learning_rate": 4.982633500461967e-05, + "loss": 1.0503, + "step": 37637 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826325762221404e-05, + "loss": 1.1776, + "step": 37638 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826316519578065e-05, + "loss": 1.1534, + "step": 37639 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826307276689655e-05, + "loss": 1.0774, + "step": 37640 + }, + { + "epoch": 0.04, + "learning_rate": 4.982629803355617e-05, + "loss": 1.2555, + "step": 37641 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826288790177604e-05, + "loss": 0.9195, + "step": 37642 + }, + { + "epoch": 0.04, + "learning_rate": 4.982627954655397e-05, + "loss": 1.1163, + "step": 37643 + }, + { + "epoch": 0.04, + "learning_rate": 4.982627030268526e-05, + "loss": 1.1512, + "step": 37644 + }, + { + "epoch": 0.04, + "learning_rate": 4.982626105857147e-05, + "loss": 0.9096, + "step": 37645 + }, + { + "epoch": 0.04, + "learning_rate": 4.982625181421261e-05, + "loss": 1.0354, + "step": 37646 + }, + { + "epoch": 0.04, + "learning_rate": 4.982624256960868e-05, + "loss": 0.7497, + "step": 37647 + }, + { + "epoch": 0.04, + "learning_rate": 4.982623332475967e-05, + "loss": 1.0094, + "step": 37648 + }, + { + "epoch": 0.04, + "learning_rate": 4.982622407966558e-05, + "loss": 0.8887, + "step": 37649 + }, + { + "epoch": 0.04, + "learning_rate": 4.982621483432643e-05, + "loss": 1.1151, + "step": 37650 + }, + { + "epoch": 0.04, + "learning_rate": 4.98262055887422e-05, + "loss": 1.0481, + "step": 37651 + }, + { + "epoch": 0.04, + "learning_rate": 4.982619634291289e-05, + "loss": 1.157, + "step": 37652 + }, + { + "epoch": 0.04, + "learning_rate": 4.982618709683852e-05, + "loss": 1.0531, + "step": 37653 + }, + { + "epoch": 0.04, + "learning_rate": 4.982617785051906e-05, + "loss": 1.109, + "step": 37654 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826168603954535e-05, + "loss": 1.2945, + "step": 37655 + }, + { + "epoch": 0.04, + "learning_rate": 4.982615935714494e-05, + "loss": 0.7544, + "step": 37656 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826150110090265e-05, + "loss": 0.4126, + "step": 37657 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826140862790515e-05, + "loss": 1.1122, + "step": 37658 + }, + { + "epoch": 0.04, + "learning_rate": 4.98261316152457e-05, + "loss": 0.9373, + "step": 37659 + }, + { + "epoch": 0.04, + "learning_rate": 4.982612236745581e-05, + "loss": 0.6127, + "step": 37660 + }, + { + "epoch": 0.04, + "learning_rate": 4.982611311942084e-05, + "loss": 0.8029, + "step": 37661 + }, + { + "epoch": 0.04, + "learning_rate": 4.982610387114081e-05, + "loss": 0.8212, + "step": 37662 + }, + { + "epoch": 0.04, + "learning_rate": 4.982609462261569e-05, + "loss": 0.7931, + "step": 37663 + }, + { + "epoch": 0.04, + "learning_rate": 4.982608537384551e-05, + "loss": 0.8193, + "step": 37664 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826076124830256e-05, + "loss": 1.1159, + "step": 37665 + }, + { + "epoch": 0.04, + "learning_rate": 4.982606687556993e-05, + "loss": 1.0834, + "step": 37666 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826057626064524e-05, + "loss": 1.2108, + "step": 37667 + }, + { + "epoch": 0.04, + "learning_rate": 4.982604837631405e-05, + "loss": 1.1416, + "step": 37668 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826039126318505e-05, + "loss": 0.7328, + "step": 37669 + }, + { + "epoch": 0.04, + "learning_rate": 4.982602987607788e-05, + "loss": 0.3384, + "step": 37670 + }, + { + "epoch": 0.04, + "learning_rate": 4.98260206255922e-05, + "loss": 0.9686, + "step": 37671 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826011374861435e-05, + "loss": 0.9452, + "step": 37672 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826002123885595e-05, + "loss": 0.7548, + "step": 37673 + }, + { + "epoch": 0.04, + "learning_rate": 4.98259928726647e-05, + "loss": 0.6547, + "step": 37674 + }, + { + "epoch": 0.04, + "learning_rate": 4.982598362119871e-05, + "loss": 1.1219, + "step": 37675 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825974369487667e-05, + "loss": 0.9821, + "step": 37676 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825965117531545e-05, + "loss": 0.5288, + "step": 37677 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825955865330354e-05, + "loss": 0.9004, + "step": 37678 + }, + { + "epoch": 0.04, + "learning_rate": 4.982594661288409e-05, + "loss": 0.9357, + "step": 37679 + }, + { + "epoch": 0.04, + "learning_rate": 4.982593736019275e-05, + "loss": 1.1883, + "step": 37680 + }, + { + "epoch": 0.04, + "learning_rate": 4.982592810725635e-05, + "loss": 0.9168, + "step": 37681 + }, + { + "epoch": 0.04, + "learning_rate": 4.982591885407487e-05, + "loss": 1.1395, + "step": 37682 + }, + { + "epoch": 0.04, + "learning_rate": 4.982590960064833e-05, + "loss": 1.4296, + "step": 37683 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825900346976706e-05, + "loss": 1.1866, + "step": 37684 + }, + { + "epoch": 0.04, + "learning_rate": 4.982589109306001e-05, + "loss": 0.9594, + "step": 37685 + }, + { + "epoch": 0.04, + "learning_rate": 4.982588183889825e-05, + "loss": 0.7541, + "step": 37686 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825872584491416e-05, + "loss": 1.1676, + "step": 37687 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825863329839514e-05, + "loss": 0.9706, + "step": 37688 + }, + { + "epoch": 0.04, + "learning_rate": 4.982585407494255e-05, + "loss": 0.6147, + "step": 37689 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825844819800506e-05, + "loss": 1.2675, + "step": 37690 + }, + { + "epoch": 0.04, + "learning_rate": 4.982583556441339e-05, + "loss": 1.0199, + "step": 37691 + }, + { + "epoch": 0.04, + "learning_rate": 4.982582630878121e-05, + "loss": 1.1088, + "step": 37692 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825817052903956e-05, + "loss": 1.0665, + "step": 37693 + }, + { + "epoch": 0.04, + "learning_rate": 4.982580779678163e-05, + "loss": 1.0767, + "step": 37694 + }, + { + "epoch": 0.04, + "learning_rate": 4.982579854041424e-05, + "loss": 1.0137, + "step": 37695 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825789283801774e-05, + "loss": 0.8173, + "step": 37696 + }, + { + "epoch": 0.04, + "learning_rate": 4.982578002694424e-05, + "loss": 0.8918, + "step": 37697 + }, + { + "epoch": 0.04, + "learning_rate": 4.982577076984164e-05, + "loss": 1.3094, + "step": 37698 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825761512493964e-05, + "loss": 0.9356, + "step": 37699 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825752254901225e-05, + "loss": 1.0097, + "step": 37700 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825742997063415e-05, + "loss": 1.3157, + "step": 37701 + }, + { + "epoch": 0.04, + "learning_rate": 4.982573373898053e-05, + "loss": 1.1561, + "step": 37702 + }, + { + "epoch": 0.04, + "learning_rate": 4.982572448065258e-05, + "loss": 0.8695, + "step": 37703 + }, + { + "epoch": 0.04, + "learning_rate": 4.982571522207956e-05, + "loss": 1.0331, + "step": 37704 + }, + { + "epoch": 0.04, + "learning_rate": 4.982570596326147e-05, + "loss": 0.8174, + "step": 37705 + }, + { + "epoch": 0.04, + "learning_rate": 4.982569670419832e-05, + "loss": 1.0476, + "step": 37706 + }, + { + "epoch": 0.04, + "learning_rate": 4.982568744489009e-05, + "loss": 1.0244, + "step": 37707 + }, + { + "epoch": 0.04, + "learning_rate": 4.98256781853368e-05, + "loss": 1.101, + "step": 37708 + }, + { + "epoch": 0.04, + "learning_rate": 4.982566892553844e-05, + "loss": 1.1129, + "step": 37709 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825659665495004e-05, + "loss": 1.0222, + "step": 37710 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825650405206504e-05, + "loss": 1.0425, + "step": 37711 + }, + { + "epoch": 0.04, + "learning_rate": 4.982564114467294e-05, + "loss": 1.0513, + "step": 37712 + }, + { + "epoch": 0.04, + "learning_rate": 4.98256318838943e-05, + "loss": 1.0936, + "step": 37713 + }, + { + "epoch": 0.04, + "learning_rate": 4.98256226228706e-05, + "loss": 1.0382, + "step": 37714 + }, + { + "epoch": 0.04, + "learning_rate": 4.982561336160182e-05, + "loss": 1.0504, + "step": 37715 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825604100087985e-05, + "loss": 0.9562, + "step": 37716 + }, + { + "epoch": 0.04, + "learning_rate": 4.982559483832908e-05, + "loss": 1.0506, + "step": 37717 + }, + { + "epoch": 0.04, + "learning_rate": 4.98255855763251e-05, + "loss": 0.9885, + "step": 37718 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825576314076057e-05, + "loss": 0.8234, + "step": 37719 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825567051581944e-05, + "loss": 0.7615, + "step": 37720 + }, + { + "epoch": 0.04, + "learning_rate": 4.982555778884277e-05, + "loss": 0.8885, + "step": 37721 + }, + { + "epoch": 0.04, + "learning_rate": 4.982554852585852e-05, + "loss": 1.0444, + "step": 37722 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825539262629205e-05, + "loss": 0.9999, + "step": 37723 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825529999154825e-05, + "loss": 0.7871, + "step": 37724 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825520735435375e-05, + "loss": 0.9044, + "step": 37725 + }, + { + "epoch": 0.04, + "learning_rate": 4.982551147147086e-05, + "loss": 0.8813, + "step": 37726 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825502207261276e-05, + "loss": 1.2009, + "step": 37727 + }, + { + "epoch": 0.04, + "learning_rate": 4.982549294280663e-05, + "loss": 1.4119, + "step": 37728 + }, + { + "epoch": 0.04, + "learning_rate": 4.982548367810691e-05, + "loss": 1.0609, + "step": 37729 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825474413162134e-05, + "loss": 1.4825, + "step": 37730 + }, + { + "epoch": 0.04, + "learning_rate": 4.982546514797228e-05, + "loss": 1.1955, + "step": 37731 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825455882537366e-05, + "loss": 1.2237, + "step": 37732 + }, + { + "epoch": 0.04, + "learning_rate": 4.982544661685739e-05, + "loss": 1.0947, + "step": 37733 + }, + { + "epoch": 0.04, + "learning_rate": 4.982543735093234e-05, + "loss": 1.1269, + "step": 37734 + }, + { + "epoch": 0.04, + "learning_rate": 4.982542808476222e-05, + "loss": 0.9634, + "step": 37735 + }, + { + "epoch": 0.04, + "learning_rate": 4.982541881834703e-05, + "loss": 0.9413, + "step": 37736 + }, + { + "epoch": 0.04, + "learning_rate": 4.982540955168679e-05, + "loss": 0.8842, + "step": 37737 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825400284781475e-05, + "loss": 1.0507, + "step": 37738 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825391017631094e-05, + "loss": 0.475, + "step": 37739 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825381750235656e-05, + "loss": 0.2822, + "step": 37740 + }, + { + "epoch": 0.04, + "learning_rate": 4.982537248259515e-05, + "loss": 0.2194, + "step": 37741 + }, + { + "epoch": 0.04, + "learning_rate": 4.982536321470956e-05, + "loss": 0.237, + "step": 37742 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825353946578926e-05, + "loss": 0.1822, + "step": 37743 + }, + { + "epoch": 0.04, + "learning_rate": 4.982534467820322e-05, + "loss": 0.1458, + "step": 37744 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825335409582444e-05, + "loss": 0.916, + "step": 37745 + }, + { + "epoch": 0.04, + "learning_rate": 4.982532614071661e-05, + "loss": 0.8612, + "step": 37746 + }, + { + "epoch": 0.04, + "learning_rate": 4.98253168716057e-05, + "loss": 0.4972, + "step": 37747 + }, + { + "epoch": 0.04, + "learning_rate": 4.982530760224974e-05, + "loss": 0.6401, + "step": 37748 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825298332648704e-05, + "loss": 0.3499, + "step": 37749 + }, + { + "epoch": 0.04, + "learning_rate": 4.98252890628026e-05, + "loss": 0.2411, + "step": 37750 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825279792711445e-05, + "loss": 0.313, + "step": 37751 + }, + { + "epoch": 0.04, + "learning_rate": 4.982527052237521e-05, + "loss": 0.3636, + "step": 37752 + }, + { + "epoch": 0.04, + "learning_rate": 4.982526125179392e-05, + "loss": 0.5383, + "step": 37753 + }, + { + "epoch": 0.04, + "learning_rate": 4.982525198096757e-05, + "loss": 0.5007, + "step": 37754 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825242709896145e-05, + "loss": 0.3702, + "step": 37755 + }, + { + "epoch": 0.04, + "learning_rate": 4.982523343857966e-05, + "loss": 0.2086, + "step": 37756 + }, + { + "epoch": 0.04, + "learning_rate": 4.982522416701812e-05, + "loss": 0.2223, + "step": 37757 + }, + { + "epoch": 0.04, + "learning_rate": 4.98252148952115e-05, + "loss": 0.172, + "step": 37758 + }, + { + "epoch": 0.04, + "learning_rate": 4.982520562315982e-05, + "loss": 0.243, + "step": 37759 + }, + { + "epoch": 0.04, + "learning_rate": 4.982519635086308e-05, + "loss": 0.2865, + "step": 37760 + }, + { + "epoch": 0.04, + "learning_rate": 4.982518707832127e-05, + "loss": 0.1401, + "step": 37761 + }, + { + "epoch": 0.04, + "learning_rate": 4.982517780553441e-05, + "loss": 0.1747, + "step": 37762 + }, + { + "epoch": 0.04, + "learning_rate": 4.982516853250247e-05, + "loss": 0.1912, + "step": 37763 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825159259225475e-05, + "loss": 1.4096, + "step": 37764 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825149985703414e-05, + "loss": 0.992, + "step": 37765 + }, + { + "epoch": 0.04, + "learning_rate": 4.982514071193629e-05, + "loss": 0.9733, + "step": 37766 + }, + { + "epoch": 0.04, + "learning_rate": 4.98251314379241e-05, + "loss": 0.8868, + "step": 37767 + }, + { + "epoch": 0.04, + "learning_rate": 4.982512216366685e-05, + "loss": 1.1001, + "step": 37768 + }, + { + "epoch": 0.04, + "learning_rate": 4.982511288916454e-05, + "loss": 1.2865, + "step": 37769 + }, + { + "epoch": 0.04, + "learning_rate": 4.982510361441716e-05, + "loss": 1.2552, + "step": 37770 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825094339424725e-05, + "loss": 1.2305, + "step": 37771 + }, + { + "epoch": 0.04, + "learning_rate": 4.982508506418722e-05, + "loss": 1.0047, + "step": 37772 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825075788704656e-05, + "loss": 1.1827, + "step": 37773 + }, + { + "epoch": 0.04, + "learning_rate": 4.982506651297702e-05, + "loss": 1.0624, + "step": 37774 + }, + { + "epoch": 0.04, + "learning_rate": 4.982505723700433e-05, + "loss": 1.1467, + "step": 37775 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825047960786585e-05, + "loss": 0.8524, + "step": 37776 + }, + { + "epoch": 0.04, + "learning_rate": 4.982503868432377e-05, + "loss": 1.2205, + "step": 37777 + }, + { + "epoch": 0.04, + "learning_rate": 4.9825029407615894e-05, + "loss": 1.1961, + "step": 37778 + }, + { + "epoch": 0.04, + "learning_rate": 4.982502013066295e-05, + "loss": 1.1125, + "step": 37779 + }, + { + "epoch": 0.04, + "learning_rate": 4.982501085346495e-05, + "loss": 1.2402, + "step": 37780 + }, + { + "epoch": 0.04, + "learning_rate": 4.982500157602189e-05, + "loss": 1.2266, + "step": 37781 + }, + { + "epoch": 0.04, + "learning_rate": 4.982499229833376e-05, + "loss": 1.2703, + "step": 37782 + }, + { + "epoch": 0.04, + "learning_rate": 4.982498302040057e-05, + "loss": 0.8016, + "step": 37783 + }, + { + "epoch": 0.04, + "learning_rate": 4.982497374222233e-05, + "loss": 0.8698, + "step": 37784 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824964463799016e-05, + "loss": 1.0327, + "step": 37785 + }, + { + "epoch": 0.04, + "learning_rate": 4.982495518513064e-05, + "loss": 0.9866, + "step": 37786 + }, + { + "epoch": 0.04, + "learning_rate": 4.982494590621721e-05, + "loss": 1.1241, + "step": 37787 + }, + { + "epoch": 0.04, + "learning_rate": 4.982493662705871e-05, + "loss": 0.985, + "step": 37788 + }, + { + "epoch": 0.04, + "learning_rate": 4.982492734765515e-05, + "loss": 0.8905, + "step": 37789 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824918068006534e-05, + "loss": 1.1972, + "step": 37790 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824908788112854e-05, + "loss": 1.2648, + "step": 37791 + }, + { + "epoch": 0.04, + "learning_rate": 4.982489950797412e-05, + "loss": 1.2568, + "step": 37792 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824890227590317e-05, + "loss": 1.4227, + "step": 37793 + }, + { + "epoch": 0.04, + "learning_rate": 4.982488094696145e-05, + "loss": 1.1169, + "step": 37794 + }, + { + "epoch": 0.04, + "learning_rate": 4.982487166608753e-05, + "loss": 0.9101, + "step": 37795 + }, + { + "epoch": 0.04, + "learning_rate": 4.982486238496855e-05, + "loss": 0.9534, + "step": 37796 + }, + { + "epoch": 0.04, + "learning_rate": 4.98248531036045e-05, + "loss": 0.8965, + "step": 37797 + }, + { + "epoch": 0.04, + "learning_rate": 4.98248438219954e-05, + "loss": 1.0249, + "step": 37798 + }, + { + "epoch": 0.04, + "learning_rate": 4.982483454014123e-05, + "loss": 0.921, + "step": 37799 + }, + { + "epoch": 0.04, + "learning_rate": 4.982482525804201e-05, + "loss": 0.9748, + "step": 37800 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824815975697725e-05, + "loss": 1.0824, + "step": 37801 + }, + { + "epoch": 0.04, + "learning_rate": 4.982480669310837e-05, + "loss": 0.9552, + "step": 37802 + }, + { + "epoch": 0.04, + "learning_rate": 4.982479741027397e-05, + "loss": 1.1658, + "step": 37803 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824788127194504e-05, + "loss": 0.9076, + "step": 37804 + }, + { + "epoch": 0.04, + "learning_rate": 4.982477884386998e-05, + "loss": 0.8755, + "step": 37805 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824769560300395e-05, + "loss": 0.8408, + "step": 37806 + }, + { + "epoch": 0.04, + "learning_rate": 4.982476027648575e-05, + "loss": 0.9859, + "step": 37807 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824750992426045e-05, + "loss": 1.2804, + "step": 37808 + }, + { + "epoch": 0.04, + "learning_rate": 4.982474170812128e-05, + "loss": 1.0153, + "step": 37809 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824732423571455e-05, + "loss": 0.9464, + "step": 37810 + }, + { + "epoch": 0.04, + "learning_rate": 4.982472313877658e-05, + "loss": 1.1302, + "step": 37811 + }, + { + "epoch": 0.04, + "learning_rate": 4.982471385373663e-05, + "loss": 1.4169, + "step": 37812 + }, + { + "epoch": 0.04, + "learning_rate": 4.982470456845163e-05, + "loss": 1.1861, + "step": 37813 + }, + { + "epoch": 0.04, + "learning_rate": 4.982469528292157e-05, + "loss": 1.1452, + "step": 37814 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824685997146454e-05, + "loss": 1.1937, + "step": 37815 + }, + { + "epoch": 0.04, + "learning_rate": 4.982467671112627e-05, + "loss": 0.7352, + "step": 37816 + }, + { + "epoch": 0.04, + "learning_rate": 4.982466742486104e-05, + "loss": 1.2377, + "step": 37817 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824658138350746e-05, + "loss": 1.1742, + "step": 37818 + }, + { + "epoch": 0.04, + "learning_rate": 4.982464885159539e-05, + "loss": 1.1243, + "step": 37819 + }, + { + "epoch": 0.04, + "learning_rate": 4.982463956459498e-05, + "loss": 1.1449, + "step": 37820 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824630277349506e-05, + "loss": 1.3624, + "step": 37821 + }, + { + "epoch": 0.04, + "learning_rate": 4.982462098985898e-05, + "loss": 0.7169, + "step": 37822 + }, + { + "epoch": 0.04, + "learning_rate": 4.982461170212339e-05, + "loss": 0.9842, + "step": 37823 + }, + { + "epoch": 0.04, + "learning_rate": 4.982460241414275e-05, + "loss": 0.9507, + "step": 37824 + }, + { + "epoch": 0.04, + "learning_rate": 4.982459312591704e-05, + "loss": 0.9085, + "step": 37825 + }, + { + "epoch": 0.04, + "learning_rate": 4.982458383744628e-05, + "loss": 0.9287, + "step": 37826 + }, + { + "epoch": 0.04, + "learning_rate": 4.982457454873046e-05, + "loss": 0.9323, + "step": 37827 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824565259769584e-05, + "loss": 0.7083, + "step": 37828 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824555970563644e-05, + "loss": 1.0151, + "step": 37829 + }, + { + "epoch": 0.04, + "learning_rate": 4.982454668111266e-05, + "loss": 0.9819, + "step": 37830 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824537391416607e-05, + "loss": 1.153, + "step": 37831 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824528101475496e-05, + "loss": 1.1613, + "step": 37832 + }, + { + "epoch": 0.04, + "learning_rate": 4.982451881128934e-05, + "loss": 1.0539, + "step": 37833 + }, + { + "epoch": 0.04, + "learning_rate": 4.982450952085812e-05, + "loss": 1.2756, + "step": 37834 + }, + { + "epoch": 0.04, + "learning_rate": 4.982450023018184e-05, + "loss": 1.1452, + "step": 37835 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824490939260507e-05, + "loss": 0.3709, + "step": 37836 + }, + { + "epoch": 0.04, + "learning_rate": 4.982448164809411e-05, + "loss": 0.3397, + "step": 37837 + }, + { + "epoch": 0.04, + "learning_rate": 4.982447235668266e-05, + "loss": 0.5466, + "step": 37838 + }, + { + "epoch": 0.04, + "learning_rate": 4.982446306502615e-05, + "loss": 0.3393, + "step": 37839 + }, + { + "epoch": 0.04, + "learning_rate": 4.982445377312459e-05, + "loss": 0.9014, + "step": 37840 + }, + { + "epoch": 0.04, + "learning_rate": 4.982444448097797e-05, + "loss": 0.9129, + "step": 37841 + }, + { + "epoch": 0.04, + "learning_rate": 4.98244351885863e-05, + "loss": 0.4141, + "step": 37842 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824425895949566e-05, + "loss": 0.306, + "step": 37843 + }, + { + "epoch": 0.04, + "learning_rate": 4.982441660306778e-05, + "loss": 0.6596, + "step": 37844 + }, + { + "epoch": 0.04, + "learning_rate": 4.982440730994093e-05, + "loss": 1.3893, + "step": 37845 + }, + { + "epoch": 0.04, + "learning_rate": 4.982439801656903e-05, + "loss": 1.1021, + "step": 37846 + }, + { + "epoch": 0.04, + "learning_rate": 4.982438872295208e-05, + "loss": 0.8617, + "step": 37847 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824379429090065e-05, + "loss": 1.0718, + "step": 37848 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824370134983e-05, + "loss": 0.7606, + "step": 37849 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824360840630877e-05, + "loss": 0.9479, + "step": 37850 + }, + { + "epoch": 0.04, + "learning_rate": 4.98243515460337e-05, + "loss": 1.0252, + "step": 37851 + }, + { + "epoch": 0.04, + "learning_rate": 4.982434225119146e-05, + "loss": 1.1432, + "step": 37852 + }, + { + "epoch": 0.04, + "learning_rate": 4.982433295610417e-05, + "loss": 0.9762, + "step": 37853 + }, + { + "epoch": 0.04, + "learning_rate": 4.982432366077182e-05, + "loss": 1.1489, + "step": 37854 + }, + { + "epoch": 0.04, + "learning_rate": 4.982431436519442e-05, + "loss": 1.0301, + "step": 37855 + }, + { + "epoch": 0.04, + "learning_rate": 4.982430506937197e-05, + "loss": 0.9778, + "step": 37856 + }, + { + "epoch": 0.04, + "learning_rate": 4.982429577330445e-05, + "loss": 0.8897, + "step": 37857 + }, + { + "epoch": 0.04, + "learning_rate": 4.982428647699189e-05, + "loss": 1.1369, + "step": 37858 + }, + { + "epoch": 0.04, + "learning_rate": 4.982427718043427e-05, + "loss": 0.7642, + "step": 37859 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824267883631594e-05, + "loss": 1.1878, + "step": 37860 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824258586583864e-05, + "loss": 0.9003, + "step": 37861 + }, + { + "epoch": 0.04, + "learning_rate": 4.982424928929108e-05, + "loss": 1.0892, + "step": 37862 + }, + { + "epoch": 0.04, + "learning_rate": 4.982423999175324e-05, + "loss": 0.8865, + "step": 37863 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824230693970345e-05, + "loss": 0.7417, + "step": 37864 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824221395942394e-05, + "loss": 0.8427, + "step": 37865 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824212097669386e-05, + "loss": 1.3075, + "step": 37866 + }, + { + "epoch": 0.04, + "learning_rate": 4.982420279915133e-05, + "loss": 0.766, + "step": 37867 + }, + { + "epoch": 0.04, + "learning_rate": 4.982419350038822e-05, + "loss": 0.984, + "step": 37868 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824184201380056e-05, + "loss": 1.0708, + "step": 37869 + }, + { + "epoch": 0.04, + "learning_rate": 4.982417490212684e-05, + "loss": 0.8843, + "step": 37870 + }, + { + "epoch": 0.04, + "learning_rate": 4.982416560262857e-05, + "loss": 0.9662, + "step": 37871 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824156302885236e-05, + "loss": 0.8243, + "step": 37872 + }, + { + "epoch": 0.04, + "learning_rate": 4.982414700289686e-05, + "loss": 1.1143, + "step": 37873 + }, + { + "epoch": 0.04, + "learning_rate": 4.982413770266342e-05, + "loss": 1.1289, + "step": 37874 + }, + { + "epoch": 0.04, + "learning_rate": 4.982412840218494e-05, + "loss": 0.6466, + "step": 37875 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824119101461396e-05, + "loss": 0.9889, + "step": 37876 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824109800492804e-05, + "loss": 0.8796, + "step": 37877 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824100499279156e-05, + "loss": 0.9545, + "step": 37878 + }, + { + "epoch": 0.04, + "learning_rate": 4.982409119782046e-05, + "loss": 1.0508, + "step": 37879 + }, + { + "epoch": 0.04, + "learning_rate": 4.98240818961167e-05, + "loss": 0.941, + "step": 37880 + }, + { + "epoch": 0.04, + "learning_rate": 4.98240725941679e-05, + "loss": 1.1045, + "step": 37881 + }, + { + "epoch": 0.04, + "learning_rate": 4.982406329197404e-05, + "loss": 1.2596, + "step": 37882 + }, + { + "epoch": 0.04, + "learning_rate": 4.982405398953513e-05, + "loss": 0.9948, + "step": 37883 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824044686851166e-05, + "loss": 1.1863, + "step": 37884 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824035383922154e-05, + "loss": 1.3526, + "step": 37885 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824026080748085e-05, + "loss": 1.455, + "step": 37886 + }, + { + "epoch": 0.04, + "learning_rate": 4.9824016777328966e-05, + "loss": 1.0095, + "step": 37887 + }, + { + "epoch": 0.04, + "learning_rate": 4.982400747366479e-05, + "loss": 0.8219, + "step": 37888 + }, + { + "epoch": 0.04, + "learning_rate": 4.982399816975556e-05, + "loss": 1.1278, + "step": 37889 + }, + { + "epoch": 0.04, + "learning_rate": 4.982398886560129e-05, + "loss": 0.8639, + "step": 37890 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823979561201956e-05, + "loss": 0.7878, + "step": 37891 + }, + { + "epoch": 0.04, + "learning_rate": 4.982397025655757e-05, + "loss": 1.1648, + "step": 37892 + }, + { + "epoch": 0.04, + "learning_rate": 4.982396095166814e-05, + "loss": 0.8872, + "step": 37893 + }, + { + "epoch": 0.04, + "learning_rate": 4.982395164653366e-05, + "loss": 0.8958, + "step": 37894 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823942341154125e-05, + "loss": 0.9464, + "step": 37895 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823933035529535e-05, + "loss": 0.8305, + "step": 37896 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823923729659895e-05, + "loss": 0.6779, + "step": 37897 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823914423545206e-05, + "loss": 1.0682, + "step": 37898 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823905117185466e-05, + "loss": 1.165, + "step": 37899 + }, + { + "epoch": 0.04, + "learning_rate": 4.982389581058067e-05, + "loss": 1.0467, + "step": 37900 + }, + { + "epoch": 0.04, + "learning_rate": 4.982388650373083e-05, + "loss": 0.9446, + "step": 37901 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823877196635934e-05, + "loss": 0.7909, + "step": 37902 + }, + { + "epoch": 0.04, + "learning_rate": 4.982386788929598e-05, + "loss": 1.385, + "step": 37903 + }, + { + "epoch": 0.04, + "learning_rate": 4.982385858171099e-05, + "loss": 1.0452, + "step": 37904 + }, + { + "epoch": 0.04, + "learning_rate": 4.982384927388094e-05, + "loss": 1.05, + "step": 37905 + }, + { + "epoch": 0.04, + "learning_rate": 4.982383996580584e-05, + "loss": 0.9792, + "step": 37906 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823830657485694e-05, + "loss": 0.7622, + "step": 37907 + }, + { + "epoch": 0.04, + "learning_rate": 4.98238213489205e-05, + "loss": 0.9241, + "step": 37908 + }, + { + "epoch": 0.04, + "learning_rate": 4.982381204011025e-05, + "loss": 1.2248, + "step": 37909 + }, + { + "epoch": 0.04, + "learning_rate": 4.982380273105495e-05, + "loss": 0.7885, + "step": 37910 + }, + { + "epoch": 0.04, + "learning_rate": 4.98237934217546e-05, + "loss": 1.0127, + "step": 37911 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823784112209204e-05, + "loss": 1.1456, + "step": 37912 + }, + { + "epoch": 0.04, + "learning_rate": 4.982377480241876e-05, + "loss": 1.2071, + "step": 37913 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823765492383254e-05, + "loss": 0.7298, + "step": 37914 + }, + { + "epoch": 0.04, + "learning_rate": 4.982375618210271e-05, + "loss": 0.7436, + "step": 37915 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823746871577104e-05, + "loss": 1.2327, + "step": 37916 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823737560806464e-05, + "loss": 0.8231, + "step": 37917 + }, + { + "epoch": 0.04, + "learning_rate": 4.982372824979076e-05, + "loss": 1.4733, + "step": 37918 + }, + { + "epoch": 0.04, + "learning_rate": 4.982371893853002e-05, + "loss": 0.7967, + "step": 37919 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823709627024216e-05, + "loss": 1.3101, + "step": 37920 + }, + { + "epoch": 0.04, + "learning_rate": 4.982370031527337e-05, + "loss": 0.8584, + "step": 37921 + }, + { + "epoch": 0.04, + "learning_rate": 4.982369100327748e-05, + "loss": 1.1106, + "step": 37922 + }, + { + "epoch": 0.04, + "learning_rate": 4.982368169103653e-05, + "loss": 0.9103, + "step": 37923 + }, + { + "epoch": 0.04, + "learning_rate": 4.982367237855054e-05, + "loss": 1.0471, + "step": 37924 + }, + { + "epoch": 0.04, + "learning_rate": 4.98236630658195e-05, + "loss": 0.8692, + "step": 37925 + }, + { + "epoch": 0.04, + "learning_rate": 4.982365375284341e-05, + "loss": 1.2036, + "step": 37926 + }, + { + "epoch": 0.04, + "learning_rate": 4.982364443962227e-05, + "loss": 0.4632, + "step": 37927 + }, + { + "epoch": 0.04, + "learning_rate": 4.982363512615609e-05, + "loss": 0.2654, + "step": 37928 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823625812444855e-05, + "loss": 0.1399, + "step": 37929 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823616498488565e-05, + "loss": 0.1971, + "step": 37930 + }, + { + "epoch": 0.04, + "learning_rate": 4.982360718428724e-05, + "loss": 0.1327, + "step": 37931 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823597869840855e-05, + "loss": 0.0909, + "step": 37932 + }, + { + "epoch": 0.04, + "learning_rate": 4.982358855514943e-05, + "loss": 0.1521, + "step": 37933 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823579240212944e-05, + "loss": 0.2204, + "step": 37934 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823569925031424e-05, + "loss": 1.0943, + "step": 37935 + }, + { + "epoch": 0.04, + "learning_rate": 4.982356060960485e-05, + "loss": 0.6719, + "step": 37936 + }, + { + "epoch": 0.04, + "learning_rate": 4.982355129393323e-05, + "loss": 0.9924, + "step": 37937 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823541978016564e-05, + "loss": 0.9906, + "step": 37938 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823532661854844e-05, + "loss": 1.1826, + "step": 37939 + }, + { + "epoch": 0.04, + "learning_rate": 4.982352334544809e-05, + "loss": 0.8392, + "step": 37940 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823514028796274e-05, + "loss": 0.9719, + "step": 37941 + }, + { + "epoch": 0.04, + "learning_rate": 4.982350471189942e-05, + "loss": 0.8721, + "step": 37942 + }, + { + "epoch": 0.04, + "learning_rate": 4.982349539475751e-05, + "loss": 1.2905, + "step": 37943 + }, + { + "epoch": 0.04, + "learning_rate": 4.982348607737056e-05, + "loss": 1.2568, + "step": 37944 + }, + { + "epoch": 0.04, + "learning_rate": 4.982347675973856e-05, + "loss": 0.9812, + "step": 37945 + }, + { + "epoch": 0.04, + "learning_rate": 4.982346744186152e-05, + "loss": 0.9766, + "step": 37946 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823458123739425e-05, + "loss": 0.9826, + "step": 37947 + }, + { + "epoch": 0.04, + "learning_rate": 4.982344880537229e-05, + "loss": 0.7436, + "step": 37948 + }, + { + "epoch": 0.04, + "learning_rate": 4.98234394867601e-05, + "loss": 1.0601, + "step": 37949 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823430167902866e-05, + "loss": 0.9752, + "step": 37950 + }, + { + "epoch": 0.04, + "learning_rate": 4.982342084880059e-05, + "loss": 0.8136, + "step": 37951 + }, + { + "epoch": 0.04, + "learning_rate": 4.982341152945327e-05, + "loss": 0.95, + "step": 37952 + }, + { + "epoch": 0.04, + "learning_rate": 4.982340220986089e-05, + "loss": 1.1965, + "step": 37953 + }, + { + "epoch": 0.04, + "learning_rate": 4.982339289002348e-05, + "loss": 0.8611, + "step": 37954 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823383569941014e-05, + "loss": 1.0006, + "step": 37955 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823374249613505e-05, + "loss": 1.3582, + "step": 37956 + }, + { + "epoch": 0.04, + "learning_rate": 4.982336492904095e-05, + "loss": 1.1068, + "step": 37957 + }, + { + "epoch": 0.04, + "learning_rate": 4.982335560822334e-05, + "loss": 1.0497, + "step": 37958 + }, + { + "epoch": 0.04, + "learning_rate": 4.98233462871607e-05, + "loss": 1.0934, + "step": 37959 + }, + { + "epoch": 0.04, + "learning_rate": 4.982333696585301e-05, + "loss": 1.0228, + "step": 37960 + }, + { + "epoch": 0.04, + "learning_rate": 4.982332764430027e-05, + "loss": 0.83, + "step": 37961 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823318322502486e-05, + "loss": 0.8178, + "step": 37962 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823309000459653e-05, + "loss": 0.9877, + "step": 37963 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823299678171784e-05, + "loss": 0.8448, + "step": 37964 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823290355638865e-05, + "loss": 0.7719, + "step": 37965 + }, + { + "epoch": 0.04, + "learning_rate": 4.98232810328609e-05, + "loss": 1.0936, + "step": 37966 + }, + { + "epoch": 0.04, + "learning_rate": 4.982327170983789e-05, + "loss": 0.9873, + "step": 37967 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823262386569836e-05, + "loss": 0.7429, + "step": 37968 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823253063056744e-05, + "loss": 1.1295, + "step": 37969 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823243739298595e-05, + "loss": 1.2669, + "step": 37970 + }, + { + "epoch": 0.04, + "learning_rate": 4.98232344152954e-05, + "loss": 1.0981, + "step": 37971 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823225091047174e-05, + "loss": 1.1248, + "step": 37972 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823215766553896e-05, + "loss": 1.2389, + "step": 37973 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823206441815574e-05, + "loss": 1.027, + "step": 37974 + }, + { + "epoch": 0.04, + "learning_rate": 4.982319711683221e-05, + "loss": 0.9174, + "step": 37975 + }, + { + "epoch": 0.04, + "learning_rate": 4.98231877916038e-05, + "loss": 0.8941, + "step": 37976 + }, + { + "epoch": 0.04, + "learning_rate": 4.982317846613035e-05, + "loss": 0.9427, + "step": 37977 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823169140411855e-05, + "loss": 1.085, + "step": 37978 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823159814448303e-05, + "loss": 1.056, + "step": 37979 + }, + { + "epoch": 0.04, + "learning_rate": 4.982315048823972e-05, + "loss": 1.0494, + "step": 37980 + }, + { + "epoch": 0.04, + "learning_rate": 4.982314116178609e-05, + "loss": 1.1379, + "step": 37981 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823131835087423e-05, + "loss": 1.3106, + "step": 37982 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823122508143706e-05, + "loss": 1.2157, + "step": 37983 + }, + { + "epoch": 0.04, + "learning_rate": 4.982311318095494e-05, + "loss": 1.117, + "step": 37984 + }, + { + "epoch": 0.04, + "learning_rate": 4.982310385352114e-05, + "loss": 1.0977, + "step": 37985 + }, + { + "epoch": 0.04, + "learning_rate": 4.982309452584229e-05, + "loss": 1.1357, + "step": 37986 + }, + { + "epoch": 0.04, + "learning_rate": 4.982308519791841e-05, + "loss": 1.1279, + "step": 37987 + }, + { + "epoch": 0.04, + "learning_rate": 4.982307586974947e-05, + "loss": 1.1939, + "step": 37988 + }, + { + "epoch": 0.04, + "learning_rate": 4.982306654133549e-05, + "loss": 1.0487, + "step": 37989 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823057212676485e-05, + "loss": 1.006, + "step": 37990 + }, + { + "epoch": 0.04, + "learning_rate": 4.982304788377242e-05, + "loss": 1.0055, + "step": 37991 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823038554623314e-05, + "loss": 1.4001, + "step": 37992 + }, + { + "epoch": 0.04, + "learning_rate": 4.982302922522917e-05, + "loss": 0.9525, + "step": 37993 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823019895589984e-05, + "loss": 0.9925, + "step": 37994 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823010565705754e-05, + "loss": 0.9272, + "step": 37995 + }, + { + "epoch": 0.04, + "learning_rate": 4.9823001235576475e-05, + "loss": 0.5647, + "step": 37996 + }, + { + "epoch": 0.04, + "learning_rate": 4.982299190520216e-05, + "loss": 0.2192, + "step": 37997 + }, + { + "epoch": 0.04, + "learning_rate": 4.98229825745828e-05, + "loss": 0.1178, + "step": 37998 + }, + { + "epoch": 0.04, + "learning_rate": 4.982297324371841e-05, + "loss": 0.1188, + "step": 37999 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822963912608963e-05, + "loss": 0.1006, + "step": 38000 + }, + { + "epoch": 0.04, + "eval_loss": 1.0789852142333984, + "eval_runtime": 126.8511, + "eval_samples_per_second": 10.918, + "eval_steps_per_second": 5.463, + "step": 38000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822954581254474e-05, + "loss": 0.12, + "step": 38001 + }, + { + "epoch": 0.04, + "learning_rate": 4.982294524965495e-05, + "loss": 0.0891, + "step": 38002 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822935917810386e-05, + "loss": 0.1732, + "step": 38003 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822926585720774e-05, + "loss": 0.0763, + "step": 38004 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822917253386125e-05, + "loss": 0.1188, + "step": 38005 + }, + { + "epoch": 0.04, + "learning_rate": 4.982290792080644e-05, + "loss": 0.0688, + "step": 38006 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822898587981705e-05, + "loss": 0.098, + "step": 38007 + }, + { + "epoch": 0.04, + "learning_rate": 4.982288925491193e-05, + "loss": 0.1234, + "step": 38008 + }, + { + "epoch": 0.04, + "learning_rate": 4.982287992159712e-05, + "loss": 0.0912, + "step": 38009 + }, + { + "epoch": 0.04, + "learning_rate": 4.982287058803726e-05, + "loss": 0.0985, + "step": 38010 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822861254232365e-05, + "loss": 0.2144, + "step": 38011 + }, + { + "epoch": 0.04, + "learning_rate": 4.982285192018242e-05, + "loss": 0.1565, + "step": 38012 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822842585887446e-05, + "loss": 0.1193, + "step": 38013 + }, + { + "epoch": 0.04, + "learning_rate": 4.982283325134743e-05, + "loss": 0.0553, + "step": 38014 + }, + { + "epoch": 0.04, + "learning_rate": 4.982282391656237e-05, + "loss": 0.0748, + "step": 38015 + }, + { + "epoch": 0.04, + "learning_rate": 4.982281458153227e-05, + "loss": 0.0539, + "step": 38016 + }, + { + "epoch": 0.04, + "learning_rate": 4.982280524625713e-05, + "loss": 0.0743, + "step": 38017 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822795910736946e-05, + "loss": 0.051, + "step": 38018 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822786574971726e-05, + "loss": 0.055, + "step": 38019 + }, + { + "epoch": 0.04, + "learning_rate": 4.982277723896146e-05, + "loss": 0.1481, + "step": 38020 + }, + { + "epoch": 0.04, + "learning_rate": 4.982276790270617e-05, + "loss": 0.1237, + "step": 38021 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822758566205826e-05, + "loss": 0.1172, + "step": 38022 + }, + { + "epoch": 0.04, + "learning_rate": 4.982274922946044e-05, + "loss": 0.2439, + "step": 38023 + }, + { + "epoch": 0.04, + "learning_rate": 4.982273989247003e-05, + "loss": 0.2164, + "step": 38024 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822730555234564e-05, + "loss": 0.1949, + "step": 38025 + }, + { + "epoch": 0.04, + "learning_rate": 4.982272121775406e-05, + "loss": 0.1243, + "step": 38026 + }, + { + "epoch": 0.04, + "learning_rate": 4.982271188002853e-05, + "loss": 0.4218, + "step": 38027 + }, + { + "epoch": 0.04, + "learning_rate": 4.982270254205795e-05, + "loss": 0.5473, + "step": 38028 + }, + { + "epoch": 0.04, + "learning_rate": 4.982269320384233e-05, + "loss": 0.108, + "step": 38029 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822683865381666e-05, + "loss": 0.111, + "step": 38030 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822674526675975e-05, + "loss": 0.06, + "step": 38031 + }, + { + "epoch": 0.04, + "learning_rate": 4.982266518772524e-05, + "loss": 0.099, + "step": 38032 + }, + { + "epoch": 0.04, + "learning_rate": 4.982265584852947e-05, + "loss": 0.0797, + "step": 38033 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822646509088654e-05, + "loss": 0.0541, + "step": 38034 + }, + { + "epoch": 0.04, + "learning_rate": 4.982263716940281e-05, + "loss": 0.3332, + "step": 38035 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822627829471915e-05, + "loss": 0.8138, + "step": 38036 + }, + { + "epoch": 0.04, + "learning_rate": 4.982261848929599e-05, + "loss": 0.9497, + "step": 38037 + }, + { + "epoch": 0.04, + "learning_rate": 4.982260914887502e-05, + "loss": 0.8052, + "step": 38038 + }, + { + "epoch": 0.04, + "learning_rate": 4.982259980820901e-05, + "loss": 0.82, + "step": 38039 + }, + { + "epoch": 0.04, + "learning_rate": 4.982259046729797e-05, + "loss": 0.8285, + "step": 38040 + }, + { + "epoch": 0.04, + "learning_rate": 4.982258112614189e-05, + "loss": 1.3069, + "step": 38041 + }, + { + "epoch": 0.04, + "learning_rate": 4.982257178474077e-05, + "loss": 1.3065, + "step": 38042 + }, + { + "epoch": 0.04, + "learning_rate": 4.982256244309461e-05, + "loss": 1.0987, + "step": 38043 + }, + { + "epoch": 0.04, + "learning_rate": 4.982255310120342e-05, + "loss": 1.144, + "step": 38044 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822543759067184e-05, + "loss": 1.0826, + "step": 38045 + }, + { + "epoch": 0.04, + "learning_rate": 4.982253441668592e-05, + "loss": 1.4089, + "step": 38046 + }, + { + "epoch": 0.04, + "learning_rate": 4.982252507405961e-05, + "loss": 1.3401, + "step": 38047 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822515731188256e-05, + "loss": 1.1262, + "step": 38048 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822506388071874e-05, + "loss": 1.0422, + "step": 38049 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822497044710456e-05, + "loss": 1.1504, + "step": 38050 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822487701104e-05, + "loss": 0.8354, + "step": 38051 + }, + { + "epoch": 0.04, + "learning_rate": 4.98224783572525e-05, + "loss": 1.3611, + "step": 38052 + }, + { + "epoch": 0.04, + "learning_rate": 4.982246901315597e-05, + "loss": 0.9502, + "step": 38053 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822459668814404e-05, + "loss": 1.0657, + "step": 38054 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822450324227796e-05, + "loss": 0.8627, + "step": 38055 + }, + { + "epoch": 0.04, + "learning_rate": 4.982244097939616e-05, + "loss": 0.9574, + "step": 38056 + }, + { + "epoch": 0.04, + "learning_rate": 4.982243163431948e-05, + "loss": 1.1658, + "step": 38057 + }, + { + "epoch": 0.04, + "learning_rate": 4.982242228899776e-05, + "loss": 1.1974, + "step": 38058 + }, + { + "epoch": 0.04, + "learning_rate": 4.982241294343101e-05, + "loss": 0.8426, + "step": 38059 + }, + { + "epoch": 0.04, + "learning_rate": 4.982240359761922e-05, + "loss": 1.0038, + "step": 38060 + }, + { + "epoch": 0.04, + "learning_rate": 4.982239425156239e-05, + "loss": 1.0122, + "step": 38061 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822384905260535e-05, + "loss": 1.026, + "step": 38062 + }, + { + "epoch": 0.04, + "learning_rate": 4.982237555871363e-05, + "loss": 1.1412, + "step": 38063 + }, + { + "epoch": 0.04, + "learning_rate": 4.98223662119217e-05, + "loss": 0.9071, + "step": 38064 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822356864884735e-05, + "loss": 0.9825, + "step": 38065 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822347517602726e-05, + "loss": 1.2179, + "step": 38066 + }, + { + "epoch": 0.04, + "learning_rate": 4.982233817007568e-05, + "loss": 1.4152, + "step": 38067 + }, + { + "epoch": 0.04, + "learning_rate": 4.982232882230361e-05, + "loss": 0.8876, + "step": 38068 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822319474286494e-05, + "loss": 1.0453, + "step": 38069 + }, + { + "epoch": 0.04, + "learning_rate": 4.982231012602435e-05, + "loss": 0.9065, + "step": 38070 + }, + { + "epoch": 0.04, + "learning_rate": 4.982230077751716e-05, + "loss": 1.0494, + "step": 38071 + }, + { + "epoch": 0.04, + "learning_rate": 4.982229142876494e-05, + "loss": 0.9769, + "step": 38072 + }, + { + "epoch": 0.04, + "learning_rate": 4.982228207976769e-05, + "loss": 1.093, + "step": 38073 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822272730525404e-05, + "loss": 1.0402, + "step": 38074 + }, + { + "epoch": 0.04, + "learning_rate": 4.982226338103808e-05, + "loss": 1.1817, + "step": 38075 + }, + { + "epoch": 0.04, + "learning_rate": 4.982225403130572e-05, + "loss": 0.9066, + "step": 38076 + }, + { + "epoch": 0.04, + "learning_rate": 4.982224468132833e-05, + "loss": 1.0534, + "step": 38077 + }, + { + "epoch": 0.04, + "learning_rate": 4.98222353311059e-05, + "loss": 0.9999, + "step": 38078 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822225980638436e-05, + "loss": 1.0229, + "step": 38079 + }, + { + "epoch": 0.04, + "learning_rate": 4.982221662992594e-05, + "loss": 1.1281, + "step": 38080 + }, + { + "epoch": 0.04, + "learning_rate": 4.982220727896841e-05, + "loss": 1.0489, + "step": 38081 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822197927765843e-05, + "loss": 1.0261, + "step": 38082 + }, + { + "epoch": 0.04, + "learning_rate": 4.982218857631824e-05, + "loss": 1.0254, + "step": 38083 + }, + { + "epoch": 0.04, + "learning_rate": 4.982217922462561e-05, + "loss": 1.3157, + "step": 38084 + }, + { + "epoch": 0.04, + "learning_rate": 4.982216987268794e-05, + "loss": 0.9929, + "step": 38085 + }, + { + "epoch": 0.04, + "learning_rate": 4.982216052050523e-05, + "loss": 1.084, + "step": 38086 + }, + { + "epoch": 0.04, + "learning_rate": 4.98221511680775e-05, + "loss": 1.3969, + "step": 38087 + }, + { + "epoch": 0.04, + "learning_rate": 4.982214181540473e-05, + "loss": 0.9169, + "step": 38088 + }, + { + "epoch": 0.04, + "learning_rate": 4.982213246248693e-05, + "loss": 1.1104, + "step": 38089 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822123109324084e-05, + "loss": 1.0561, + "step": 38090 + }, + { + "epoch": 0.04, + "learning_rate": 4.982211375591621e-05, + "loss": 1.0665, + "step": 38091 + }, + { + "epoch": 0.04, + "learning_rate": 4.982210440226331e-05, + "loss": 1.2545, + "step": 38092 + }, + { + "epoch": 0.04, + "learning_rate": 4.982209504836537e-05, + "loss": 1.0088, + "step": 38093 + }, + { + "epoch": 0.04, + "learning_rate": 4.98220856942224e-05, + "loss": 0.818, + "step": 38094 + }, + { + "epoch": 0.04, + "learning_rate": 4.982207633983439e-05, + "loss": 0.5331, + "step": 38095 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822066985201354e-05, + "loss": 1.3049, + "step": 38096 + }, + { + "epoch": 0.04, + "learning_rate": 4.982205763032328e-05, + "loss": 0.9678, + "step": 38097 + }, + { + "epoch": 0.04, + "learning_rate": 4.982204827520018e-05, + "loss": 0.9624, + "step": 38098 + }, + { + "epoch": 0.04, + "learning_rate": 4.982203891983205e-05, + "loss": 1.0975, + "step": 38099 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822029564218876e-05, + "loss": 1.1626, + "step": 38100 + }, + { + "epoch": 0.04, + "learning_rate": 4.982202020836068e-05, + "loss": 0.8275, + "step": 38101 + }, + { + "epoch": 0.04, + "learning_rate": 4.982201085225744e-05, + "loss": 0.5216, + "step": 38102 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822001495909176e-05, + "loss": 1.1174, + "step": 38103 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821992139315876e-05, + "loss": 1.039, + "step": 38104 + }, + { + "epoch": 0.04, + "learning_rate": 4.982198278247754e-05, + "loss": 1.1559, + "step": 38105 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821973425394174e-05, + "loss": 1.1449, + "step": 38106 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821964068065785e-05, + "loss": 0.9628, + "step": 38107 + }, + { + "epoch": 0.04, + "learning_rate": 4.982195471049236e-05, + "loss": 1.15, + "step": 38108 + }, + { + "epoch": 0.04, + "learning_rate": 4.982194535267389e-05, + "loss": 0.9535, + "step": 38109 + }, + { + "epoch": 0.04, + "learning_rate": 4.98219359946104e-05, + "loss": 1.1012, + "step": 38110 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821926636301886e-05, + "loss": 1.005, + "step": 38111 + }, + { + "epoch": 0.04, + "learning_rate": 4.982191727774833e-05, + "loss": 0.9317, + "step": 38112 + }, + { + "epoch": 0.04, + "learning_rate": 4.982190791894974e-05, + "loss": 1.1276, + "step": 38113 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821898559906125e-05, + "loss": 0.9433, + "step": 38114 + }, + { + "epoch": 0.04, + "learning_rate": 4.982188920061748e-05, + "loss": 0.8623, + "step": 38115 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821879841083795e-05, + "loss": 0.902, + "step": 38116 + }, + { + "epoch": 0.04, + "learning_rate": 4.982187048130509e-05, + "loss": 1.5502, + "step": 38117 + }, + { + "epoch": 0.04, + "learning_rate": 4.982186112128135e-05, + "loss": 1.7813, + "step": 38118 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821851761012575e-05, + "loss": 1.9267, + "step": 38119 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821842400498766e-05, + "loss": 1.3723, + "step": 38120 + }, + { + "epoch": 0.04, + "learning_rate": 4.982183303973993e-05, + "loss": 1.5272, + "step": 38121 + }, + { + "epoch": 0.04, + "learning_rate": 4.982182367873607e-05, + "loss": 0.7117, + "step": 38122 + }, + { + "epoch": 0.04, + "learning_rate": 4.982181431748717e-05, + "loss": 1.0498, + "step": 38123 + }, + { + "epoch": 0.04, + "learning_rate": 4.982180495599325e-05, + "loss": 1.1127, + "step": 38124 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821795594254285e-05, + "loss": 1.1161, + "step": 38125 + }, + { + "epoch": 0.04, + "learning_rate": 4.98217862322703e-05, + "loss": 1.2453, + "step": 38126 + }, + { + "epoch": 0.04, + "learning_rate": 4.982177687004128e-05, + "loss": 1.2939, + "step": 38127 + }, + { + "epoch": 0.04, + "learning_rate": 4.982176750756723e-05, + "loss": 0.8829, + "step": 38128 + }, + { + "epoch": 0.04, + "learning_rate": 4.982175814484816e-05, + "loss": 1.0575, + "step": 38129 + }, + { + "epoch": 0.04, + "learning_rate": 4.982174878188405e-05, + "loss": 1.0802, + "step": 38130 + }, + { + "epoch": 0.04, + "learning_rate": 4.982173941867491e-05, + "loss": 0.945, + "step": 38131 + }, + { + "epoch": 0.04, + "learning_rate": 4.982173005522075e-05, + "loss": 0.8847, + "step": 38132 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821720691521546e-05, + "loss": 0.9847, + "step": 38133 + }, + { + "epoch": 0.04, + "learning_rate": 4.982171132757732e-05, + "loss": 1.1142, + "step": 38134 + }, + { + "epoch": 0.04, + "learning_rate": 4.982170196338807e-05, + "loss": 1.0618, + "step": 38135 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821692598953784e-05, + "loss": 0.8979, + "step": 38136 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821683234274464e-05, + "loss": 0.7951, + "step": 38137 + }, + { + "epoch": 0.04, + "learning_rate": 4.982167386935012e-05, + "loss": 1.0714, + "step": 38138 + }, + { + "epoch": 0.04, + "learning_rate": 4.982166450418075e-05, + "loss": 0.9983, + "step": 38139 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821655138766346e-05, + "loss": 1.155, + "step": 38140 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821645773106914e-05, + "loss": 0.7751, + "step": 38141 + }, + { + "epoch": 0.04, + "learning_rate": 4.982163640720245e-05, + "loss": 0.8457, + "step": 38142 + }, + { + "epoch": 0.04, + "learning_rate": 4.982162704105297e-05, + "loss": 0.9632, + "step": 38143 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821617674658447e-05, + "loss": 0.9474, + "step": 38144 + }, + { + "epoch": 0.04, + "learning_rate": 4.982160830801891e-05, + "loss": 0.9177, + "step": 38145 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821598941134335e-05, + "loss": 0.9551, + "step": 38146 + }, + { + "epoch": 0.04, + "learning_rate": 4.982158957400473e-05, + "loss": 1.0378, + "step": 38147 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821580206630106e-05, + "loss": 1.056, + "step": 38148 + }, + { + "epoch": 0.04, + "learning_rate": 4.982157083901044e-05, + "loss": 0.9823, + "step": 38149 + }, + { + "epoch": 0.04, + "learning_rate": 4.982156147114576e-05, + "loss": 0.7644, + "step": 38150 + }, + { + "epoch": 0.04, + "learning_rate": 4.982155210303604e-05, + "loss": 0.9333, + "step": 38151 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821542734681297e-05, + "loss": 0.5851, + "step": 38152 + }, + { + "epoch": 0.04, + "learning_rate": 4.982153336608153e-05, + "loss": 1.0811, + "step": 38153 + }, + { + "epoch": 0.04, + "learning_rate": 4.982152399723673e-05, + "loss": 1.0723, + "step": 38154 + }, + { + "epoch": 0.04, + "learning_rate": 4.98215146281469e-05, + "loss": 3.9783, + "step": 38155 + }, + { + "epoch": 0.04, + "learning_rate": 4.982150525881205e-05, + "loss": 3.2295, + "step": 38156 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821495889232166e-05, + "loss": 2.426, + "step": 38157 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821486519407257e-05, + "loss": 2.6046, + "step": 38158 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821477149337324e-05, + "loss": 3.1165, + "step": 38159 + }, + { + "epoch": 0.04, + "learning_rate": 4.982146777902236e-05, + "loss": 3.1365, + "step": 38160 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821458408462376e-05, + "loss": 3.9309, + "step": 38161 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821449037657355e-05, + "loss": 3.2747, + "step": 38162 + }, + { + "epoch": 0.04, + "learning_rate": 4.982143966660731e-05, + "loss": 3.3296, + "step": 38163 + }, + { + "epoch": 0.04, + "learning_rate": 4.982143029531224e-05, + "loss": 2.5927, + "step": 38164 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821420923772145e-05, + "loss": 3.1314, + "step": 38165 + }, + { + "epoch": 0.04, + "learning_rate": 4.982141155198702e-05, + "loss": 2.8503, + "step": 38166 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821402179956875e-05, + "loss": 3.0498, + "step": 38167 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821392807681696e-05, + "loss": 2.6347, + "step": 38168 + }, + { + "epoch": 0.04, + "learning_rate": 4.982138343516149e-05, + "loss": 3.356, + "step": 38169 + }, + { + "epoch": 0.04, + "learning_rate": 4.982137406239626e-05, + "loss": 3.0016, + "step": 38170 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821364689386006e-05, + "loss": 2.3588, + "step": 38171 + }, + { + "epoch": 0.04, + "learning_rate": 4.982135531613072e-05, + "loss": 2.7685, + "step": 38172 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821345942630414e-05, + "loss": 3.4255, + "step": 38173 + }, + { + "epoch": 0.04, + "learning_rate": 4.982133656888508e-05, + "loss": 2.5384, + "step": 38174 + }, + { + "epoch": 0.04, + "learning_rate": 4.982132719489473e-05, + "loss": 5.8486, + "step": 38175 + }, + { + "epoch": 0.04, + "learning_rate": 4.982131782065934e-05, + "loss": 5.9549, + "step": 38176 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821308446178934e-05, + "loss": 5.9054, + "step": 38177 + }, + { + "epoch": 0.04, + "learning_rate": 4.98212990714535e-05, + "loss": 5.8674, + "step": 38178 + }, + { + "epoch": 0.04, + "learning_rate": 4.982128969648303e-05, + "loss": 5.8574, + "step": 38179 + }, + { + "epoch": 0.04, + "learning_rate": 4.982128032126755e-05, + "loss": 5.7708, + "step": 38180 + }, + { + "epoch": 0.04, + "learning_rate": 4.982127094580703e-05, + "loss": 5.8301, + "step": 38181 + }, + { + "epoch": 0.04, + "learning_rate": 4.98212615701015e-05, + "loss": 5.8647, + "step": 38182 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821252194150935e-05, + "loss": 5.8378, + "step": 38183 + }, + { + "epoch": 0.04, + "learning_rate": 4.982124281795535e-05, + "loss": 5.8297, + "step": 38184 + }, + { + "epoch": 0.04, + "learning_rate": 4.982123344151474e-05, + "loss": 5.851, + "step": 38185 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821224064829106e-05, + "loss": 5.7983, + "step": 38186 + }, + { + "epoch": 0.04, + "learning_rate": 4.982121468789844e-05, + "loss": 5.7924, + "step": 38187 + }, + { + "epoch": 0.04, + "learning_rate": 4.982120531072276e-05, + "loss": 5.7823, + "step": 38188 + }, + { + "epoch": 0.04, + "learning_rate": 4.982119593330204e-05, + "loss": 5.8203, + "step": 38189 + }, + { + "epoch": 0.04, + "learning_rate": 4.982118655563631e-05, + "loss": 5.4615, + "step": 38190 + }, + { + "epoch": 0.04, + "learning_rate": 4.982117717772555e-05, + "loss": 0.5152, + "step": 38191 + }, + { + "epoch": 0.04, + "learning_rate": 4.982116779956977e-05, + "loss": 0.5855, + "step": 38192 + }, + { + "epoch": 0.04, + "learning_rate": 4.982115842116897e-05, + "loss": 1.1657, + "step": 38193 + }, + { + "epoch": 0.04, + "learning_rate": 4.982114904252313e-05, + "loss": 1.0176, + "step": 38194 + }, + { + "epoch": 0.04, + "learning_rate": 4.982113966363228e-05, + "loss": 0.8379, + "step": 38195 + }, + { + "epoch": 0.04, + "learning_rate": 4.98211302844964e-05, + "loss": 1.3808, + "step": 38196 + }, + { + "epoch": 0.04, + "learning_rate": 4.982112090511549e-05, + "loss": 1.151, + "step": 38197 + }, + { + "epoch": 0.04, + "learning_rate": 4.982111152548957e-05, + "loss": 0.8669, + "step": 38198 + }, + { + "epoch": 0.04, + "learning_rate": 4.982110214561862e-05, + "loss": 0.7616, + "step": 38199 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821092765502646e-05, + "loss": 1.221, + "step": 38200 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821083385141656e-05, + "loss": 0.9557, + "step": 38201 + }, + { + "epoch": 0.04, + "learning_rate": 4.982107400453563e-05, + "loss": 1.1799, + "step": 38202 + }, + { + "epoch": 0.04, + "learning_rate": 4.982106462368459e-05, + "loss": 0.9695, + "step": 38203 + }, + { + "epoch": 0.04, + "learning_rate": 4.982105524258852e-05, + "loss": 1.0699, + "step": 38204 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821045861247427e-05, + "loss": 0.8565, + "step": 38205 + }, + { + "epoch": 0.04, + "learning_rate": 4.982103647966132e-05, + "loss": 1.4334, + "step": 38206 + }, + { + "epoch": 0.04, + "learning_rate": 4.982102709783019e-05, + "loss": 1.2557, + "step": 38207 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821017715754024e-05, + "loss": 1.1115, + "step": 38208 + }, + { + "epoch": 0.04, + "learning_rate": 4.982100833343285e-05, + "loss": 0.9721, + "step": 38209 + }, + { + "epoch": 0.04, + "learning_rate": 4.982099895086664e-05, + "loss": 1.1341, + "step": 38210 + }, + { + "epoch": 0.04, + "learning_rate": 4.982098956805542e-05, + "loss": 1.167, + "step": 38211 + }, + { + "epoch": 0.04, + "learning_rate": 4.982098018499917e-05, + "loss": 0.9263, + "step": 38212 + }, + { + "epoch": 0.04, + "learning_rate": 4.98209708016979e-05, + "loss": 0.9583, + "step": 38213 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820961418151614e-05, + "loss": 0.8236, + "step": 38214 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820952034360296e-05, + "loss": 1.3324, + "step": 38215 + }, + { + "epoch": 0.04, + "learning_rate": 4.982094265032396e-05, + "loss": 1.211, + "step": 38216 + }, + { + "epoch": 0.04, + "learning_rate": 4.98209332660426e-05, + "loss": 1.0093, + "step": 38217 + }, + { + "epoch": 0.04, + "learning_rate": 4.982092388151623e-05, + "loss": 1.0561, + "step": 38218 + }, + { + "epoch": 0.04, + "learning_rate": 4.982091449674482e-05, + "loss": 1.1684, + "step": 38219 + }, + { + "epoch": 0.04, + "learning_rate": 4.98209051117284e-05, + "loss": 0.8693, + "step": 38220 + }, + { + "epoch": 0.04, + "learning_rate": 4.982089572646695e-05, + "loss": 0.912, + "step": 38221 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820886340960486e-05, + "loss": 0.8524, + "step": 38222 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820876955209e-05, + "loss": 0.9464, + "step": 38223 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820867569212496e-05, + "loss": 0.6016, + "step": 38224 + }, + { + "epoch": 0.04, + "learning_rate": 4.982085818297096e-05, + "loss": 0.3749, + "step": 38225 + }, + { + "epoch": 0.04, + "learning_rate": 4.982084879648441e-05, + "loss": 0.9142, + "step": 38226 + }, + { + "epoch": 0.04, + "learning_rate": 4.982083940975284e-05, + "loss": 1.1594, + "step": 38227 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820830022776246e-05, + "loss": 1.282, + "step": 38228 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820820635554635e-05, + "loss": 0.9402, + "step": 38229 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820811248087994e-05, + "loss": 0.7548, + "step": 38230 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820801860376344e-05, + "loss": 0.6946, + "step": 38231 + }, + { + "epoch": 0.04, + "learning_rate": 4.982079247241967e-05, + "loss": 1.0818, + "step": 38232 + }, + { + "epoch": 0.04, + "learning_rate": 4.982078308421797e-05, + "loss": 1.0565, + "step": 38233 + }, + { + "epoch": 0.04, + "learning_rate": 4.982077369577126e-05, + "loss": 1.3118, + "step": 38234 + }, + { + "epoch": 0.04, + "learning_rate": 4.982076430707953e-05, + "loss": 0.9632, + "step": 38235 + }, + { + "epoch": 0.04, + "learning_rate": 4.982075491814276e-05, + "loss": 0.9824, + "step": 38236 + }, + { + "epoch": 0.04, + "learning_rate": 4.982074552896099e-05, + "loss": 1.3887, + "step": 38237 + }, + { + "epoch": 0.04, + "learning_rate": 4.98207361395342e-05, + "loss": 1.0321, + "step": 38238 + }, + { + "epoch": 0.04, + "learning_rate": 4.982072674986238e-05, + "loss": 0.9153, + "step": 38239 + }, + { + "epoch": 0.04, + "learning_rate": 4.982071735994555e-05, + "loss": 0.7762, + "step": 38240 + }, + { + "epoch": 0.04, + "learning_rate": 4.982070796978369e-05, + "loss": 1.0465, + "step": 38241 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820698579376814e-05, + "loss": 0.5702, + "step": 38242 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820689188724916e-05, + "loss": 0.4105, + "step": 38243 + }, + { + "epoch": 0.04, + "learning_rate": 4.982067979782801e-05, + "loss": 0.2697, + "step": 38244 + }, + { + "epoch": 0.04, + "learning_rate": 4.982067040668607e-05, + "loss": 0.4437, + "step": 38245 + }, + { + "epoch": 0.04, + "learning_rate": 4.982066101529912e-05, + "loss": 0.8899, + "step": 38246 + }, + { + "epoch": 0.04, + "learning_rate": 4.982065162366715e-05, + "loss": 0.9983, + "step": 38247 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820642231790156e-05, + "loss": 0.9695, + "step": 38248 + }, + { + "epoch": 0.04, + "learning_rate": 4.982063283966815e-05, + "loss": 0.6302, + "step": 38249 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820623447301116e-05, + "loss": 0.7445, + "step": 38250 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820614054689076e-05, + "loss": 0.8329, + "step": 38251 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820604661832005e-05, + "loss": 1.5827, + "step": 38252 + }, + { + "epoch": 0.04, + "learning_rate": 4.982059526872992e-05, + "loss": 2.1867, + "step": 38253 + }, + { + "epoch": 0.04, + "learning_rate": 4.982058587538282e-05, + "loss": 1.9641, + "step": 38254 + }, + { + "epoch": 0.04, + "learning_rate": 4.98205764817907e-05, + "loss": 2.2447, + "step": 38255 + }, + { + "epoch": 0.04, + "learning_rate": 4.982056708795356e-05, + "loss": 1.9846, + "step": 38256 + }, + { + "epoch": 0.04, + "learning_rate": 4.98205576938714e-05, + "loss": 1.9337, + "step": 38257 + }, + { + "epoch": 0.04, + "learning_rate": 4.982054829954422e-05, + "loss": 1.9961, + "step": 38258 + }, + { + "epoch": 0.04, + "learning_rate": 4.982053890497203e-05, + "loss": 1.9138, + "step": 38259 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820529510154826e-05, + "loss": 2.1728, + "step": 38260 + }, + { + "epoch": 0.04, + "learning_rate": 4.98205201150926e-05, + "loss": 2.021, + "step": 38261 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820510719785345e-05, + "loss": 1.9859, + "step": 38262 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820501324233083e-05, + "loss": 2.0462, + "step": 38263 + }, + { + "epoch": 0.04, + "learning_rate": 4.98204919284358e-05, + "loss": 1.9, + "step": 38264 + }, + { + "epoch": 0.04, + "learning_rate": 4.98204825323935e-05, + "loss": 2.321, + "step": 38265 + }, + { + "epoch": 0.04, + "learning_rate": 4.982047313610618e-05, + "loss": 1.9908, + "step": 38266 + }, + { + "epoch": 0.04, + "learning_rate": 4.982046373957385e-05, + "loss": 1.8927, + "step": 38267 + }, + { + "epoch": 0.04, + "learning_rate": 4.98204543427965e-05, + "loss": 1.9252, + "step": 38268 + }, + { + "epoch": 0.04, + "learning_rate": 4.982044494577413e-05, + "loss": 1.941, + "step": 38269 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820435548506746e-05, + "loss": 1.7562, + "step": 38270 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820426150994335e-05, + "loss": 1.7425, + "step": 38271 + }, + { + "epoch": 0.04, + "learning_rate": 4.982041675323692e-05, + "loss": 1.4208, + "step": 38272 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820407355234485e-05, + "loss": 1.2827, + "step": 38273 + }, + { + "epoch": 0.04, + "learning_rate": 4.982039795698703e-05, + "loss": 1.1166, + "step": 38274 + }, + { + "epoch": 0.04, + "learning_rate": 4.982038855849457e-05, + "loss": 1.1381, + "step": 38275 + }, + { + "epoch": 0.04, + "learning_rate": 4.982037915975708e-05, + "loss": 1.0243, + "step": 38276 + }, + { + "epoch": 0.04, + "learning_rate": 4.982036976077458e-05, + "loss": 0.846, + "step": 38277 + }, + { + "epoch": 0.04, + "learning_rate": 4.982036036154706e-05, + "loss": 0.8511, + "step": 38278 + }, + { + "epoch": 0.04, + "learning_rate": 4.982035096207453e-05, + "loss": 1.0444, + "step": 38279 + }, + { + "epoch": 0.04, + "learning_rate": 4.982034156235698e-05, + "loss": 1.3235, + "step": 38280 + }, + { + "epoch": 0.04, + "learning_rate": 4.982033216239441e-05, + "loss": 0.8964, + "step": 38281 + }, + { + "epoch": 0.04, + "learning_rate": 4.982032276218683e-05, + "loss": 0.8448, + "step": 38282 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820313361734226e-05, + "loss": 0.9642, + "step": 38283 + }, + { + "epoch": 0.04, + "learning_rate": 4.982030396103662e-05, + "loss": 1.1826, + "step": 38284 + }, + { + "epoch": 0.04, + "learning_rate": 4.982029456009399e-05, + "loss": 1.1928, + "step": 38285 + }, + { + "epoch": 0.04, + "learning_rate": 4.982028515890634e-05, + "loss": 1.1797, + "step": 38286 + }, + { + "epoch": 0.04, + "learning_rate": 4.982027575747369e-05, + "loss": 0.8409, + "step": 38287 + }, + { + "epoch": 0.04, + "learning_rate": 4.982026635579601e-05, + "loss": 0.9058, + "step": 38288 + }, + { + "epoch": 0.04, + "learning_rate": 4.982025695387332e-05, + "loss": 1.2135, + "step": 38289 + }, + { + "epoch": 0.04, + "learning_rate": 4.982024755170561e-05, + "loss": 1.2771, + "step": 38290 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820238149292896e-05, + "loss": 0.989, + "step": 38291 + }, + { + "epoch": 0.04, + "learning_rate": 4.982022874663515e-05, + "loss": 1.1629, + "step": 38292 + }, + { + "epoch": 0.04, + "learning_rate": 4.982021934373241e-05, + "loss": 0.6651, + "step": 38293 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820209940584645e-05, + "loss": 0.8528, + "step": 38294 + }, + { + "epoch": 0.04, + "learning_rate": 4.982020053719186e-05, + "loss": 2.1207, + "step": 38295 + }, + { + "epoch": 0.04, + "learning_rate": 4.982019113355407e-05, + "loss": 0.9249, + "step": 38296 + }, + { + "epoch": 0.04, + "learning_rate": 4.982018172967127e-05, + "loss": 0.7325, + "step": 38297 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820172325543443e-05, + "loss": 0.9436, + "step": 38298 + }, + { + "epoch": 0.04, + "learning_rate": 4.98201629211706e-05, + "loss": 1.0186, + "step": 38299 + }, + { + "epoch": 0.04, + "learning_rate": 4.982015351655275e-05, + "loss": 0.7706, + "step": 38300 + }, + { + "epoch": 0.04, + "learning_rate": 4.982014411168989e-05, + "loss": 1.0349, + "step": 38301 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820134706582004e-05, + "loss": 1.134, + "step": 38302 + }, + { + "epoch": 0.04, + "learning_rate": 4.982012530122912e-05, + "loss": 0.9498, + "step": 38303 + }, + { + "epoch": 0.04, + "learning_rate": 4.98201158956312e-05, + "loss": 1.0202, + "step": 38304 + }, + { + "epoch": 0.04, + "learning_rate": 4.982010648978829e-05, + "loss": 1.5474, + "step": 38305 + }, + { + "epoch": 0.04, + "learning_rate": 4.982009708370035e-05, + "loss": 1.2205, + "step": 38306 + }, + { + "epoch": 0.04, + "learning_rate": 4.98200876773674e-05, + "loss": 1.0119, + "step": 38307 + }, + { + "epoch": 0.04, + "learning_rate": 4.982007827078944e-05, + "loss": 1.072, + "step": 38308 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820068863966464e-05, + "loss": 0.9588, + "step": 38309 + }, + { + "epoch": 0.04, + "learning_rate": 4.982005945689848e-05, + "loss": 1.1203, + "step": 38310 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820050049585476e-05, + "loss": 0.9451, + "step": 38311 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820040642027464e-05, + "loss": 0.9936, + "step": 38312 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820031234224436e-05, + "loss": 1.0132, + "step": 38313 + }, + { + "epoch": 0.04, + "learning_rate": 4.982002182617639e-05, + "loss": 0.7905, + "step": 38314 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820012417883346e-05, + "loss": 0.7191, + "step": 38315 + }, + { + "epoch": 0.04, + "learning_rate": 4.9820003009345276e-05, + "loss": 0.6211, + "step": 38316 + }, + { + "epoch": 0.04, + "learning_rate": 4.98199936005622e-05, + "loss": 0.6226, + "step": 38317 + }, + { + "epoch": 0.04, + "learning_rate": 4.981998419153411e-05, + "loss": 0.8518, + "step": 38318 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819974782261005e-05, + "loss": 1.266, + "step": 38319 + }, + { + "epoch": 0.04, + "learning_rate": 4.981996537274289e-05, + "loss": 1.1437, + "step": 38320 + }, + { + "epoch": 0.04, + "learning_rate": 4.981995596297976e-05, + "loss": 0.8909, + "step": 38321 + }, + { + "epoch": 0.04, + "learning_rate": 4.981994655297162e-05, + "loss": 1.1259, + "step": 38322 + }, + { + "epoch": 0.04, + "learning_rate": 4.981993714271847e-05, + "loss": 1.2279, + "step": 38323 + }, + { + "epoch": 0.04, + "learning_rate": 4.981992773222031e-05, + "loss": 0.8839, + "step": 38324 + }, + { + "epoch": 0.04, + "learning_rate": 4.981991832147713e-05, + "loss": 0.9806, + "step": 38325 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819908910488945e-05, + "loss": 1.2215, + "step": 38326 + }, + { + "epoch": 0.04, + "learning_rate": 4.981989949925574e-05, + "loss": 1.2816, + "step": 38327 + }, + { + "epoch": 0.04, + "learning_rate": 4.981989008777753e-05, + "loss": 0.96, + "step": 38328 + }, + { + "epoch": 0.04, + "learning_rate": 4.981988067605431e-05, + "loss": 1.1456, + "step": 38329 + }, + { + "epoch": 0.04, + "learning_rate": 4.981987126408607e-05, + "loss": 1.0151, + "step": 38330 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819861851872826e-05, + "loss": 0.9391, + "step": 38331 + }, + { + "epoch": 0.04, + "learning_rate": 4.981985243941457e-05, + "loss": 1.1205, + "step": 38332 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819843026711304e-05, + "loss": 1.0132, + "step": 38333 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819833613763025e-05, + "loss": 1.0287, + "step": 38334 + }, + { + "epoch": 0.04, + "learning_rate": 4.981982420056974e-05, + "loss": 0.8527, + "step": 38335 + }, + { + "epoch": 0.04, + "learning_rate": 4.981981478713143e-05, + "loss": 0.894, + "step": 38336 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819805373448126e-05, + "loss": 0.823, + "step": 38337 + }, + { + "epoch": 0.04, + "learning_rate": 4.98197959595198e-05, + "loss": 0.7801, + "step": 38338 + }, + { + "epoch": 0.04, + "learning_rate": 4.981978654534647e-05, + "loss": 1.1055, + "step": 38339 + }, + { + "epoch": 0.04, + "learning_rate": 4.981977713092812e-05, + "loss": 1.0594, + "step": 38340 + }, + { + "epoch": 0.04, + "learning_rate": 4.981976771626478e-05, + "loss": 1.1607, + "step": 38341 + }, + { + "epoch": 0.04, + "learning_rate": 4.981975830135641e-05, + "loss": 0.8448, + "step": 38342 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819748886203035e-05, + "loss": 1.0283, + "step": 38343 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819739470804656e-05, + "loss": 1.1144, + "step": 38344 + }, + { + "epoch": 0.04, + "learning_rate": 4.981973005516126e-05, + "loss": 0.9208, + "step": 38345 + }, + { + "epoch": 0.04, + "learning_rate": 4.981972063927286e-05, + "loss": 0.8463, + "step": 38346 + }, + { + "epoch": 0.04, + "learning_rate": 4.981971122313944e-05, + "loss": 0.8361, + "step": 38347 + }, + { + "epoch": 0.04, + "learning_rate": 4.981970180676102e-05, + "loss": 0.6333, + "step": 38348 + }, + { + "epoch": 0.04, + "learning_rate": 4.981969239013759e-05, + "loss": 0.7243, + "step": 38349 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819682973269145e-05, + "loss": 1.3726, + "step": 38350 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819673556155693e-05, + "loss": 1.1268, + "step": 38351 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819664138797226e-05, + "loss": 0.7949, + "step": 38352 + }, + { + "epoch": 0.04, + "learning_rate": 4.981965472119376e-05, + "loss": 1.0546, + "step": 38353 + }, + { + "epoch": 0.04, + "learning_rate": 4.981964530334528e-05, + "loss": 1.2077, + "step": 38354 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819635885251794e-05, + "loss": 0.8945, + "step": 38355 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819626466913296e-05, + "loss": 0.2313, + "step": 38356 + }, + { + "epoch": 0.04, + "learning_rate": 4.981961704832979e-05, + "loss": 0.8967, + "step": 38357 + }, + { + "epoch": 0.04, + "learning_rate": 4.981960762950127e-05, + "loss": 1.8335, + "step": 38358 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819598210427746e-05, + "loss": 2.5156, + "step": 38359 + }, + { + "epoch": 0.04, + "learning_rate": 4.981958879110922e-05, + "loss": 1.4753, + "step": 38360 + }, + { + "epoch": 0.04, + "learning_rate": 4.981957937154568e-05, + "loss": 0.9679, + "step": 38361 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819569951737124e-05, + "loss": 1.5134, + "step": 38362 + }, + { + "epoch": 0.04, + "learning_rate": 4.981956053168357e-05, + "loss": 1.4071, + "step": 38363 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819551111385e-05, + "loss": 1.2637, + "step": 38364 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819541690841425e-05, + "loss": 1.3477, + "step": 38365 + }, + { + "epoch": 0.04, + "learning_rate": 4.981953227005284e-05, + "loss": 0.91, + "step": 38366 + }, + { + "epoch": 0.04, + "learning_rate": 4.981952284901925e-05, + "loss": 1.0328, + "step": 38367 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819513427740656e-05, + "loss": 0.7044, + "step": 38368 + }, + { + "epoch": 0.04, + "learning_rate": 4.981950400621705e-05, + "loss": 1.2595, + "step": 38369 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819494584448434e-05, + "loss": 0.5962, + "step": 38370 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819485162434816e-05, + "loss": 0.9166, + "step": 38371 + }, + { + "epoch": 0.04, + "learning_rate": 4.981947574017619e-05, + "loss": 0.7567, + "step": 38372 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819466317672544e-05, + "loss": 0.5856, + "step": 38373 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819456894923905e-05, + "loss": 1.0513, + "step": 38374 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819447471930256e-05, + "loss": 1.0143, + "step": 38375 + }, + { + "epoch": 0.04, + "learning_rate": 4.98194380486916e-05, + "loss": 1.0559, + "step": 38376 + }, + { + "epoch": 0.04, + "learning_rate": 4.981942862520793e-05, + "loss": 1.1043, + "step": 38377 + }, + { + "epoch": 0.04, + "learning_rate": 4.981941920147925e-05, + "loss": 0.9608, + "step": 38378 + }, + { + "epoch": 0.04, + "learning_rate": 4.981940977750558e-05, + "loss": 0.9287, + "step": 38379 + }, + { + "epoch": 0.04, + "learning_rate": 4.981940035328689e-05, + "loss": 0.5108, + "step": 38380 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819390928823206e-05, + "loss": 0.9565, + "step": 38381 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819381504114505e-05, + "loss": 1.0739, + "step": 38382 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819372079160795e-05, + "loss": 1.0763, + "step": 38383 + }, + { + "epoch": 0.04, + "learning_rate": 4.981936265396209e-05, + "loss": 1.0349, + "step": 38384 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819353228518374e-05, + "loss": 0.8668, + "step": 38385 + }, + { + "epoch": 0.04, + "learning_rate": 4.981934380282964e-05, + "loss": 0.8446, + "step": 38386 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819334376895915e-05, + "loss": 0.8885, + "step": 38387 + }, + { + "epoch": 0.04, + "learning_rate": 4.981932495071718e-05, + "loss": 1.413, + "step": 38388 + }, + { + "epoch": 0.04, + "learning_rate": 4.981931552429344e-05, + "loss": 1.0934, + "step": 38389 + }, + { + "epoch": 0.04, + "learning_rate": 4.981930609762468e-05, + "loss": 0.8893, + "step": 38390 + }, + { + "epoch": 0.04, + "learning_rate": 4.981929667071094e-05, + "loss": 0.6819, + "step": 38391 + }, + { + "epoch": 0.04, + "learning_rate": 4.981928724355218e-05, + "loss": 0.6188, + "step": 38392 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819277816148415e-05, + "loss": 0.3937, + "step": 38393 + }, + { + "epoch": 0.04, + "learning_rate": 4.981926838849964e-05, + "loss": 0.3549, + "step": 38394 + }, + { + "epoch": 0.04, + "learning_rate": 4.981925896060587e-05, + "loss": 0.5067, + "step": 38395 + }, + { + "epoch": 0.04, + "learning_rate": 4.981924953246708e-05, + "loss": 0.3479, + "step": 38396 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819240104083295e-05, + "loss": 0.4608, + "step": 38397 + }, + { + "epoch": 0.04, + "learning_rate": 4.981923067545451e-05, + "loss": 0.5041, + "step": 38398 + }, + { + "epoch": 0.04, + "learning_rate": 4.981922124658071e-05, + "loss": 0.3056, + "step": 38399 + }, + { + "epoch": 0.04, + "learning_rate": 4.981921181746191e-05, + "loss": 0.5782, + "step": 38400 + }, + { + "epoch": 0.04, + "learning_rate": 4.981920238809811e-05, + "loss": 1.2711, + "step": 38401 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819192958489294e-05, + "loss": 0.9225, + "step": 38402 + }, + { + "epoch": 0.04, + "learning_rate": 4.981918352863548e-05, + "loss": 0.8207, + "step": 38403 + }, + { + "epoch": 0.04, + "learning_rate": 4.981917409853666e-05, + "loss": 0.9274, + "step": 38404 + }, + { + "epoch": 0.04, + "learning_rate": 4.981916466819283e-05, + "loss": 0.91, + "step": 38405 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819155237604006e-05, + "loss": 0.8545, + "step": 38406 + }, + { + "epoch": 0.04, + "learning_rate": 4.981914580677017e-05, + "loss": 1.2749, + "step": 38407 + }, + { + "epoch": 0.04, + "learning_rate": 4.981913637569133e-05, + "loss": 0.7851, + "step": 38408 + }, + { + "epoch": 0.04, + "learning_rate": 4.981912694436749e-05, + "loss": 0.855, + "step": 38409 + }, + { + "epoch": 0.04, + "learning_rate": 4.981911751279865e-05, + "loss": 0.9687, + "step": 38410 + }, + { + "epoch": 0.04, + "learning_rate": 4.98191080809848e-05, + "loss": 1.0177, + "step": 38411 + }, + { + "epoch": 0.04, + "learning_rate": 4.981909864892595e-05, + "loss": 0.9417, + "step": 38412 + }, + { + "epoch": 0.04, + "learning_rate": 4.981908921662209e-05, + "loss": 0.9325, + "step": 38413 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819079784073234e-05, + "loss": 0.8303, + "step": 38414 + }, + { + "epoch": 0.04, + "learning_rate": 4.981907035127937e-05, + "loss": 0.9853, + "step": 38415 + }, + { + "epoch": 0.04, + "learning_rate": 4.98190609182405e-05, + "loss": 1.1194, + "step": 38416 + }, + { + "epoch": 0.04, + "learning_rate": 4.981905148495664e-05, + "loss": 1.301, + "step": 38417 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819042051427755e-05, + "loss": 1.0164, + "step": 38418 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819032617653883e-05, + "loss": 0.547, + "step": 38419 + }, + { + "epoch": 0.04, + "learning_rate": 4.9819023183635e-05, + "loss": 0.9329, + "step": 38420 + }, + { + "epoch": 0.04, + "learning_rate": 4.981901374937113e-05, + "loss": 0.9355, + "step": 38421 + }, + { + "epoch": 0.04, + "learning_rate": 4.981900431486224e-05, + "loss": 0.9196, + "step": 38422 + }, + { + "epoch": 0.04, + "learning_rate": 4.981899488010835e-05, + "loss": 1.1356, + "step": 38423 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818985445109455e-05, + "loss": 1.003, + "step": 38424 + }, + { + "epoch": 0.04, + "learning_rate": 4.981897600986556e-05, + "loss": 1.0198, + "step": 38425 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818966574376666e-05, + "loss": 1.3223, + "step": 38426 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818957138642774e-05, + "loss": 1.3583, + "step": 38427 + }, + { + "epoch": 0.04, + "learning_rate": 4.981894770266387e-05, + "loss": 1.812, + "step": 38428 + }, + { + "epoch": 0.04, + "learning_rate": 4.981893826643997e-05, + "loss": 1.952, + "step": 38429 + }, + { + "epoch": 0.04, + "learning_rate": 4.981892882997106e-05, + "loss": 1.8678, + "step": 38430 + }, + { + "epoch": 0.04, + "learning_rate": 4.981891939325716e-05, + "loss": 1.831, + "step": 38431 + }, + { + "epoch": 0.04, + "learning_rate": 4.981890995629825e-05, + "loss": 1.6897, + "step": 38432 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818900519094335e-05, + "loss": 1.3581, + "step": 38433 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818891081645425e-05, + "loss": 1.0568, + "step": 38434 + }, + { + "epoch": 0.04, + "learning_rate": 4.981888164395151e-05, + "loss": 0.932, + "step": 38435 + }, + { + "epoch": 0.04, + "learning_rate": 4.98188722060126e-05, + "loss": 0.6176, + "step": 38436 + }, + { + "epoch": 0.04, + "learning_rate": 4.981886276782868e-05, + "loss": 0.5352, + "step": 38437 + }, + { + "epoch": 0.04, + "learning_rate": 4.981885332939976e-05, + "loss": 0.5742, + "step": 38438 + }, + { + "epoch": 0.04, + "learning_rate": 4.981884389072584e-05, + "loss": 0.5076, + "step": 38439 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818834451806925e-05, + "loss": 0.2194, + "step": 38440 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818825012643e-05, + "loss": 2.0516, + "step": 38441 + }, + { + "epoch": 0.04, + "learning_rate": 4.981881557323408e-05, + "loss": 1.6366, + "step": 38442 + }, + { + "epoch": 0.04, + "learning_rate": 4.981880613358015e-05, + "loss": 1.2064, + "step": 38443 + }, + { + "epoch": 0.04, + "learning_rate": 4.981879669368123e-05, + "loss": 1.3509, + "step": 38444 + }, + { + "epoch": 0.04, + "learning_rate": 4.98187872535373e-05, + "loss": 0.9231, + "step": 38445 + }, + { + "epoch": 0.04, + "learning_rate": 4.981877781314838e-05, + "loss": 1.0718, + "step": 38446 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818768372514455e-05, + "loss": 1.4016, + "step": 38447 + }, + { + "epoch": 0.04, + "learning_rate": 4.981875893163553e-05, + "loss": 1.4218, + "step": 38448 + }, + { + "epoch": 0.04, + "learning_rate": 4.98187494905116e-05, + "loss": 1.2848, + "step": 38449 + }, + { + "epoch": 0.04, + "learning_rate": 4.981874004914268e-05, + "loss": 1.0948, + "step": 38450 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818730607528744e-05, + "loss": 0.9559, + "step": 38451 + }, + { + "epoch": 0.04, + "learning_rate": 4.981872116566982e-05, + "loss": 0.2871, + "step": 38452 + }, + { + "epoch": 0.04, + "learning_rate": 4.981871172356589e-05, + "loss": 0.7197, + "step": 38453 + }, + { + "epoch": 0.04, + "learning_rate": 4.981870228121697e-05, + "loss": 1.2391, + "step": 38454 + }, + { + "epoch": 0.04, + "learning_rate": 4.981869283862304e-05, + "loss": 0.7527, + "step": 38455 + }, + { + "epoch": 0.04, + "learning_rate": 4.981868339578411e-05, + "loss": 0.8071, + "step": 38456 + }, + { + "epoch": 0.04, + "learning_rate": 4.981867395270018e-05, + "loss": 0.7371, + "step": 38457 + }, + { + "epoch": 0.04, + "learning_rate": 4.981866450937126e-05, + "loss": 0.6113, + "step": 38458 + }, + { + "epoch": 0.04, + "learning_rate": 4.981865506579734e-05, + "loss": 0.3383, + "step": 38459 + }, + { + "epoch": 0.04, + "learning_rate": 4.981864562197841e-05, + "loss": 0.4462, + "step": 38460 + }, + { + "epoch": 0.04, + "learning_rate": 4.981863617791449e-05, + "loss": 0.3533, + "step": 38461 + }, + { + "epoch": 0.04, + "learning_rate": 4.981862673360557e-05, + "loss": 0.4039, + "step": 38462 + }, + { + "epoch": 0.04, + "learning_rate": 4.981861728905164e-05, + "loss": 0.3231, + "step": 38463 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818607844252724e-05, + "loss": 0.4778, + "step": 38464 + }, + { + "epoch": 0.04, + "learning_rate": 4.98185983992088e-05, + "loss": 0.3968, + "step": 38465 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818588953919884e-05, + "loss": 0.3479, + "step": 38466 + }, + { + "epoch": 0.04, + "learning_rate": 4.981857950838596e-05, + "loss": 1.0353, + "step": 38467 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818570062607054e-05, + "loss": 1.3072, + "step": 38468 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818560616583134e-05, + "loss": 1.2412, + "step": 38469 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818551170314226e-05, + "loss": 1.0858, + "step": 38470 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818541723800316e-05, + "loss": 0.9076, + "step": 38471 + }, + { + "epoch": 0.04, + "learning_rate": 4.981853227704141e-05, + "loss": 1.1609, + "step": 38472 + }, + { + "epoch": 0.04, + "learning_rate": 4.98185228300375e-05, + "loss": 0.735, + "step": 38473 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818513382788595e-05, + "loss": 0.9604, + "step": 38474 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818503935294694e-05, + "loss": 1.2164, + "step": 38475 + }, + { + "epoch": 0.04, + "learning_rate": 4.981849448755579e-05, + "loss": 0.9835, + "step": 38476 + }, + { + "epoch": 0.04, + "learning_rate": 4.981848503957189e-05, + "loss": 0.8852, + "step": 38477 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818475591343004e-05, + "loss": 1.0263, + "step": 38478 + }, + { + "epoch": 0.04, + "learning_rate": 4.98184661428691e-05, + "loss": 0.9205, + "step": 38479 + }, + { + "epoch": 0.04, + "learning_rate": 4.981845669415022e-05, + "loss": 1.021, + "step": 38480 + }, + { + "epoch": 0.04, + "learning_rate": 4.981844724518633e-05, + "loss": 1.0312, + "step": 38481 + }, + { + "epoch": 0.04, + "learning_rate": 4.981843779597744e-05, + "loss": 0.7331, + "step": 38482 + }, + { + "epoch": 0.04, + "learning_rate": 4.981842834652356e-05, + "loss": 0.9252, + "step": 38483 + }, + { + "epoch": 0.04, + "learning_rate": 4.981841889682468e-05, + "loss": 1.1737, + "step": 38484 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818409446880795e-05, + "loss": 1.1676, + "step": 38485 + }, + { + "epoch": 0.04, + "learning_rate": 4.981839999669193e-05, + "loss": 1.4161, + "step": 38486 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818390546258056e-05, + "loss": 0.8829, + "step": 38487 + }, + { + "epoch": 0.04, + "learning_rate": 4.981838109557919e-05, + "loss": 1.1827, + "step": 38488 + }, + { + "epoch": 0.04, + "learning_rate": 4.981837164465533e-05, + "loss": 0.9142, + "step": 38489 + }, + { + "epoch": 0.04, + "learning_rate": 4.981836219348647e-05, + "loss": 0.7533, + "step": 38490 + }, + { + "epoch": 0.04, + "learning_rate": 4.981835274207261e-05, + "loss": 0.8827, + "step": 38491 + }, + { + "epoch": 0.04, + "learning_rate": 4.981834329041376e-05, + "loss": 0.9854, + "step": 38492 + }, + { + "epoch": 0.04, + "learning_rate": 4.981833383850991e-05, + "loss": 1.1419, + "step": 38493 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818324386361064e-05, + "loss": 0.8786, + "step": 38494 + }, + { + "epoch": 0.04, + "learning_rate": 4.981831493396723e-05, + "loss": 0.1386, + "step": 38495 + }, + { + "epoch": 0.04, + "learning_rate": 4.981830548132839e-05, + "loss": 0.7674, + "step": 38496 + }, + { + "epoch": 0.04, + "learning_rate": 4.981829602844456e-05, + "loss": 1.2672, + "step": 38497 + }, + { + "epoch": 0.04, + "learning_rate": 4.981828657531573e-05, + "loss": 1.1475, + "step": 38498 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818277121941906e-05, + "loss": 0.9492, + "step": 38499 + }, + { + "epoch": 0.04, + "learning_rate": 4.981826766832308e-05, + "loss": 0.8442, + "step": 38500 + }, + { + "epoch": 0.04, + "eval_loss": 1.11681067943573, + "eval_runtime": 126.5635, + "eval_samples_per_second": 10.943, + "eval_steps_per_second": 5.476, + "step": 38500 + }, + { + "epoch": 0.04, + "learning_rate": 4.981825821445927e-05, + "loss": 0.9903, + "step": 38501 + }, + { + "epoch": 0.04, + "learning_rate": 4.981824876035045e-05, + "loss": 0.8488, + "step": 38502 + }, + { + "epoch": 0.04, + "learning_rate": 4.981823930599665e-05, + "loss": 0.8859, + "step": 38503 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818229851397846e-05, + "loss": 0.9386, + "step": 38504 + }, + { + "epoch": 0.04, + "learning_rate": 4.981822039655405e-05, + "loss": 0.8622, + "step": 38505 + }, + { + "epoch": 0.04, + "learning_rate": 4.981821094146526e-05, + "loss": 0.9196, + "step": 38506 + }, + { + "epoch": 0.04, + "learning_rate": 4.981820148613147e-05, + "loss": 1.1095, + "step": 38507 + }, + { + "epoch": 0.04, + "learning_rate": 4.981819203055269e-05, + "loss": 1.112, + "step": 38508 + }, + { + "epoch": 0.04, + "learning_rate": 4.981818257472891e-05, + "loss": 1.1259, + "step": 38509 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818173118660144e-05, + "loss": 0.9027, + "step": 38510 + }, + { + "epoch": 0.04, + "learning_rate": 4.981816366234638e-05, + "loss": 0.904, + "step": 38511 + }, + { + "epoch": 0.04, + "learning_rate": 4.981815420578762e-05, + "loss": 0.949, + "step": 38512 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818144748983866e-05, + "loss": 0.4521, + "step": 38513 + }, + { + "epoch": 0.04, + "learning_rate": 4.981813529193512e-05, + "loss": 0.8411, + "step": 38514 + }, + { + "epoch": 0.04, + "learning_rate": 4.981812583464137e-05, + "loss": 1.2238, + "step": 38515 + }, + { + "epoch": 0.04, + "learning_rate": 4.981811637710264e-05, + "loss": 1.275, + "step": 38516 + }, + { + "epoch": 0.04, + "learning_rate": 4.981810691931891e-05, + "loss": 0.9603, + "step": 38517 + }, + { + "epoch": 0.04, + "learning_rate": 4.981809746129018e-05, + "loss": 0.9464, + "step": 38518 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818088003016464e-05, + "loss": 0.9716, + "step": 38519 + }, + { + "epoch": 0.04, + "learning_rate": 4.981807854449775e-05, + "loss": 0.9918, + "step": 38520 + }, + { + "epoch": 0.04, + "learning_rate": 4.981806908573405e-05, + "loss": 1.133, + "step": 38521 + }, + { + "epoch": 0.04, + "learning_rate": 4.981805962672535e-05, + "loss": 1.1406, + "step": 38522 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818050167471654e-05, + "loss": 1.0353, + "step": 38523 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818040707972966e-05, + "loss": 0.8461, + "step": 38524 + }, + { + "epoch": 0.04, + "learning_rate": 4.981803124822929e-05, + "loss": 0.988, + "step": 38525 + }, + { + "epoch": 0.04, + "learning_rate": 4.981802178824062e-05, + "loss": 1.2159, + "step": 38526 + }, + { + "epoch": 0.04, + "learning_rate": 4.981801232800696e-05, + "loss": 0.7613, + "step": 38527 + }, + { + "epoch": 0.04, + "learning_rate": 4.9818002867528294e-05, + "loss": 1.032, + "step": 38528 + }, + { + "epoch": 0.04, + "learning_rate": 4.981799340680464e-05, + "loss": 1.0049, + "step": 38529 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817983945836e-05, + "loss": 0.8946, + "step": 38530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817974484622364e-05, + "loss": 1.1809, + "step": 38531 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817965023163736e-05, + "loss": 1.0515, + "step": 38532 + }, + { + "epoch": 0.04, + "learning_rate": 4.981795556146012e-05, + "loss": 1.271, + "step": 38533 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817946099511506e-05, + "loss": 1.1589, + "step": 38534 + }, + { + "epoch": 0.04, + "learning_rate": 4.98179366373179e-05, + "loss": 1.2779, + "step": 38535 + }, + { + "epoch": 0.04, + "learning_rate": 4.98179271748793e-05, + "loss": 1.2558, + "step": 38536 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817917712195706e-05, + "loss": 1.2964, + "step": 38537 + }, + { + "epoch": 0.04, + "learning_rate": 4.981790824926712e-05, + "loss": 1.4458, + "step": 38538 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817898786093545e-05, + "loss": 1.1785, + "step": 38539 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817889322674985e-05, + "loss": 1.158, + "step": 38540 + }, + { + "epoch": 0.04, + "learning_rate": 4.981787985901142e-05, + "loss": 0.8979, + "step": 38541 + }, + { + "epoch": 0.04, + "learning_rate": 4.981787039510287e-05, + "loss": 0.6473, + "step": 38542 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817860930949335e-05, + "loss": 0.7189, + "step": 38543 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817851466550805e-05, + "loss": 1.0136, + "step": 38544 + }, + { + "epoch": 0.04, + "learning_rate": 4.981784200190728e-05, + "loss": 1.2871, + "step": 38545 + }, + { + "epoch": 0.04, + "learning_rate": 4.981783253701876e-05, + "loss": 1.3234, + "step": 38546 + }, + { + "epoch": 0.04, + "learning_rate": 4.981782307188525e-05, + "loss": 0.9848, + "step": 38547 + }, + { + "epoch": 0.04, + "learning_rate": 4.981781360650676e-05, + "loss": 0.6009, + "step": 38548 + }, + { + "epoch": 0.04, + "learning_rate": 4.981780414088327e-05, + "loss": 0.6753, + "step": 38549 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817794675014785e-05, + "loss": 0.3237, + "step": 38550 + }, + { + "epoch": 0.04, + "learning_rate": 4.981778520890131e-05, + "loss": 0.9412, + "step": 38551 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817775742542855e-05, + "loss": 0.7891, + "step": 38552 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817766275939403e-05, + "loss": 0.8832, + "step": 38553 + }, + { + "epoch": 0.04, + "learning_rate": 4.981775680909096e-05, + "loss": 1.1064, + "step": 38554 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817747341997526e-05, + "loss": 0.8338, + "step": 38555 + }, + { + "epoch": 0.04, + "learning_rate": 4.981773787465911e-05, + "loss": 0.8247, + "step": 38556 + }, + { + "epoch": 0.04, + "learning_rate": 4.981772840707569e-05, + "loss": 1.0508, + "step": 38557 + }, + { + "epoch": 0.04, + "learning_rate": 4.981771893924729e-05, + "loss": 1.2707, + "step": 38558 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817709471173895e-05, + "loss": 1.1821, + "step": 38559 + }, + { + "epoch": 0.04, + "learning_rate": 4.981770000285551e-05, + "loss": 1.0856, + "step": 38560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817690534292135e-05, + "loss": 0.5991, + "step": 38561 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817681065483775e-05, + "loss": 1.1235, + "step": 38562 + }, + { + "epoch": 0.04, + "learning_rate": 4.981767159643042e-05, + "loss": 1.1102, + "step": 38563 + }, + { + "epoch": 0.04, + "learning_rate": 4.981766212713208e-05, + "loss": 1.156, + "step": 38564 + }, + { + "epoch": 0.04, + "learning_rate": 4.981765265758875e-05, + "loss": 0.9242, + "step": 38565 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817643187800425e-05, + "loss": 0.2639, + "step": 38566 + }, + { + "epoch": 0.04, + "learning_rate": 4.981763371776711e-05, + "loss": 0.7855, + "step": 38567 + }, + { + "epoch": 0.04, + "learning_rate": 4.981762424748881e-05, + "loss": 0.8915, + "step": 38568 + }, + { + "epoch": 0.04, + "learning_rate": 4.981761477696553e-05, + "loss": 0.9768, + "step": 38569 + }, + { + "epoch": 0.04, + "learning_rate": 4.981760530619724e-05, + "loss": 0.6877, + "step": 38570 + }, + { + "epoch": 0.04, + "learning_rate": 4.981759583518398e-05, + "loss": 1.0143, + "step": 38571 + }, + { + "epoch": 0.04, + "learning_rate": 4.981758636392573e-05, + "loss": 0.8901, + "step": 38572 + }, + { + "epoch": 0.04, + "learning_rate": 4.981757689242248e-05, + "loss": 1.1905, + "step": 38573 + }, + { + "epoch": 0.04, + "learning_rate": 4.981756742067425e-05, + "loss": 0.7455, + "step": 38574 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817557948681026e-05, + "loss": 1.2296, + "step": 38575 + }, + { + "epoch": 0.04, + "learning_rate": 4.981754847644282e-05, + "loss": 1.1804, + "step": 38576 + }, + { + "epoch": 0.04, + "learning_rate": 4.981753900395961e-05, + "loss": 0.914, + "step": 38577 + }, + { + "epoch": 0.04, + "learning_rate": 4.981752953123143e-05, + "loss": 0.9731, + "step": 38578 + }, + { + "epoch": 0.04, + "learning_rate": 4.981752005825825e-05, + "loss": 0.927, + "step": 38579 + }, + { + "epoch": 0.04, + "learning_rate": 4.981751058504009e-05, + "loss": 0.92, + "step": 38580 + }, + { + "epoch": 0.04, + "learning_rate": 4.981750111157694e-05, + "loss": 1.158, + "step": 38581 + }, + { + "epoch": 0.04, + "learning_rate": 4.98174916378688e-05, + "loss": 1.0457, + "step": 38582 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817482163915674e-05, + "loss": 1.0584, + "step": 38583 + }, + { + "epoch": 0.04, + "learning_rate": 4.981747268971756e-05, + "loss": 0.9534, + "step": 38584 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817463215274454e-05, + "loss": 1.0026, + "step": 38585 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817453740586364e-05, + "loss": 1.1582, + "step": 38586 + }, + { + "epoch": 0.04, + "learning_rate": 4.981744426565329e-05, + "loss": 0.9715, + "step": 38587 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817434790475224e-05, + "loss": 1.1092, + "step": 38588 + }, + { + "epoch": 0.04, + "learning_rate": 4.981742531505217e-05, + "loss": 1.1128, + "step": 38589 + }, + { + "epoch": 0.04, + "learning_rate": 4.981741583938413e-05, + "loss": 1.1797, + "step": 38590 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817406363471106e-05, + "loss": 0.8735, + "step": 38591 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817396887313095e-05, + "loss": 0.9589, + "step": 38592 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817387410910095e-05, + "loss": 0.9594, + "step": 38593 + }, + { + "epoch": 0.04, + "learning_rate": 4.981737793426211e-05, + "loss": 0.7385, + "step": 38594 + }, + { + "epoch": 0.04, + "learning_rate": 4.981736845736913e-05, + "loss": 0.7655, + "step": 38595 + }, + { + "epoch": 0.04, + "learning_rate": 4.981735898023117e-05, + "loss": 0.952, + "step": 38596 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817349502848227e-05, + "loss": 0.8738, + "step": 38597 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817340025220295e-05, + "loss": 0.7058, + "step": 38598 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817330547347375e-05, + "loss": 1.1552, + "step": 38599 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817321069229465e-05, + "loss": 0.5237, + "step": 38600 + }, + { + "epoch": 0.04, + "learning_rate": 4.981731159086658e-05, + "loss": 0.7605, + "step": 38601 + }, + { + "epoch": 0.04, + "learning_rate": 4.981730211225869e-05, + "loss": 1.1657, + "step": 38602 + }, + { + "epoch": 0.04, + "learning_rate": 4.981729263340583e-05, + "loss": 1.2891, + "step": 38603 + }, + { + "epoch": 0.04, + "learning_rate": 4.981728315430798e-05, + "loss": 1.045, + "step": 38604 + }, + { + "epoch": 0.04, + "learning_rate": 4.981727367496515e-05, + "loss": 1.187, + "step": 38605 + }, + { + "epoch": 0.04, + "learning_rate": 4.981726419537733e-05, + "loss": 0.927, + "step": 38606 + }, + { + "epoch": 0.04, + "learning_rate": 4.981725471554452e-05, + "loss": 0.8706, + "step": 38607 + }, + { + "epoch": 0.04, + "learning_rate": 4.981724523546673e-05, + "loss": 1.1098, + "step": 38608 + }, + { + "epoch": 0.04, + "learning_rate": 4.981723575514395e-05, + "loss": 1.4504, + "step": 38609 + }, + { + "epoch": 0.04, + "learning_rate": 4.981722627457619e-05, + "loss": 1.2554, + "step": 38610 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817216793763443e-05, + "loss": 1.1427, + "step": 38611 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817207312705706e-05, + "loss": 0.7877, + "step": 38612 + }, + { + "epoch": 0.04, + "learning_rate": 4.981719783140299e-05, + "loss": 0.8211, + "step": 38613 + }, + { + "epoch": 0.04, + "learning_rate": 4.981718834985528e-05, + "loss": 1.1989, + "step": 38614 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817178868062595e-05, + "loss": 1.1931, + "step": 38615 + }, + { + "epoch": 0.04, + "learning_rate": 4.981716938602493e-05, + "loss": 1.3992, + "step": 38616 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817159903742266e-05, + "loss": 1.0418, + "step": 38617 + }, + { + "epoch": 0.04, + "learning_rate": 4.981715042121463e-05, + "loss": 1.0445, + "step": 38618 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817140938441996e-05, + "loss": 1.2045, + "step": 38619 + }, + { + "epoch": 0.04, + "learning_rate": 4.981713145542439e-05, + "loss": 1.4177, + "step": 38620 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817121972161796e-05, + "loss": 1.0583, + "step": 38621 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817112488654216e-05, + "loss": 1.0966, + "step": 38622 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817103004901654e-05, + "loss": 0.8859, + "step": 38623 + }, + { + "epoch": 0.04, + "learning_rate": 4.98170935209041e-05, + "loss": 0.3799, + "step": 38624 + }, + { + "epoch": 0.04, + "learning_rate": 4.9817084036661575e-05, + "loss": 0.5734, + "step": 38625 + }, + { + "epoch": 0.04, + "learning_rate": 4.981707455217406e-05, + "loss": 0.9611, + "step": 38626 + }, + { + "epoch": 0.04, + "learning_rate": 4.981706506744156e-05, + "loss": 0.8345, + "step": 38627 + }, + { + "epoch": 0.04, + "learning_rate": 4.981705558246408e-05, + "loss": 0.8553, + "step": 38628 + }, + { + "epoch": 0.04, + "learning_rate": 4.981704609724162e-05, + "loss": 0.9553, + "step": 38629 + }, + { + "epoch": 0.04, + "learning_rate": 4.981703661177416e-05, + "loss": 0.8642, + "step": 38630 + }, + { + "epoch": 0.04, + "learning_rate": 4.981702712606173e-05, + "loss": 1.0516, + "step": 38631 + }, + { + "epoch": 0.04, + "learning_rate": 4.981701764010431e-05, + "loss": 1.0887, + "step": 38632 + }, + { + "epoch": 0.04, + "learning_rate": 4.981700815390192e-05, + "loss": 0.9784, + "step": 38633 + }, + { + "epoch": 0.04, + "learning_rate": 4.981699866745453e-05, + "loss": 0.6115, + "step": 38634 + }, + { + "epoch": 0.04, + "learning_rate": 4.981698918076217e-05, + "loss": 1.0661, + "step": 38635 + }, + { + "epoch": 0.04, + "learning_rate": 4.981697969382482e-05, + "loss": 1.2892, + "step": 38636 + }, + { + "epoch": 0.04, + "learning_rate": 4.981697020664249e-05, + "loss": 1.1018, + "step": 38637 + }, + { + "epoch": 0.04, + "learning_rate": 4.981696071921518e-05, + "loss": 1.0108, + "step": 38638 + }, + { + "epoch": 0.04, + "learning_rate": 4.981695123154289e-05, + "loss": 1.091, + "step": 38639 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816941743625606e-05, + "loss": 1.3134, + "step": 38640 + }, + { + "epoch": 0.04, + "learning_rate": 4.981693225546334e-05, + "loss": 1.3431, + "step": 38641 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816922767056104e-05, + "loss": 0.6938, + "step": 38642 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816913278403876e-05, + "loss": 1.5029, + "step": 38643 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816903789506666e-05, + "loss": 1.2726, + "step": 38644 + }, + { + "epoch": 0.04, + "learning_rate": 4.981689430036448e-05, + "loss": 1.2996, + "step": 38645 + }, + { + "epoch": 0.04, + "learning_rate": 4.981688481097731e-05, + "loss": 0.845, + "step": 38646 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816875321345156e-05, + "loss": 0.9617, + "step": 38647 + }, + { + "epoch": 0.04, + "learning_rate": 4.981686583146802e-05, + "loss": 0.9718, + "step": 38648 + }, + { + "epoch": 0.04, + "learning_rate": 4.981685634134591e-05, + "loss": 0.8916, + "step": 38649 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816846850978806e-05, + "loss": 0.9379, + "step": 38650 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816837360366734e-05, + "loss": 1.0016, + "step": 38651 + }, + { + "epoch": 0.04, + "learning_rate": 4.981682786950967e-05, + "loss": 1.144, + "step": 38652 + }, + { + "epoch": 0.04, + "learning_rate": 4.981681837840763e-05, + "loss": 0.9776, + "step": 38653 + }, + { + "epoch": 0.04, + "learning_rate": 4.981680888706061e-05, + "loss": 1.3057, + "step": 38654 + }, + { + "epoch": 0.04, + "learning_rate": 4.98167993954686e-05, + "loss": 1.6263, + "step": 38655 + }, + { + "epoch": 0.04, + "learning_rate": 4.981678990363161e-05, + "loss": 1.2968, + "step": 38656 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816780411549645e-05, + "loss": 0.8111, + "step": 38657 + }, + { + "epoch": 0.04, + "learning_rate": 4.98167709192227e-05, + "loss": 1.0673, + "step": 38658 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816761426650774e-05, + "loss": 0.8924, + "step": 38659 + }, + { + "epoch": 0.04, + "learning_rate": 4.981675193383387e-05, + "loss": 0.7548, + "step": 38660 + }, + { + "epoch": 0.04, + "learning_rate": 4.981674244077198e-05, + "loss": 0.8485, + "step": 38661 + }, + { + "epoch": 0.04, + "learning_rate": 4.98167329474651e-05, + "loss": 0.8163, + "step": 38662 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816723453913264e-05, + "loss": 0.8431, + "step": 38663 + }, + { + "epoch": 0.04, + "learning_rate": 4.981671396011643e-05, + "loss": 0.9805, + "step": 38664 + }, + { + "epoch": 0.04, + "learning_rate": 4.981670446607461e-05, + "loss": 1.0454, + "step": 38665 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816694971787826e-05, + "loss": 1.1272, + "step": 38666 + }, + { + "epoch": 0.04, + "learning_rate": 4.981668547725606e-05, + "loss": 0.9254, + "step": 38667 + }, + { + "epoch": 0.04, + "learning_rate": 4.981667598247931e-05, + "loss": 0.8338, + "step": 38668 + }, + { + "epoch": 0.04, + "learning_rate": 4.981666648745758e-05, + "loss": 0.7451, + "step": 38669 + }, + { + "epoch": 0.04, + "learning_rate": 4.981665699219087e-05, + "loss": 0.8548, + "step": 38670 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816647496679184e-05, + "loss": 0.9996, + "step": 38671 + }, + { + "epoch": 0.04, + "learning_rate": 4.981663800092251e-05, + "loss": 0.9367, + "step": 38672 + }, + { + "epoch": 0.04, + "learning_rate": 4.981662850492086e-05, + "loss": 0.9548, + "step": 38673 + }, + { + "epoch": 0.04, + "learning_rate": 4.981661900867424e-05, + "loss": 1.189, + "step": 38674 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816609512182634e-05, + "loss": 0.9223, + "step": 38675 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816600015446046e-05, + "loss": 1.0735, + "step": 38676 + }, + { + "epoch": 0.04, + "learning_rate": 4.981659051846448e-05, + "loss": 1.1271, + "step": 38677 + }, + { + "epoch": 0.04, + "learning_rate": 4.981658102123794e-05, + "loss": 0.9198, + "step": 38678 + }, + { + "epoch": 0.04, + "learning_rate": 4.981657152376642e-05, + "loss": 0.8597, + "step": 38679 + }, + { + "epoch": 0.04, + "learning_rate": 4.981656202604992e-05, + "loss": 1.1156, + "step": 38680 + }, + { + "epoch": 0.04, + "learning_rate": 4.981655252808844e-05, + "loss": 0.9388, + "step": 38681 + }, + { + "epoch": 0.04, + "learning_rate": 4.981654302988198e-05, + "loss": 1.2234, + "step": 38682 + }, + { + "epoch": 0.04, + "learning_rate": 4.981653353143054e-05, + "loss": 1.01, + "step": 38683 + }, + { + "epoch": 0.04, + "learning_rate": 4.981652403273413e-05, + "loss": 1.1347, + "step": 38684 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816514533792735e-05, + "loss": 1.2967, + "step": 38685 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816505034606365e-05, + "loss": 0.9802, + "step": 38686 + }, + { + "epoch": 0.04, + "learning_rate": 4.981649553517501e-05, + "loss": 1.0123, + "step": 38687 + }, + { + "epoch": 0.04, + "learning_rate": 4.981648603549869e-05, + "loss": 1.065, + "step": 38688 + }, + { + "epoch": 0.04, + "learning_rate": 4.981647653557738e-05, + "loss": 1.0703, + "step": 38689 + }, + { + "epoch": 0.04, + "learning_rate": 4.98164670354111e-05, + "loss": 1.0233, + "step": 38690 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816457534999835e-05, + "loss": 0.9699, + "step": 38691 + }, + { + "epoch": 0.04, + "learning_rate": 4.98164480343436e-05, + "loss": 0.9528, + "step": 38692 + }, + { + "epoch": 0.04, + "learning_rate": 4.981643853344239e-05, + "loss": 1.1872, + "step": 38693 + }, + { + "epoch": 0.04, + "learning_rate": 4.98164290322962e-05, + "loss": 1.1629, + "step": 38694 + }, + { + "epoch": 0.04, + "learning_rate": 4.981641953090502e-05, + "loss": 0.9527, + "step": 38695 + }, + { + "epoch": 0.04, + "learning_rate": 4.981641002926888e-05, + "loss": 1.1046, + "step": 38696 + }, + { + "epoch": 0.04, + "learning_rate": 4.981640052738775e-05, + "loss": 1.2397, + "step": 38697 + }, + { + "epoch": 0.04, + "learning_rate": 4.981639102526164e-05, + "loss": 0.8335, + "step": 38698 + }, + { + "epoch": 0.04, + "learning_rate": 4.981638152289056e-05, + "loss": 0.9702, + "step": 38699 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816372020274516e-05, + "loss": 1.1826, + "step": 38700 + }, + { + "epoch": 0.04, + "learning_rate": 4.981636251741349e-05, + "loss": 1.0416, + "step": 38701 + }, + { + "epoch": 0.04, + "learning_rate": 4.981635301430747e-05, + "loss": 1.1855, + "step": 38702 + }, + { + "epoch": 0.04, + "learning_rate": 4.981634351095649e-05, + "loss": 1.005, + "step": 38703 + }, + { + "epoch": 0.04, + "learning_rate": 4.981633400736052e-05, + "loss": 1.1964, + "step": 38704 + }, + { + "epoch": 0.04, + "learning_rate": 4.981632450351959e-05, + "loss": 1.2172, + "step": 38705 + }, + { + "epoch": 0.04, + "learning_rate": 4.981631499943367e-05, + "loss": 1.2207, + "step": 38706 + }, + { + "epoch": 0.04, + "learning_rate": 4.981630549510278e-05, + "loss": 0.9199, + "step": 38707 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816295990526914e-05, + "loss": 1.0676, + "step": 38708 + }, + { + "epoch": 0.04, + "learning_rate": 4.981628648570607e-05, + "loss": 1.0762, + "step": 38709 + }, + { + "epoch": 0.04, + "learning_rate": 4.981627698064025e-05, + "loss": 1.2543, + "step": 38710 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816267475329454e-05, + "loss": 1.1425, + "step": 38711 + }, + { + "epoch": 0.04, + "learning_rate": 4.981625796977368e-05, + "loss": 1.0488, + "step": 38712 + }, + { + "epoch": 0.04, + "learning_rate": 4.981624846397294e-05, + "loss": 0.2537, + "step": 38713 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816238957927215e-05, + "loss": 0.8525, + "step": 38714 + }, + { + "epoch": 0.04, + "learning_rate": 4.981622945163652e-05, + "loss": 0.856, + "step": 38715 + }, + { + "epoch": 0.04, + "learning_rate": 4.981621994510085e-05, + "loss": 0.9352, + "step": 38716 + }, + { + "epoch": 0.04, + "learning_rate": 4.98162104383202e-05, + "loss": 1.1838, + "step": 38717 + }, + { + "epoch": 0.04, + "learning_rate": 4.981620093129458e-05, + "loss": 0.8104, + "step": 38718 + }, + { + "epoch": 0.04, + "learning_rate": 4.981619142402397e-05, + "loss": 0.9906, + "step": 38719 + }, + { + "epoch": 0.04, + "learning_rate": 4.98161819165084e-05, + "loss": 1.2507, + "step": 38720 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816172408747854e-05, + "loss": 0.8479, + "step": 38721 + }, + { + "epoch": 0.04, + "learning_rate": 4.981616290074233e-05, + "loss": 1.166, + "step": 38722 + }, + { + "epoch": 0.04, + "learning_rate": 4.981615339249183e-05, + "loss": 0.5848, + "step": 38723 + }, + { + "epoch": 0.04, + "learning_rate": 4.981614388399636e-05, + "loss": 0.9485, + "step": 38724 + }, + { + "epoch": 0.04, + "learning_rate": 4.981613437525591e-05, + "loss": 0.9201, + "step": 38725 + }, + { + "epoch": 0.04, + "learning_rate": 4.981612486627049e-05, + "loss": 0.9666, + "step": 38726 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816115357040095e-05, + "loss": 0.9216, + "step": 38727 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816105847564734e-05, + "loss": 0.9109, + "step": 38728 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816096337844385e-05, + "loss": 0.7993, + "step": 38729 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816086827879066e-05, + "loss": 0.6785, + "step": 38730 + }, + { + "epoch": 0.04, + "learning_rate": 4.981607731766877e-05, + "loss": 0.9905, + "step": 38731 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816067807213504e-05, + "loss": 1.0374, + "step": 38732 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816058296513266e-05, + "loss": 0.9712, + "step": 38733 + }, + { + "epoch": 0.04, + "learning_rate": 4.981604878556806e-05, + "loss": 1.1934, + "step": 38734 + }, + { + "epoch": 0.04, + "learning_rate": 4.981603927437787e-05, + "loss": 1.2141, + "step": 38735 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816029762942706e-05, + "loss": 1.064, + "step": 38736 + }, + { + "epoch": 0.04, + "learning_rate": 4.981602025126257e-05, + "loss": 0.9321, + "step": 38737 + }, + { + "epoch": 0.04, + "learning_rate": 4.981601073933747e-05, + "loss": 1.0492, + "step": 38738 + }, + { + "epoch": 0.04, + "learning_rate": 4.981600122716739e-05, + "loss": 1.1333, + "step": 38739 + }, + { + "epoch": 0.04, + "learning_rate": 4.981599171475233e-05, + "loss": 1.2262, + "step": 38740 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815982202092306e-05, + "loss": 1.0315, + "step": 38741 + }, + { + "epoch": 0.04, + "learning_rate": 4.981597268918731e-05, + "loss": 1.1986, + "step": 38742 + }, + { + "epoch": 0.04, + "learning_rate": 4.981596317603734e-05, + "loss": 1.296, + "step": 38743 + }, + { + "epoch": 0.04, + "learning_rate": 4.981595366264239e-05, + "loss": 1.1183, + "step": 38744 + }, + { + "epoch": 0.04, + "learning_rate": 4.981594414900247e-05, + "loss": 0.7375, + "step": 38745 + }, + { + "epoch": 0.04, + "learning_rate": 4.981593463511758e-05, + "loss": 0.8656, + "step": 38746 + }, + { + "epoch": 0.04, + "learning_rate": 4.981592512098772e-05, + "loss": 1.007, + "step": 38747 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815915606612884e-05, + "loss": 1.0768, + "step": 38748 + }, + { + "epoch": 0.04, + "learning_rate": 4.981590609199308e-05, + "loss": 0.8513, + "step": 38749 + }, + { + "epoch": 0.04, + "learning_rate": 4.98158965771283e-05, + "loss": 0.975, + "step": 38750 + }, + { + "epoch": 0.04, + "learning_rate": 4.981588706201855e-05, + "loss": 0.9673, + "step": 38751 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815877546663825e-05, + "loss": 0.9905, + "step": 38752 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815868031064125e-05, + "loss": 1.0837, + "step": 38753 + }, + { + "epoch": 0.04, + "learning_rate": 4.981585851521946e-05, + "loss": 0.8079, + "step": 38754 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815848999129825e-05, + "loss": 0.2223, + "step": 38755 + }, + { + "epoch": 0.04, + "learning_rate": 4.981583948279521e-05, + "loss": 0.3054, + "step": 38756 + }, + { + "epoch": 0.04, + "learning_rate": 4.981582996621563e-05, + "loss": 0.5691, + "step": 38757 + }, + { + "epoch": 0.04, + "learning_rate": 4.981582044939108e-05, + "loss": 1.0323, + "step": 38758 + }, + { + "epoch": 0.04, + "learning_rate": 4.981581093232156e-05, + "loss": 0.9665, + "step": 38759 + }, + { + "epoch": 0.04, + "learning_rate": 4.981580141500706e-05, + "loss": 0.8187, + "step": 38760 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815791897447596e-05, + "loss": 1.0539, + "step": 38761 + }, + { + "epoch": 0.04, + "learning_rate": 4.981578237964316e-05, + "loss": 0.9874, + "step": 38762 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815772861593744e-05, + "loss": 1.1652, + "step": 38763 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815763343299374e-05, + "loss": 1.4567, + "step": 38764 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815753824760016e-05, + "loss": 1.8322, + "step": 38765 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815744305975695e-05, + "loss": 1.1357, + "step": 38766 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815734786946406e-05, + "loss": 1.0296, + "step": 38767 + }, + { + "epoch": 0.04, + "learning_rate": 4.981572526767214e-05, + "loss": 0.9136, + "step": 38768 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815715748152915e-05, + "loss": 1.0399, + "step": 38769 + }, + { + "epoch": 0.04, + "learning_rate": 4.98157062283887e-05, + "loss": 0.8444, + "step": 38770 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815696708379536e-05, + "loss": 0.9544, + "step": 38771 + }, + { + "epoch": 0.04, + "learning_rate": 4.981568718812539e-05, + "loss": 0.8772, + "step": 38772 + }, + { + "epoch": 0.04, + "learning_rate": 4.981567766762628e-05, + "loss": 0.8287, + "step": 38773 + }, + { + "epoch": 0.04, + "learning_rate": 4.981566814688219e-05, + "loss": 1.0904, + "step": 38774 + }, + { + "epoch": 0.04, + "learning_rate": 4.981565862589314e-05, + "loss": 0.9208, + "step": 38775 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815649104659125e-05, + "loss": 0.6437, + "step": 38776 + }, + { + "epoch": 0.04, + "learning_rate": 4.981563958318013e-05, + "loss": 0.8286, + "step": 38777 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815630061456164e-05, + "loss": 0.9481, + "step": 38778 + }, + { + "epoch": 0.04, + "learning_rate": 4.981562053948724e-05, + "loss": 1.0182, + "step": 38779 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815611017273334e-05, + "loss": 1.1951, + "step": 38780 + }, + { + "epoch": 0.04, + "learning_rate": 4.981560149481447e-05, + "loss": 1.1186, + "step": 38781 + }, + { + "epoch": 0.04, + "learning_rate": 4.981559197211062e-05, + "loss": 1.2194, + "step": 38782 + }, + { + "epoch": 0.04, + "learning_rate": 4.981558244916182e-05, + "loss": 0.5216, + "step": 38783 + }, + { + "epoch": 0.04, + "learning_rate": 4.981557292596804e-05, + "loss": 0.9674, + "step": 38784 + }, + { + "epoch": 0.04, + "learning_rate": 4.98155634025293e-05, + "loss": 0.8322, + "step": 38785 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815553878845576e-05, + "loss": 0.7888, + "step": 38786 + }, + { + "epoch": 0.04, + "learning_rate": 4.98155443549169e-05, + "loss": 0.5348, + "step": 38787 + }, + { + "epoch": 0.04, + "learning_rate": 4.981553483074324e-05, + "loss": 0.4246, + "step": 38788 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815525306324627e-05, + "loss": 0.6317, + "step": 38789 + }, + { + "epoch": 0.04, + "learning_rate": 4.981551578166104e-05, + "loss": 0.9471, + "step": 38790 + }, + { + "epoch": 0.04, + "learning_rate": 4.981550625675248e-05, + "loss": 0.6664, + "step": 38791 + }, + { + "epoch": 0.04, + "learning_rate": 4.981549673159895e-05, + "loss": 1.1274, + "step": 38792 + }, + { + "epoch": 0.04, + "learning_rate": 4.981548720620046e-05, + "loss": 1.117, + "step": 38793 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815477680557e-05, + "loss": 1.0252, + "step": 38794 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815468154668565e-05, + "loss": 0.9435, + "step": 38795 + }, + { + "epoch": 0.04, + "learning_rate": 4.981545862853517e-05, + "loss": 1.0388, + "step": 38796 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815449102156807e-05, + "loss": 1.1585, + "step": 38797 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815439575533474e-05, + "loss": 1.1186, + "step": 38798 + }, + { + "epoch": 0.04, + "learning_rate": 4.981543004866517e-05, + "loss": 1.19, + "step": 38799 + }, + { + "epoch": 0.04, + "learning_rate": 4.98154205215519e-05, + "loss": 0.6933, + "step": 38800 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815410994193665e-05, + "loss": 0.7491, + "step": 38801 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815401466590464e-05, + "loss": 0.7396, + "step": 38802 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815391938742295e-05, + "loss": 0.3466, + "step": 38803 + }, + { + "epoch": 0.04, + "learning_rate": 4.981538241064916e-05, + "loss": 0.3558, + "step": 38804 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815372882311064e-05, + "loss": 0.5738, + "step": 38805 + }, + { + "epoch": 0.04, + "learning_rate": 4.981536335372798e-05, + "loss": 1.2387, + "step": 38806 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815353824899945e-05, + "loss": 1.5099, + "step": 38807 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815344295826946e-05, + "loss": 1.0678, + "step": 38808 + }, + { + "epoch": 0.04, + "learning_rate": 4.981533476650898e-05, + "loss": 1.0844, + "step": 38809 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815325236946034e-05, + "loss": 0.8271, + "step": 38810 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815315707138136e-05, + "loss": 1.0233, + "step": 38811 + }, + { + "epoch": 0.04, + "learning_rate": 4.981530617708526e-05, + "loss": 0.8519, + "step": 38812 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815296646787426e-05, + "loss": 0.8983, + "step": 38813 + }, + { + "epoch": 0.04, + "learning_rate": 4.981528711624462e-05, + "loss": 0.98, + "step": 38814 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815277585456855e-05, + "loss": 0.8575, + "step": 38815 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815268054424126e-05, + "loss": 0.9987, + "step": 38816 + }, + { + "epoch": 0.04, + "learning_rate": 4.981525852314642e-05, + "loss": 0.9513, + "step": 38817 + }, + { + "epoch": 0.04, + "learning_rate": 4.981524899162375e-05, + "loss": 0.6333, + "step": 38818 + }, + { + "epoch": 0.04, + "learning_rate": 4.981523945985612e-05, + "loss": 0.7172, + "step": 38819 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815229927843526e-05, + "loss": 0.9969, + "step": 38820 + }, + { + "epoch": 0.04, + "learning_rate": 4.981522039558597e-05, + "loss": 0.4864, + "step": 38821 + }, + { + "epoch": 0.04, + "learning_rate": 4.981521086308344e-05, + "loss": 0.7031, + "step": 38822 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815201330335944e-05, + "loss": 0.649, + "step": 38823 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815191797343486e-05, + "loss": 1.1624, + "step": 38824 + }, + { + "epoch": 0.04, + "learning_rate": 4.981518226410606e-05, + "loss": 0.9243, + "step": 38825 + }, + { + "epoch": 0.04, + "learning_rate": 4.981517273062367e-05, + "loss": 1.0494, + "step": 38826 + }, + { + "epoch": 0.04, + "learning_rate": 4.981516319689632e-05, + "loss": 1.0783, + "step": 38827 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815153662923995e-05, + "loss": 1.1445, + "step": 38828 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815144128706714e-05, + "loss": 0.8494, + "step": 38829 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815134594244465e-05, + "loss": 0.9301, + "step": 38830 + }, + { + "epoch": 0.04, + "learning_rate": 4.981512505953725e-05, + "loss": 0.3366, + "step": 38831 + }, + { + "epoch": 0.04, + "learning_rate": 4.981511552458508e-05, + "loss": 0.4267, + "step": 38832 + }, + { + "epoch": 0.04, + "learning_rate": 4.981510598938793e-05, + "loss": 0.2562, + "step": 38833 + }, + { + "epoch": 0.04, + "learning_rate": 4.981509645394583e-05, + "loss": 0.3249, + "step": 38834 + }, + { + "epoch": 0.04, + "learning_rate": 4.981508691825876e-05, + "loss": 0.6466, + "step": 38835 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815077382326726e-05, + "loss": 1.0803, + "step": 38836 + }, + { + "epoch": 0.04, + "learning_rate": 4.981506784614973e-05, + "loss": 1.2746, + "step": 38837 + }, + { + "epoch": 0.04, + "learning_rate": 4.981505830972777e-05, + "loss": 1.1826, + "step": 38838 + }, + { + "epoch": 0.04, + "learning_rate": 4.981504877306084e-05, + "loss": 0.7479, + "step": 38839 + }, + { + "epoch": 0.04, + "learning_rate": 4.9815039236148954e-05, + "loss": 0.8456, + "step": 38840 + }, + { + "epoch": 0.04, + "learning_rate": 4.98150296989921e-05, + "loss": 1.1616, + "step": 38841 + }, + { + "epoch": 0.04, + "learning_rate": 4.981502016159028e-05, + "loss": 1.0679, + "step": 38842 + }, + { + "epoch": 0.04, + "learning_rate": 4.98150106239435e-05, + "loss": 0.9672, + "step": 38843 + }, + { + "epoch": 0.04, + "learning_rate": 4.981500108605176e-05, + "loss": 0.7996, + "step": 38844 + }, + { + "epoch": 0.04, + "learning_rate": 4.981499154791505e-05, + "loss": 1.1838, + "step": 38845 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814982009533384e-05, + "loss": 1.0481, + "step": 38846 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814972470906754e-05, + "loss": 1.0289, + "step": 38847 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814962932035156e-05, + "loss": 0.9079, + "step": 38848 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814953392918596e-05, + "loss": 1.0222, + "step": 38849 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814943853557074e-05, + "loss": 1.5282, + "step": 38850 + }, + { + "epoch": 0.04, + "learning_rate": 4.981493431395059e-05, + "loss": 0.7955, + "step": 38851 + }, + { + "epoch": 0.04, + "learning_rate": 4.981492477409915e-05, + "loss": 0.2277, + "step": 38852 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814915234002736e-05, + "loss": 0.2353, + "step": 38853 + }, + { + "epoch": 0.04, + "learning_rate": 4.981490569366136e-05, + "loss": 0.2032, + "step": 38854 + }, + { + "epoch": 0.04, + "learning_rate": 4.981489615307503e-05, + "loss": 0.2942, + "step": 38855 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814886612243734e-05, + "loss": 0.1924, + "step": 38856 + }, + { + "epoch": 0.04, + "learning_rate": 4.981487707116748e-05, + "loss": 0.7582, + "step": 38857 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814867529846254e-05, + "loss": 1.0894, + "step": 38858 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814857988280074e-05, + "loss": 1.096, + "step": 38859 + }, + { + "epoch": 0.04, + "learning_rate": 4.981484844646893e-05, + "loss": 0.7961, + "step": 38860 + }, + { + "epoch": 0.04, + "learning_rate": 4.981483890441282e-05, + "loss": 0.815, + "step": 38861 + }, + { + "epoch": 0.04, + "learning_rate": 4.981482936211176e-05, + "loss": 1.0569, + "step": 38862 + }, + { + "epoch": 0.04, + "learning_rate": 4.981481981956573e-05, + "loss": 1.0635, + "step": 38863 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814810276774734e-05, + "loss": 1.0919, + "step": 38864 + }, + { + "epoch": 0.04, + "learning_rate": 4.981480073373879e-05, + "loss": 1.4468, + "step": 38865 + }, + { + "epoch": 0.04, + "learning_rate": 4.981479119045788e-05, + "loss": 1.2861, + "step": 38866 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814781646932e-05, + "loss": 0.908, + "step": 38867 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814772103161165e-05, + "loss": 1.0874, + "step": 38868 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814762559145373e-05, + "loss": 1.0043, + "step": 38869 + }, + { + "epoch": 0.04, + "learning_rate": 4.981475301488462e-05, + "loss": 1.2009, + "step": 38870 + }, + { + "epoch": 0.04, + "learning_rate": 4.98147434703789e-05, + "loss": 1.1681, + "step": 38871 + }, + { + "epoch": 0.04, + "learning_rate": 4.981473392562822e-05, + "loss": 1.0392, + "step": 38872 + }, + { + "epoch": 0.04, + "learning_rate": 4.981472438063258e-05, + "loss": 1.1695, + "step": 38873 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814714835391985e-05, + "loss": 0.9792, + "step": 38874 + }, + { + "epoch": 0.04, + "learning_rate": 4.981470528990642e-05, + "loss": 1.0826, + "step": 38875 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814695744175896e-05, + "loss": 1.1335, + "step": 38876 + }, + { + "epoch": 0.04, + "learning_rate": 4.981468619820042e-05, + "loss": 0.7791, + "step": 38877 + }, + { + "epoch": 0.04, + "learning_rate": 4.981467665197998e-05, + "loss": 0.8762, + "step": 38878 + }, + { + "epoch": 0.04, + "learning_rate": 4.981466710551458e-05, + "loss": 0.4704, + "step": 38879 + }, + { + "epoch": 0.04, + "learning_rate": 4.981465755880422e-05, + "loss": 0.5178, + "step": 38880 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814648011848894e-05, + "loss": 0.4105, + "step": 38881 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814638464648624e-05, + "loss": 0.2647, + "step": 38882 + }, + { + "epoch": 0.04, + "learning_rate": 4.981462891720338e-05, + "loss": 0.3242, + "step": 38883 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814619369513184e-05, + "loss": 0.4495, + "step": 38884 + }, + { + "epoch": 0.04, + "learning_rate": 4.981460982157802e-05, + "loss": 0.2822, + "step": 38885 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814600273397904e-05, + "loss": 0.2092, + "step": 38886 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814590724972825e-05, + "loss": 0.2422, + "step": 38887 + }, + { + "epoch": 0.04, + "learning_rate": 4.981458117630279e-05, + "loss": 0.6219, + "step": 38888 + }, + { + "epoch": 0.04, + "learning_rate": 4.981457162738779e-05, + "loss": 0.904, + "step": 38889 + }, + { + "epoch": 0.04, + "learning_rate": 4.981456207822784e-05, + "loss": 0.6572, + "step": 38890 + }, + { + "epoch": 0.04, + "learning_rate": 4.981455252882293e-05, + "loss": 1.1985, + "step": 38891 + }, + { + "epoch": 0.04, + "learning_rate": 4.981454297917305e-05, + "loss": 0.9238, + "step": 38892 + }, + { + "epoch": 0.04, + "learning_rate": 4.981453342927822e-05, + "loss": 1.5682, + "step": 38893 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814523879138435e-05, + "loss": 1.2052, + "step": 38894 + }, + { + "epoch": 0.04, + "learning_rate": 4.981451432875369e-05, + "loss": 1.0076, + "step": 38895 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814504778123984e-05, + "loss": 1.1768, + "step": 38896 + }, + { + "epoch": 0.04, + "learning_rate": 4.981449522724931e-05, + "loss": 0.695, + "step": 38897 + }, + { + "epoch": 0.04, + "learning_rate": 4.98144856761297e-05, + "loss": 0.9186, + "step": 38898 + }, + { + "epoch": 0.04, + "learning_rate": 4.981447612476511e-05, + "loss": 0.9091, + "step": 38899 + }, + { + "epoch": 0.04, + "learning_rate": 4.981446657315557e-05, + "loss": 0.8752, + "step": 38900 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814457021301074e-05, + "loss": 1.1264, + "step": 38901 + }, + { + "epoch": 0.04, + "learning_rate": 4.981444746920162e-05, + "loss": 1.1097, + "step": 38902 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814437916857203e-05, + "loss": 1.0984, + "step": 38903 + }, + { + "epoch": 0.04, + "learning_rate": 4.981442836426784e-05, + "loss": 1.0483, + "step": 38904 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814418811433506e-05, + "loss": 1.6774, + "step": 38905 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814409258354224e-05, + "loss": 1.209, + "step": 38906 + }, + { + "epoch": 0.04, + "learning_rate": 4.981439970502998e-05, + "loss": 1.1676, + "step": 38907 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814390151460775e-05, + "loss": 0.9629, + "step": 38908 + }, + { + "epoch": 0.04, + "learning_rate": 4.981438059764662e-05, + "loss": 1.0055, + "step": 38909 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814371043587505e-05, + "loss": 1.2829, + "step": 38910 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814361489283434e-05, + "loss": 1.4417, + "step": 38911 + }, + { + "epoch": 0.04, + "learning_rate": 4.981435193473441e-05, + "loss": 1.1362, + "step": 38912 + }, + { + "epoch": 0.04, + "learning_rate": 4.981434237994042e-05, + "loss": 1.2417, + "step": 38913 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814332824901477e-05, + "loss": 1.299, + "step": 38914 + }, + { + "epoch": 0.04, + "learning_rate": 4.981432326961758e-05, + "loss": 1.0328, + "step": 38915 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814313714088725e-05, + "loss": 0.8897, + "step": 38916 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814304158314916e-05, + "loss": 1.1575, + "step": 38917 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814294602296145e-05, + "loss": 0.8244, + "step": 38918 + }, + { + "epoch": 0.04, + "learning_rate": 4.981428504603242e-05, + "loss": 0.8519, + "step": 38919 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814275489523745e-05, + "loss": 0.9641, + "step": 38920 + }, + { + "epoch": 0.04, + "learning_rate": 4.98142659327701e-05, + "loss": 1.0931, + "step": 38921 + }, + { + "epoch": 0.04, + "learning_rate": 4.981425637577151e-05, + "loss": 1.0849, + "step": 38922 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814246818527965e-05, + "loss": 1.1429, + "step": 38923 + }, + { + "epoch": 0.04, + "learning_rate": 4.981423726103946e-05, + "loss": 1.0569, + "step": 38924 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814227703306e-05, + "loss": 0.9475, + "step": 38925 + }, + { + "epoch": 0.04, + "learning_rate": 4.981421814532758e-05, + "loss": 1.0305, + "step": 38926 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814208587104214e-05, + "loss": 1.2725, + "step": 38927 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814199028635886e-05, + "loss": 0.9729, + "step": 38928 + }, + { + "epoch": 0.04, + "learning_rate": 4.98141894699226e-05, + "loss": 0.6252, + "step": 38929 + }, + { + "epoch": 0.04, + "learning_rate": 4.981417991096436e-05, + "loss": 0.7955, + "step": 38930 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814170351761175e-05, + "loss": 0.7291, + "step": 38931 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814160792313025e-05, + "loss": 0.9026, + "step": 38932 + }, + { + "epoch": 0.04, + "learning_rate": 4.981415123261993e-05, + "loss": 0.9469, + "step": 38933 + }, + { + "epoch": 0.04, + "learning_rate": 4.981414167268187e-05, + "loss": 0.9151, + "step": 38934 + }, + { + "epoch": 0.04, + "learning_rate": 4.981413211249886e-05, + "loss": 0.9673, + "step": 38935 + }, + { + "epoch": 0.04, + "learning_rate": 4.981412255207089e-05, + "loss": 1.3449, + "step": 38936 + }, + { + "epoch": 0.04, + "learning_rate": 4.981411299139797e-05, + "loss": 1.0505, + "step": 38937 + }, + { + "epoch": 0.04, + "learning_rate": 4.98141034304801e-05, + "loss": 0.5158, + "step": 38938 + }, + { + "epoch": 0.04, + "learning_rate": 4.981409386931727e-05, + "loss": 0.9788, + "step": 38939 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814084307909484e-05, + "loss": 0.9181, + "step": 38940 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814074746256745e-05, + "loss": 1.0075, + "step": 38941 + }, + { + "epoch": 0.04, + "learning_rate": 4.981406518435905e-05, + "loss": 1.203, + "step": 38942 + }, + { + "epoch": 0.04, + "learning_rate": 4.98140556222164e-05, + "loss": 0.9349, + "step": 38943 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814046059828804e-05, + "loss": 0.9995, + "step": 38944 + }, + { + "epoch": 0.04, + "learning_rate": 4.981403649719625e-05, + "loss": 1.0542, + "step": 38945 + }, + { + "epoch": 0.04, + "learning_rate": 4.981402693431874e-05, + "loss": 1.1886, + "step": 38946 + }, + { + "epoch": 0.04, + "learning_rate": 4.981401737119628e-05, + "loss": 1.2502, + "step": 38947 + }, + { + "epoch": 0.04, + "learning_rate": 4.981400780782887e-05, + "loss": 0.9441, + "step": 38948 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813998244216494e-05, + "loss": 0.7032, + "step": 38949 + }, + { + "epoch": 0.04, + "learning_rate": 4.981398868035918e-05, + "loss": 1.163, + "step": 38950 + }, + { + "epoch": 0.04, + "learning_rate": 4.98139791162569e-05, + "loss": 0.833, + "step": 38951 + }, + { + "epoch": 0.04, + "learning_rate": 4.981396955190967e-05, + "loss": 1.1118, + "step": 38952 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813959987317496e-05, + "loss": 0.901, + "step": 38953 + }, + { + "epoch": 0.04, + "learning_rate": 4.981395042248036e-05, + "loss": 1.0077, + "step": 38954 + }, + { + "epoch": 0.04, + "learning_rate": 4.981394085739827e-05, + "loss": 1.5748, + "step": 38955 + }, + { + "epoch": 0.04, + "learning_rate": 4.981393129207123e-05, + "loss": 1.0559, + "step": 38956 + }, + { + "epoch": 0.04, + "learning_rate": 4.981392172649924e-05, + "loss": 0.9457, + "step": 38957 + }, + { + "epoch": 0.04, + "learning_rate": 4.98139121606823e-05, + "loss": 1.2071, + "step": 38958 + }, + { + "epoch": 0.04, + "learning_rate": 4.98139025946204e-05, + "loss": 0.7268, + "step": 38959 + }, + { + "epoch": 0.04, + "learning_rate": 4.981389302831355e-05, + "loss": 1.1805, + "step": 38960 + }, + { + "epoch": 0.04, + "learning_rate": 4.981388346176175e-05, + "loss": 0.7402, + "step": 38961 + }, + { + "epoch": 0.04, + "learning_rate": 4.981387389496499e-05, + "loss": 1.0082, + "step": 38962 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813864327923285e-05, + "loss": 0.9424, + "step": 38963 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813854760636625e-05, + "loss": 0.8129, + "step": 38964 + }, + { + "epoch": 0.04, + "learning_rate": 4.981384519310501e-05, + "loss": 0.9177, + "step": 38965 + }, + { + "epoch": 0.04, + "learning_rate": 4.981383562532845e-05, + "loss": 0.6107, + "step": 38966 + }, + { + "epoch": 0.04, + "learning_rate": 4.981382605730694e-05, + "loss": 1.0026, + "step": 38967 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813816489040474e-05, + "loss": 0.8972, + "step": 38968 + }, + { + "epoch": 0.04, + "learning_rate": 4.981380692052906e-05, + "loss": 1.3241, + "step": 38969 + }, + { + "epoch": 0.04, + "learning_rate": 4.981379735177269e-05, + "loss": 1.0281, + "step": 38970 + }, + { + "epoch": 0.04, + "learning_rate": 4.981378778277137e-05, + "loss": 0.7939, + "step": 38971 + }, + { + "epoch": 0.04, + "learning_rate": 4.98137782135251e-05, + "loss": 0.3605, + "step": 38972 + }, + { + "epoch": 0.04, + "learning_rate": 4.981376864403388e-05, + "loss": 0.2113, + "step": 38973 + }, + { + "epoch": 0.04, + "learning_rate": 4.981375907429771e-05, + "loss": 0.1365, + "step": 38974 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813749504316574e-05, + "loss": 0.1217, + "step": 38975 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813739934090506e-05, + "loss": 0.174, + "step": 38976 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813730363619476e-05, + "loss": 0.2039, + "step": 38977 + }, + { + "epoch": 0.04, + "learning_rate": 4.98137207929035e-05, + "loss": 0.1773, + "step": 38978 + }, + { + "epoch": 0.04, + "learning_rate": 4.981371122194257e-05, + "loss": 0.6409, + "step": 38979 + }, + { + "epoch": 0.04, + "learning_rate": 4.98137016507367e-05, + "loss": 1.1435, + "step": 38980 + }, + { + "epoch": 0.04, + "learning_rate": 4.981369207928587e-05, + "loss": 1.1752, + "step": 38981 + }, + { + "epoch": 0.04, + "learning_rate": 4.981368250759009e-05, + "loss": 0.7172, + "step": 38982 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813672935649355e-05, + "loss": 1.3992, + "step": 38983 + }, + { + "epoch": 0.04, + "learning_rate": 4.981366336346368e-05, + "loss": 0.9269, + "step": 38984 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813653791033057e-05, + "loss": 1.1269, + "step": 38985 + }, + { + "epoch": 0.04, + "learning_rate": 4.981364421835748e-05, + "loss": 0.9756, + "step": 38986 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813634645436944e-05, + "loss": 1.2781, + "step": 38987 + }, + { + "epoch": 0.04, + "learning_rate": 4.981362507227146e-05, + "loss": 1.8932, + "step": 38988 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813615498861045e-05, + "loss": 0.8301, + "step": 38989 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813605925205666e-05, + "loss": 0.9496, + "step": 38990 + }, + { + "epoch": 0.04, + "learning_rate": 4.981359635130534e-05, + "loss": 0.9085, + "step": 38991 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813586777160056e-05, + "loss": 0.9137, + "step": 38992 + }, + { + "epoch": 0.04, + "learning_rate": 4.981357720276983e-05, + "loss": 1.2055, + "step": 38993 + }, + { + "epoch": 0.04, + "learning_rate": 4.981356762813466e-05, + "loss": 0.9071, + "step": 38994 + }, + { + "epoch": 0.04, + "learning_rate": 4.981355805325453e-05, + "loss": 1.0135, + "step": 38995 + }, + { + "epoch": 0.04, + "learning_rate": 4.981354847812946e-05, + "loss": 1.2296, + "step": 38996 + }, + { + "epoch": 0.04, + "learning_rate": 4.981353890275944e-05, + "loss": 1.3863, + "step": 38997 + }, + { + "epoch": 0.04, + "learning_rate": 4.981352932714447e-05, + "loss": 1.1605, + "step": 38998 + }, + { + "epoch": 0.04, + "learning_rate": 4.981351975128455e-05, + "loss": 1.2071, + "step": 38999 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813510175179675e-05, + "loss": 1.1673, + "step": 39000 + }, + { + "epoch": 0.04, + "eval_loss": 1.0770384073257446, + "eval_runtime": 127.5137, + "eval_samples_per_second": 10.862, + "eval_steps_per_second": 5.435, + "step": 39000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813500598829864e-05, + "loss": 0.9733, + "step": 39001 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813491022235105e-05, + "loss": 1.0111, + "step": 39002 + }, + { + "epoch": 0.04, + "learning_rate": 4.981348144539539e-05, + "loss": 1.0717, + "step": 39003 + }, + { + "epoch": 0.04, + "learning_rate": 4.981347186831072e-05, + "loss": 0.9463, + "step": 39004 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813462290981116e-05, + "loss": 0.5948, + "step": 39005 + }, + { + "epoch": 0.04, + "learning_rate": 4.981345271340656e-05, + "loss": 0.9362, + "step": 39006 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813443135587055e-05, + "loss": 1.0176, + "step": 39007 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813433557522605e-05, + "loss": 1.2668, + "step": 39008 + }, + { + "epoch": 0.04, + "learning_rate": 4.98134239792132e-05, + "loss": 1.0683, + "step": 39009 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813414400658854e-05, + "loss": 1.0627, + "step": 39010 + }, + { + "epoch": 0.04, + "learning_rate": 4.981340482185955e-05, + "loss": 0.8256, + "step": 39011 + }, + { + "epoch": 0.04, + "learning_rate": 4.981339524281531e-05, + "loss": 1.0126, + "step": 39012 + }, + { + "epoch": 0.04, + "learning_rate": 4.981338566352612e-05, + "loss": 1.3962, + "step": 39013 + }, + { + "epoch": 0.04, + "learning_rate": 4.981337608399198e-05, + "loss": 1.7819, + "step": 39014 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813366504212896e-05, + "loss": 1.0444, + "step": 39015 + }, + { + "epoch": 0.04, + "learning_rate": 4.981335692418886e-05, + "loss": 0.9261, + "step": 39016 + }, + { + "epoch": 0.04, + "learning_rate": 4.981334734391988e-05, + "loss": 0.5697, + "step": 39017 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813337763405954e-05, + "loss": 0.9559, + "step": 39018 + }, + { + "epoch": 0.04, + "learning_rate": 4.981332818264708e-05, + "loss": 0.7245, + "step": 39019 + }, + { + "epoch": 0.04, + "learning_rate": 4.981331860164326e-05, + "loss": 0.6773, + "step": 39020 + }, + { + "epoch": 0.04, + "learning_rate": 4.981330902039449e-05, + "loss": 0.6686, + "step": 39021 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813299438900784e-05, + "loss": 0.9485, + "step": 39022 + }, + { + "epoch": 0.04, + "learning_rate": 4.981328985716212e-05, + "loss": 0.9934, + "step": 39023 + }, + { + "epoch": 0.04, + "learning_rate": 4.981328027517851e-05, + "loss": 0.7221, + "step": 39024 + }, + { + "epoch": 0.04, + "learning_rate": 4.981327069294997e-05, + "loss": 0.483, + "step": 39025 + }, + { + "epoch": 0.04, + "learning_rate": 4.981326111047646e-05, + "loss": 0.7862, + "step": 39026 + }, + { + "epoch": 0.04, + "learning_rate": 4.981325152775802e-05, + "loss": 1.2067, + "step": 39027 + }, + { + "epoch": 0.04, + "learning_rate": 4.981324194479463e-05, + "loss": 0.9525, + "step": 39028 + }, + { + "epoch": 0.04, + "learning_rate": 4.981323236158629e-05, + "loss": 0.9384, + "step": 39029 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813222778133e-05, + "loss": 1.0354, + "step": 39030 + }, + { + "epoch": 0.04, + "learning_rate": 4.981321319443478e-05, + "loss": 1.0025, + "step": 39031 + }, + { + "epoch": 0.04, + "learning_rate": 4.981320361049161e-05, + "loss": 1.1616, + "step": 39032 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813194026303486e-05, + "loss": 0.8577, + "step": 39033 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813184441870426e-05, + "loss": 0.5259, + "step": 39034 + }, + { + "epoch": 0.04, + "learning_rate": 4.981317485719241e-05, + "loss": 0.111, + "step": 39035 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813165272269454e-05, + "loss": 0.1264, + "step": 39036 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813155687101556e-05, + "loss": 0.1618, + "step": 39037 + }, + { + "epoch": 0.04, + "learning_rate": 4.981314610168871e-05, + "loss": 0.8466, + "step": 39038 + }, + { + "epoch": 0.04, + "learning_rate": 4.981313651603092e-05, + "loss": 0.9936, + "step": 39039 + }, + { + "epoch": 0.04, + "learning_rate": 4.981312693012819e-05, + "loss": 0.849, + "step": 39040 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813117343980506e-05, + "loss": 1.0114, + "step": 39041 + }, + { + "epoch": 0.04, + "learning_rate": 4.9813107757587886e-05, + "loss": 0.9811, + "step": 39042 + }, + { + "epoch": 0.04, + "learning_rate": 4.981309817095032e-05, + "loss": 0.8635, + "step": 39043 + }, + { + "epoch": 0.04, + "learning_rate": 4.98130885840678e-05, + "loss": 0.8524, + "step": 39044 + }, + { + "epoch": 0.04, + "learning_rate": 4.981307899694034e-05, + "loss": 0.9694, + "step": 39045 + }, + { + "epoch": 0.04, + "learning_rate": 4.981306940956795e-05, + "loss": 0.9663, + "step": 39046 + }, + { + "epoch": 0.04, + "learning_rate": 4.98130598219506e-05, + "loss": 0.8745, + "step": 39047 + }, + { + "epoch": 0.04, + "learning_rate": 4.981305023408831e-05, + "loss": 1.0598, + "step": 39048 + }, + { + "epoch": 0.04, + "learning_rate": 4.981304064598108e-05, + "loss": 1.1675, + "step": 39049 + }, + { + "epoch": 0.04, + "learning_rate": 4.98130310576289e-05, + "loss": 1.1217, + "step": 39050 + }, + { + "epoch": 0.04, + "learning_rate": 4.981302146903177e-05, + "loss": 1.0164, + "step": 39051 + }, + { + "epoch": 0.04, + "learning_rate": 4.981301188018971e-05, + "loss": 1.0031, + "step": 39052 + }, + { + "epoch": 0.04, + "learning_rate": 4.98130022911027e-05, + "loss": 0.7732, + "step": 39053 + }, + { + "epoch": 0.04, + "learning_rate": 4.981299270177075e-05, + "loss": 0.308, + "step": 39054 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812983112193857e-05, + "loss": 0.9133, + "step": 39055 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812973522372014e-05, + "loss": 0.9497, + "step": 39056 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812963932305236e-05, + "loss": 1.3698, + "step": 39057 + }, + { + "epoch": 0.04, + "learning_rate": 4.981295434199351e-05, + "loss": 1.0613, + "step": 39058 + }, + { + "epoch": 0.04, + "learning_rate": 4.981294475143684e-05, + "loss": 1.2332, + "step": 39059 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812935160635226e-05, + "loss": 1.185, + "step": 39060 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812925569588675e-05, + "loss": 1.0745, + "step": 39061 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812915978297175e-05, + "loss": 0.9458, + "step": 39062 + }, + { + "epoch": 0.04, + "learning_rate": 4.981290638676074e-05, + "loss": 0.7959, + "step": 39063 + }, + { + "epoch": 0.04, + "learning_rate": 4.981289679497935e-05, + "loss": 1.0579, + "step": 39064 + }, + { + "epoch": 0.04, + "learning_rate": 4.981288720295303e-05, + "loss": 1.0615, + "step": 39065 + }, + { + "epoch": 0.04, + "learning_rate": 4.981287761068176e-05, + "loss": 1.2109, + "step": 39066 + }, + { + "epoch": 0.04, + "learning_rate": 4.981286801816555e-05, + "loss": 0.9595, + "step": 39067 + }, + { + "epoch": 0.04, + "learning_rate": 4.98128584254044e-05, + "loss": 0.8282, + "step": 39068 + }, + { + "epoch": 0.04, + "learning_rate": 4.981284883239831e-05, + "loss": 1.0781, + "step": 39069 + }, + { + "epoch": 0.04, + "learning_rate": 4.981283923914727e-05, + "loss": 0.6936, + "step": 39070 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812829645651296e-05, + "loss": 0.7369, + "step": 39071 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812820051910373e-05, + "loss": 0.577, + "step": 39072 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812810457924516e-05, + "loss": 0.6978, + "step": 39073 + }, + { + "epoch": 0.04, + "learning_rate": 4.981280086369371e-05, + "loss": 0.7823, + "step": 39074 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812791269217964e-05, + "loss": 0.7912, + "step": 39075 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812781674497276e-05, + "loss": 0.8127, + "step": 39076 + }, + { + "epoch": 0.04, + "learning_rate": 4.981277207953165e-05, + "loss": 0.7283, + "step": 39077 + }, + { + "epoch": 0.04, + "learning_rate": 4.981276248432109e-05, + "loss": 0.4665, + "step": 39078 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812752888865574e-05, + "loss": 1.0151, + "step": 39079 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812743293165126e-05, + "loss": 0.8507, + "step": 39080 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812733697219736e-05, + "loss": 0.8793, + "step": 39081 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812724101029405e-05, + "loss": 0.7604, + "step": 39082 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812714504594125e-05, + "loss": 0.5797, + "step": 39083 + }, + { + "epoch": 0.04, + "learning_rate": 4.981270490791392e-05, + "loss": 1.2596, + "step": 39084 + }, + { + "epoch": 0.04, + "learning_rate": 4.981269531098877e-05, + "loss": 1.6312, + "step": 39085 + }, + { + "epoch": 0.04, + "learning_rate": 4.981268571381867e-05, + "loss": 1.356, + "step": 39086 + }, + { + "epoch": 0.04, + "learning_rate": 4.981267611640363e-05, + "loss": 1.1605, + "step": 39087 + }, + { + "epoch": 0.04, + "learning_rate": 4.981266651874366e-05, + "loss": 1.0626, + "step": 39088 + }, + { + "epoch": 0.04, + "learning_rate": 4.981265692083874e-05, + "loss": 1.181, + "step": 39089 + }, + { + "epoch": 0.04, + "learning_rate": 4.981264732268889e-05, + "loss": 1.0509, + "step": 39090 + }, + { + "epoch": 0.04, + "learning_rate": 4.981263772429409e-05, + "loss": 1.0067, + "step": 39091 + }, + { + "epoch": 0.04, + "learning_rate": 4.981262812565436e-05, + "loss": 0.7276, + "step": 39092 + }, + { + "epoch": 0.04, + "learning_rate": 4.981261852676968e-05, + "loss": 1.0034, + "step": 39093 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812608927640065e-05, + "loss": 0.9856, + "step": 39094 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812599328265507e-05, + "loss": 0.8416, + "step": 39095 + }, + { + "epoch": 0.04, + "learning_rate": 4.981258972864602e-05, + "loss": 1.2294, + "step": 39096 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812580128781585e-05, + "loss": 0.9845, + "step": 39097 + }, + { + "epoch": 0.04, + "learning_rate": 4.981257052867221e-05, + "loss": 1.4679, + "step": 39098 + }, + { + "epoch": 0.04, + "learning_rate": 4.98125609283179e-05, + "loss": 1.0477, + "step": 39099 + }, + { + "epoch": 0.04, + "learning_rate": 4.981255132771865e-05, + "loss": 0.9312, + "step": 39100 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812541726874456e-05, + "loss": 1.146, + "step": 39101 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812532125785327e-05, + "loss": 1.2807, + "step": 39102 + }, + { + "epoch": 0.04, + "learning_rate": 4.981252252445126e-05, + "loss": 0.8001, + "step": 39103 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812512922872256e-05, + "loss": 0.825, + "step": 39104 + }, + { + "epoch": 0.04, + "learning_rate": 4.981250332104831e-05, + "loss": 1.1994, + "step": 39105 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812493718979427e-05, + "loss": 1.2156, + "step": 39106 + }, + { + "epoch": 0.04, + "learning_rate": 4.98124841166656e-05, + "loss": 0.603, + "step": 39107 + }, + { + "epoch": 0.04, + "learning_rate": 4.981247451410684e-05, + "loss": 0.4826, + "step": 39108 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812464911303144e-05, + "loss": 1.0989, + "step": 39109 + }, + { + "epoch": 0.04, + "learning_rate": 4.98124553082545e-05, + "loss": 1.0366, + "step": 39110 + }, + { + "epoch": 0.04, + "learning_rate": 4.981244570496093e-05, + "loss": 1.0607, + "step": 39111 + }, + { + "epoch": 0.04, + "learning_rate": 4.981243610142241e-05, + "loss": 0.9106, + "step": 39112 + }, + { + "epoch": 0.04, + "learning_rate": 4.981242649763897e-05, + "loss": 0.9214, + "step": 39113 + }, + { + "epoch": 0.04, + "learning_rate": 4.981241689361057e-05, + "loss": 1.1853, + "step": 39114 + }, + { + "epoch": 0.04, + "learning_rate": 4.981240728933725e-05, + "loss": 1.3109, + "step": 39115 + }, + { + "epoch": 0.04, + "learning_rate": 4.981239768481898e-05, + "loss": 1.2956, + "step": 39116 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812388080055786e-05, + "loss": 1.309, + "step": 39117 + }, + { + "epoch": 0.04, + "learning_rate": 4.981237847504764e-05, + "loss": 1.4526, + "step": 39118 + }, + { + "epoch": 0.04, + "learning_rate": 4.981236886979456e-05, + "loss": 1.4584, + "step": 39119 + }, + { + "epoch": 0.04, + "learning_rate": 4.981235926429655e-05, + "loss": 1.0505, + "step": 39120 + }, + { + "epoch": 0.04, + "learning_rate": 4.98123496585536e-05, + "loss": 0.8397, + "step": 39121 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812340052565706e-05, + "loss": 1.1974, + "step": 39122 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812330446332884e-05, + "loss": 0.9993, + "step": 39123 + }, + { + "epoch": 0.04, + "learning_rate": 4.981232083985512e-05, + "loss": 1.2516, + "step": 39124 + }, + { + "epoch": 0.04, + "learning_rate": 4.981231123313242e-05, + "loss": 1.3788, + "step": 39125 + }, + { + "epoch": 0.04, + "learning_rate": 4.981230162616479e-05, + "loss": 1.2909, + "step": 39126 + }, + { + "epoch": 0.04, + "learning_rate": 4.981229201895221e-05, + "loss": 0.8018, + "step": 39127 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812282411494706e-05, + "loss": 0.124, + "step": 39128 + }, + { + "epoch": 0.04, + "learning_rate": 4.981227280379226e-05, + "loss": 0.0891, + "step": 39129 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812263195844876e-05, + "loss": 0.0625, + "step": 39130 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812253587652555e-05, + "loss": 0.0367, + "step": 39131 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812243979215307e-05, + "loss": 0.0392, + "step": 39132 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812234370533116e-05, + "loss": 0.0366, + "step": 39133 + }, + { + "epoch": 0.04, + "learning_rate": 4.981222476160599e-05, + "loss": 0.0325, + "step": 39134 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812215152433924e-05, + "loss": 0.0388, + "step": 39135 + }, + { + "epoch": 0.04, + "learning_rate": 4.981220554301693e-05, + "loss": 0.0346, + "step": 39136 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812195933355e-05, + "loss": 0.0334, + "step": 39137 + }, + { + "epoch": 0.04, + "learning_rate": 4.981218632344813e-05, + "loss": 0.0418, + "step": 39138 + }, + { + "epoch": 0.04, + "learning_rate": 4.981217671329633e-05, + "loss": 0.0428, + "step": 39139 + }, + { + "epoch": 0.04, + "learning_rate": 4.981216710289959e-05, + "loss": 0.0991, + "step": 39140 + }, + { + "epoch": 0.04, + "learning_rate": 4.981215749225791e-05, + "loss": 0.0727, + "step": 39141 + }, + { + "epoch": 0.04, + "learning_rate": 4.98121478813713e-05, + "loss": 0.0662, + "step": 39142 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812138270239764e-05, + "loss": 1.0773, + "step": 39143 + }, + { + "epoch": 0.04, + "learning_rate": 4.981212865886328e-05, + "loss": 1.1517, + "step": 39144 + }, + { + "epoch": 0.04, + "learning_rate": 4.981211904724187e-05, + "loss": 1.1012, + "step": 39145 + }, + { + "epoch": 0.04, + "learning_rate": 4.981210943537551e-05, + "loss": 0.9735, + "step": 39146 + }, + { + "epoch": 0.04, + "learning_rate": 4.981209982326423e-05, + "loss": 0.9379, + "step": 39147 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812090210908015e-05, + "loss": 0.7635, + "step": 39148 + }, + { + "epoch": 0.04, + "learning_rate": 4.981208059830686e-05, + "loss": 1.1139, + "step": 39149 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812070985460765e-05, + "loss": 0.6442, + "step": 39150 + }, + { + "epoch": 0.04, + "learning_rate": 4.981206137236975e-05, + "loss": 0.9583, + "step": 39151 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812051759033796e-05, + "loss": 0.9248, + "step": 39152 + }, + { + "epoch": 0.04, + "learning_rate": 4.98120421454529e-05, + "loss": 0.8789, + "step": 39153 + }, + { + "epoch": 0.04, + "learning_rate": 4.981203253162708e-05, + "loss": 1.2202, + "step": 39154 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812022917556325e-05, + "loss": 1.0357, + "step": 39155 + }, + { + "epoch": 0.04, + "learning_rate": 4.9812013303240627e-05, + "loss": 1.0335, + "step": 39156 + }, + { + "epoch": 0.04, + "learning_rate": 4.981200368868e-05, + "loss": 1.2487, + "step": 39157 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811994073874446e-05, + "loss": 0.7313, + "step": 39158 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811984458823944e-05, + "loss": 0.9137, + "step": 39159 + }, + { + "epoch": 0.04, + "learning_rate": 4.981197484352852e-05, + "loss": 1.2186, + "step": 39160 + }, + { + "epoch": 0.04, + "learning_rate": 4.981196522798817e-05, + "loss": 0.9003, + "step": 39161 + }, + { + "epoch": 0.04, + "learning_rate": 4.981195561220287e-05, + "loss": 1.2427, + "step": 39162 + }, + { + "epoch": 0.04, + "learning_rate": 4.981194599617264e-05, + "loss": 0.9289, + "step": 39163 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811936379897484e-05, + "loss": 1.0902, + "step": 39164 + }, + { + "epoch": 0.04, + "learning_rate": 4.981192676337739e-05, + "loss": 1.0227, + "step": 39165 + }, + { + "epoch": 0.04, + "learning_rate": 4.981191714661237e-05, + "loss": 0.9829, + "step": 39166 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811907529602406e-05, + "loss": 1.0965, + "step": 39167 + }, + { + "epoch": 0.04, + "learning_rate": 4.981189791234752e-05, + "loss": 0.9323, + "step": 39168 + }, + { + "epoch": 0.04, + "learning_rate": 4.98118882948477e-05, + "loss": 0.5673, + "step": 39169 + }, + { + "epoch": 0.04, + "learning_rate": 4.981187867710294e-05, + "loss": 1.0277, + "step": 39170 + }, + { + "epoch": 0.04, + "learning_rate": 4.981186905911325e-05, + "loss": 0.8771, + "step": 39171 + }, + { + "epoch": 0.04, + "learning_rate": 4.981185944087863e-05, + "loss": 1.1455, + "step": 39172 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811849822399085e-05, + "loss": 0.8626, + "step": 39173 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811840203674594e-05, + "loss": 1.2188, + "step": 39174 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811830584705175e-05, + "loss": 1.2553, + "step": 39175 + }, + { + "epoch": 0.04, + "learning_rate": 4.981182096549083e-05, + "loss": 0.7568, + "step": 39176 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811811346031546e-05, + "loss": 1.1259, + "step": 39177 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811801726327336e-05, + "loss": 1.325, + "step": 39178 + }, + { + "epoch": 0.04, + "learning_rate": 4.981179210637819e-05, + "loss": 0.7901, + "step": 39179 + }, + { + "epoch": 0.04, + "learning_rate": 4.981178248618411e-05, + "loss": 0.6979, + "step": 39180 + }, + { + "epoch": 0.04, + "learning_rate": 4.981177286574511e-05, + "loss": 0.8872, + "step": 39181 + }, + { + "epoch": 0.04, + "learning_rate": 4.981176324506117e-05, + "loss": 0.8995, + "step": 39182 + }, + { + "epoch": 0.04, + "learning_rate": 4.98117536241323e-05, + "loss": 1.0638, + "step": 39183 + }, + { + "epoch": 0.04, + "learning_rate": 4.98117440029585e-05, + "loss": 0.9364, + "step": 39184 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811734381539766e-05, + "loss": 2.0642, + "step": 39185 + }, + { + "epoch": 0.04, + "learning_rate": 4.98117247598761e-05, + "loss": 1.5358, + "step": 39186 + }, + { + "epoch": 0.04, + "learning_rate": 4.981171513796751e-05, + "loss": 1.0374, + "step": 39187 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811705515813984e-05, + "loss": 0.7023, + "step": 39188 + }, + { + "epoch": 0.04, + "learning_rate": 4.981169589341553e-05, + "loss": 0.9653, + "step": 39189 + }, + { + "epoch": 0.04, + "learning_rate": 4.981168627077214e-05, + "loss": 0.8148, + "step": 39190 + }, + { + "epoch": 0.04, + "learning_rate": 4.981167664788382e-05, + "loss": 1.0264, + "step": 39191 + }, + { + "epoch": 0.04, + "learning_rate": 4.981166702475058e-05, + "loss": 1.4014, + "step": 39192 + }, + { + "epoch": 0.04, + "learning_rate": 4.98116574013724e-05, + "loss": 0.9131, + "step": 39193 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811647777749296e-05, + "loss": 1.0948, + "step": 39194 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811638153881255e-05, + "loss": 1.2823, + "step": 39195 + }, + { + "epoch": 0.04, + "learning_rate": 4.981162852976829e-05, + "loss": 0.6614, + "step": 39196 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811618905410395e-05, + "loss": 1.1642, + "step": 39197 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811609280807564e-05, + "loss": 0.8495, + "step": 39198 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811599655959804e-05, + "loss": 1.1655, + "step": 39199 + }, + { + "epoch": 0.04, + "learning_rate": 4.981159003086712e-05, + "loss": 1.2998, + "step": 39200 + }, + { + "epoch": 0.04, + "learning_rate": 4.98115804055295e-05, + "loss": 0.9103, + "step": 39201 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811570779946956e-05, + "loss": 0.7762, + "step": 39202 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811561154119484e-05, + "loss": 0.6583, + "step": 39203 + }, + { + "epoch": 0.04, + "learning_rate": 4.981155152804708e-05, + "loss": 1.0034, + "step": 39204 + }, + { + "epoch": 0.04, + "learning_rate": 4.981154190172975e-05, + "loss": 1.0966, + "step": 39205 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811532275167486e-05, + "loss": 0.8742, + "step": 39206 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811522648360295e-05, + "loss": 1.351, + "step": 39207 + }, + { + "epoch": 0.04, + "learning_rate": 4.981151302130817e-05, + "loss": 1.1814, + "step": 39208 + }, + { + "epoch": 0.04, + "learning_rate": 4.981150339401112e-05, + "loss": 0.6525, + "step": 39209 + }, + { + "epoch": 0.04, + "learning_rate": 4.981149376646914e-05, + "loss": 0.9543, + "step": 39210 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811484138682236e-05, + "loss": 1.1041, + "step": 39211 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811474510650405e-05, + "loss": 1.3302, + "step": 39212 + }, + { + "epoch": 0.04, + "learning_rate": 4.981146488237364e-05, + "loss": 1.0091, + "step": 39213 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811455253851945e-05, + "loss": 0.9231, + "step": 39214 + }, + { + "epoch": 0.04, + "learning_rate": 4.981144562508533e-05, + "loss": 0.7832, + "step": 39215 + }, + { + "epoch": 0.04, + "learning_rate": 4.981143599607377e-05, + "loss": 0.7762, + "step": 39216 + }, + { + "epoch": 0.04, + "learning_rate": 4.98114263668173e-05, + "loss": 0.9964, + "step": 39217 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811416737315895e-05, + "loss": 0.9461, + "step": 39218 + }, + { + "epoch": 0.04, + "learning_rate": 4.981140710756956e-05, + "loss": 1.2619, + "step": 39219 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811397477578305e-05, + "loss": 1.0262, + "step": 39220 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811387847342114e-05, + "loss": 0.9613, + "step": 39221 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811378216861e-05, + "loss": 1.0219, + "step": 39222 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811368586134956e-05, + "loss": 1.2916, + "step": 39223 + }, + { + "epoch": 0.04, + "learning_rate": 4.981135895516399e-05, + "loss": 1.2408, + "step": 39224 + }, + { + "epoch": 0.04, + "learning_rate": 4.981134932394809e-05, + "loss": 0.9689, + "step": 39225 + }, + { + "epoch": 0.04, + "learning_rate": 4.981133969248727e-05, + "loss": 1.0131, + "step": 39226 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811330060781516e-05, + "loss": 1.0489, + "step": 39227 + }, + { + "epoch": 0.04, + "learning_rate": 4.981132042883083e-05, + "loss": 1.0059, + "step": 39228 + }, + { + "epoch": 0.04, + "learning_rate": 4.981131079663523e-05, + "loss": 1.1499, + "step": 39229 + }, + { + "epoch": 0.04, + "learning_rate": 4.98113011641947e-05, + "loss": 0.9956, + "step": 39230 + }, + { + "epoch": 0.04, + "learning_rate": 4.981129153150924e-05, + "loss": 0.9842, + "step": 39231 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811281898578856e-05, + "loss": 0.7992, + "step": 39232 + }, + { + "epoch": 0.04, + "learning_rate": 4.981127226540354e-05, + "loss": 0.8494, + "step": 39233 + }, + { + "epoch": 0.04, + "learning_rate": 4.98112626319833e-05, + "loss": 1.0666, + "step": 39234 + }, + { + "epoch": 0.04, + "learning_rate": 4.981125299831814e-05, + "loss": 0.7245, + "step": 39235 + }, + { + "epoch": 0.04, + "learning_rate": 4.981124336440805e-05, + "loss": 0.5841, + "step": 39236 + }, + { + "epoch": 0.04, + "learning_rate": 4.981123373025303e-05, + "loss": 0.9119, + "step": 39237 + }, + { + "epoch": 0.04, + "learning_rate": 4.981122409585309e-05, + "loss": 1.1681, + "step": 39238 + }, + { + "epoch": 0.04, + "learning_rate": 4.981121446120821e-05, + "loss": 1.056, + "step": 39239 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811204826318424e-05, + "loss": 0.6005, + "step": 39240 + }, + { + "epoch": 0.04, + "learning_rate": 4.98111951911837e-05, + "loss": 0.665, + "step": 39241 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811185555804054e-05, + "loss": 0.8189, + "step": 39242 + }, + { + "epoch": 0.04, + "learning_rate": 4.981117592017948e-05, + "loss": 0.7773, + "step": 39243 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811166284309985e-05, + "loss": 1.0441, + "step": 39244 + }, + { + "epoch": 0.04, + "learning_rate": 4.981115664819556e-05, + "loss": 0.9727, + "step": 39245 + }, + { + "epoch": 0.04, + "learning_rate": 4.981114701183621e-05, + "loss": 1.28, + "step": 39246 + }, + { + "epoch": 0.04, + "learning_rate": 4.981113737523194e-05, + "loss": 1.3584, + "step": 39247 + }, + { + "epoch": 0.04, + "learning_rate": 4.981112773838274e-05, + "loss": 1.0465, + "step": 39248 + }, + { + "epoch": 0.04, + "learning_rate": 4.981111810128862e-05, + "loss": 0.7743, + "step": 39249 + }, + { + "epoch": 0.04, + "learning_rate": 4.981110846394957e-05, + "loss": 0.7419, + "step": 39250 + }, + { + "epoch": 0.04, + "learning_rate": 4.98110988263656e-05, + "loss": 1.2807, + "step": 39251 + }, + { + "epoch": 0.04, + "learning_rate": 4.98110891885367e-05, + "loss": 1.0047, + "step": 39252 + }, + { + "epoch": 0.04, + "learning_rate": 4.981107955046288e-05, + "loss": 1.0964, + "step": 39253 + }, + { + "epoch": 0.04, + "learning_rate": 4.981106991214412e-05, + "loss": 0.9875, + "step": 39254 + }, + { + "epoch": 0.04, + "learning_rate": 4.981106027358046e-05, + "loss": 0.898, + "step": 39255 + }, + { + "epoch": 0.04, + "learning_rate": 4.981105063477186e-05, + "loss": 0.3289, + "step": 39256 + }, + { + "epoch": 0.04, + "learning_rate": 4.981104099571834e-05, + "loss": 0.8392, + "step": 39257 + }, + { + "epoch": 0.04, + "learning_rate": 4.9811031356419897e-05, + "loss": 0.9833, + "step": 39258 + }, + { + "epoch": 0.04, + "learning_rate": 4.981102171687653e-05, + "loss": 1.3207, + "step": 39259 + }, + { + "epoch": 0.04, + "learning_rate": 4.981101207708824e-05, + "loss": 0.8308, + "step": 39260 + }, + { + "epoch": 0.04, + "learning_rate": 4.981100243705502e-05, + "loss": 0.7786, + "step": 39261 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810992796776876e-05, + "loss": 1.1744, + "step": 39262 + }, + { + "epoch": 0.04, + "learning_rate": 4.981098315625382e-05, + "loss": 0.9696, + "step": 39263 + }, + { + "epoch": 0.04, + "learning_rate": 4.981097351548583e-05, + "loss": 1.0215, + "step": 39264 + }, + { + "epoch": 0.04, + "learning_rate": 4.981096387447292e-05, + "loss": 1.1677, + "step": 39265 + }, + { + "epoch": 0.04, + "learning_rate": 4.981095423321509e-05, + "loss": 0.888, + "step": 39266 + }, + { + "epoch": 0.04, + "learning_rate": 4.981094459171233e-05, + "loss": 0.8307, + "step": 39267 + }, + { + "epoch": 0.04, + "learning_rate": 4.981093494996465e-05, + "loss": 0.7331, + "step": 39268 + }, + { + "epoch": 0.04, + "learning_rate": 4.981092530797205e-05, + "loss": 0.7844, + "step": 39269 + }, + { + "epoch": 0.04, + "learning_rate": 4.981091566573451e-05, + "loss": 0.8766, + "step": 39270 + }, + { + "epoch": 0.04, + "learning_rate": 4.981090602325207e-05, + "loss": 1.0515, + "step": 39271 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810896380524697e-05, + "loss": 1.0564, + "step": 39272 + }, + { + "epoch": 0.04, + "learning_rate": 4.98108867375524e-05, + "loss": 0.6584, + "step": 39273 + }, + { + "epoch": 0.04, + "learning_rate": 4.981087709433518e-05, + "loss": 0.9982, + "step": 39274 + }, + { + "epoch": 0.04, + "learning_rate": 4.981086745087305e-05, + "loss": 1.0005, + "step": 39275 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810857807165976e-05, + "loss": 0.9139, + "step": 39276 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810848163214e-05, + "loss": 0.9254, + "step": 39277 + }, + { + "epoch": 0.04, + "learning_rate": 4.981083851901709e-05, + "loss": 0.8472, + "step": 39278 + }, + { + "epoch": 0.04, + "learning_rate": 4.981082887457526e-05, + "loss": 0.9605, + "step": 39279 + }, + { + "epoch": 0.04, + "learning_rate": 4.981081922988851e-05, + "loss": 1.087, + "step": 39280 + }, + { + "epoch": 0.04, + "learning_rate": 4.981080958495684e-05, + "loss": 0.7958, + "step": 39281 + }, + { + "epoch": 0.04, + "learning_rate": 4.981079993978024e-05, + "loss": 1.1405, + "step": 39282 + }, + { + "epoch": 0.04, + "learning_rate": 4.981079029435872e-05, + "loss": 0.9852, + "step": 39283 + }, + { + "epoch": 0.04, + "learning_rate": 4.981078064869229e-05, + "loss": 1.1685, + "step": 39284 + }, + { + "epoch": 0.04, + "learning_rate": 4.981077100278092e-05, + "loss": 0.9621, + "step": 39285 + }, + { + "epoch": 0.04, + "learning_rate": 4.981076135662465e-05, + "loss": 0.9874, + "step": 39286 + }, + { + "epoch": 0.04, + "learning_rate": 4.981075171022344e-05, + "loss": 1.0804, + "step": 39287 + }, + { + "epoch": 0.04, + "learning_rate": 4.981074206357731e-05, + "loss": 1.2871, + "step": 39288 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810732416686266e-05, + "loss": 0.8644, + "step": 39289 + }, + { + "epoch": 0.04, + "learning_rate": 4.98107227695503e-05, + "loss": 1.0402, + "step": 39290 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810713122169415e-05, + "loss": 1.0247, + "step": 39291 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810703474543604e-05, + "loss": 1.008, + "step": 39292 + }, + { + "epoch": 0.04, + "learning_rate": 4.981069382667288e-05, + "loss": 0.6948, + "step": 39293 + }, + { + "epoch": 0.04, + "learning_rate": 4.981068417855723e-05, + "loss": 1.2287, + "step": 39294 + }, + { + "epoch": 0.04, + "learning_rate": 4.981067453019666e-05, + "loss": 1.4142, + "step": 39295 + }, + { + "epoch": 0.04, + "learning_rate": 4.981066488159117e-05, + "loss": 0.7012, + "step": 39296 + }, + { + "epoch": 0.04, + "learning_rate": 4.981065523274076e-05, + "loss": 0.5182, + "step": 39297 + }, + { + "epoch": 0.04, + "learning_rate": 4.981064558364542e-05, + "loss": 0.9715, + "step": 39298 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810635934305174e-05, + "loss": 0.8123, + "step": 39299 + }, + { + "epoch": 0.04, + "learning_rate": 4.981062628472e-05, + "loss": 0.5163, + "step": 39300 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810616634889904e-05, + "loss": 0.8614, + "step": 39301 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810606984814894e-05, + "loss": 1.0389, + "step": 39302 + }, + { + "epoch": 0.04, + "learning_rate": 4.981059733449496e-05, + "loss": 0.9489, + "step": 39303 + }, + { + "epoch": 0.04, + "learning_rate": 4.981058768393011e-05, + "loss": 0.8648, + "step": 39304 + }, + { + "epoch": 0.04, + "learning_rate": 4.981057803312034e-05, + "loss": 0.6303, + "step": 39305 + }, + { + "epoch": 0.04, + "learning_rate": 4.981056838206565e-05, + "loss": 0.9079, + "step": 39306 + }, + { + "epoch": 0.04, + "learning_rate": 4.981055873076604e-05, + "loss": 1.1502, + "step": 39307 + }, + { + "epoch": 0.04, + "learning_rate": 4.981054907922151e-05, + "loss": 1.0861, + "step": 39308 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810539427432056e-05, + "loss": 1.1633, + "step": 39309 + }, + { + "epoch": 0.04, + "learning_rate": 4.981052977539769e-05, + "loss": 1.0303, + "step": 39310 + }, + { + "epoch": 0.04, + "learning_rate": 4.98105201231184e-05, + "loss": 0.5823, + "step": 39311 + }, + { + "epoch": 0.04, + "learning_rate": 4.981051047059419e-05, + "loss": 1.0341, + "step": 39312 + }, + { + "epoch": 0.04, + "learning_rate": 4.981050081782507e-05, + "loss": 0.9347, + "step": 39313 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810491164811025e-05, + "loss": 0.9275, + "step": 39314 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810481511552065e-05, + "loss": 0.7837, + "step": 39315 + }, + { + "epoch": 0.04, + "learning_rate": 4.981047185804818e-05, + "loss": 1.0472, + "step": 39316 + }, + { + "epoch": 0.04, + "learning_rate": 4.981046220429938e-05, + "loss": 1.1927, + "step": 39317 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810452550305667e-05, + "loss": 0.9653, + "step": 39318 + }, + { + "epoch": 0.04, + "learning_rate": 4.981044289606702e-05, + "loss": 0.8895, + "step": 39319 + }, + { + "epoch": 0.04, + "learning_rate": 4.981043324158347e-05, + "loss": 1.0375, + "step": 39320 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810423586854997e-05, + "loss": 1.1937, + "step": 39321 + }, + { + "epoch": 0.04, + "learning_rate": 4.981041393188161e-05, + "loss": 0.639, + "step": 39322 + }, + { + "epoch": 0.04, + "learning_rate": 4.98104042766633e-05, + "loss": 0.7989, + "step": 39323 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810394621200076e-05, + "loss": 1.1128, + "step": 39324 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810384965491924e-05, + "loss": 0.8364, + "step": 39325 + }, + { + "epoch": 0.04, + "learning_rate": 4.981037530953886e-05, + "loss": 1.1631, + "step": 39326 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810365653340884e-05, + "loss": 0.9776, + "step": 39327 + }, + { + "epoch": 0.04, + "learning_rate": 4.981035599689798e-05, + "loss": 0.8689, + "step": 39328 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810346340210166e-05, + "loss": 1.2353, + "step": 39329 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810336683277435e-05, + "loss": 1.059, + "step": 39330 + }, + { + "epoch": 0.04, + "learning_rate": 4.981032702609979e-05, + "loss": 1.1029, + "step": 39331 + }, + { + "epoch": 0.04, + "learning_rate": 4.981031736867722e-05, + "loss": 1.1117, + "step": 39332 + }, + { + "epoch": 0.04, + "learning_rate": 4.981030771100974e-05, + "loss": 1.1262, + "step": 39333 + }, + { + "epoch": 0.04, + "learning_rate": 4.981029805309734e-05, + "loss": 1.0685, + "step": 39334 + }, + { + "epoch": 0.04, + "learning_rate": 4.981028839494002e-05, + "loss": 1.0004, + "step": 39335 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810278736537783e-05, + "loss": 1.2164, + "step": 39336 + }, + { + "epoch": 0.04, + "learning_rate": 4.981026907789064e-05, + "loss": 1.2626, + "step": 39337 + }, + { + "epoch": 0.04, + "learning_rate": 4.981025941899857e-05, + "loss": 0.957, + "step": 39338 + }, + { + "epoch": 0.04, + "learning_rate": 4.981024975986158e-05, + "loss": 0.8842, + "step": 39339 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810240100479686e-05, + "loss": 1.0255, + "step": 39340 + }, + { + "epoch": 0.04, + "learning_rate": 4.981023044085287e-05, + "loss": 0.9401, + "step": 39341 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810220780981136e-05, + "loss": 1.0058, + "step": 39342 + }, + { + "epoch": 0.04, + "learning_rate": 4.981021112086449e-05, + "loss": 0.8858, + "step": 39343 + }, + { + "epoch": 0.04, + "learning_rate": 4.981020146050293e-05, + "loss": 1.1689, + "step": 39344 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810191799896446e-05, + "loss": 1.9189, + "step": 39345 + }, + { + "epoch": 0.04, + "learning_rate": 4.981018213904505e-05, + "loss": 1.2785, + "step": 39346 + }, + { + "epoch": 0.04, + "learning_rate": 4.981017247794874e-05, + "loss": 0.9513, + "step": 39347 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810162816607506e-05, + "loss": 1.3757, + "step": 39348 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810153155021366e-05, + "loss": 1.2896, + "step": 39349 + }, + { + "epoch": 0.04, + "learning_rate": 4.981014349319031e-05, + "loss": 1.3869, + "step": 39350 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810133831114335e-05, + "loss": 0.5396, + "step": 39351 + }, + { + "epoch": 0.04, + "learning_rate": 4.981012416879345e-05, + "loss": 0.7396, + "step": 39352 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810114506227646e-05, + "loss": 1.0403, + "step": 39353 + }, + { + "epoch": 0.04, + "learning_rate": 4.981010484341693e-05, + "loss": 1.081, + "step": 39354 + }, + { + "epoch": 0.04, + "learning_rate": 4.981009518036129e-05, + "loss": 1.1007, + "step": 39355 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810085517060744e-05, + "loss": 1.1455, + "step": 39356 + }, + { + "epoch": 0.04, + "learning_rate": 4.981007585351528e-05, + "loss": 0.8713, + "step": 39357 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810066189724904e-05, + "loss": 1.1501, + "step": 39358 + }, + { + "epoch": 0.04, + "learning_rate": 4.981005652568961e-05, + "loss": 1.0876, + "step": 39359 + }, + { + "epoch": 0.04, + "learning_rate": 4.981004686140941e-05, + "loss": 0.9608, + "step": 39360 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810037196884284e-05, + "loss": 0.7409, + "step": 39361 + }, + { + "epoch": 0.04, + "learning_rate": 4.981002753211425e-05, + "loss": 1.0544, + "step": 39362 + }, + { + "epoch": 0.04, + "learning_rate": 4.98100178670993e-05, + "loss": 1.3174, + "step": 39363 + }, + { + "epoch": 0.04, + "learning_rate": 4.981000820183944e-05, + "loss": 1.0024, + "step": 39364 + }, + { + "epoch": 0.04, + "learning_rate": 4.980999853633466e-05, + "loss": 0.6008, + "step": 39365 + }, + { + "epoch": 0.04, + "learning_rate": 4.980998887058497e-05, + "loss": 0.6251, + "step": 39366 + }, + { + "epoch": 0.04, + "learning_rate": 4.980997920459036e-05, + "loss": 1.1262, + "step": 39367 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809969538350846e-05, + "loss": 0.6068, + "step": 39368 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809959871866415e-05, + "loss": 0.8365, + "step": 39369 + }, + { + "epoch": 0.04, + "learning_rate": 4.980995020513707e-05, + "loss": 1.0226, + "step": 39370 + }, + { + "epoch": 0.04, + "learning_rate": 4.980994053816281e-05, + "loss": 1.3298, + "step": 39371 + }, + { + "epoch": 0.04, + "learning_rate": 4.980993087094364e-05, + "loss": 0.3622, + "step": 39372 + }, + { + "epoch": 0.04, + "learning_rate": 4.980992120347956e-05, + "loss": 0.8239, + "step": 39373 + }, + { + "epoch": 0.04, + "learning_rate": 4.980991153577056e-05, + "loss": 1.1508, + "step": 39374 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809901867816644e-05, + "loss": 1.1408, + "step": 39375 + }, + { + "epoch": 0.04, + "learning_rate": 4.980989219961782e-05, + "loss": 1.3383, + "step": 39376 + }, + { + "epoch": 0.04, + "learning_rate": 4.980988253117408e-05, + "loss": 1.0905, + "step": 39377 + }, + { + "epoch": 0.04, + "learning_rate": 4.980987286248543e-05, + "loss": 1.3733, + "step": 39378 + }, + { + "epoch": 0.04, + "learning_rate": 4.980986319355187e-05, + "loss": 0.7359, + "step": 39379 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809853524373395e-05, + "loss": 1.2044, + "step": 39380 + }, + { + "epoch": 0.04, + "learning_rate": 4.980984385495001e-05, + "loss": 0.9881, + "step": 39381 + }, + { + "epoch": 0.04, + "learning_rate": 4.980983418528171e-05, + "loss": 1.1748, + "step": 39382 + }, + { + "epoch": 0.04, + "learning_rate": 4.98098245153685e-05, + "loss": 1.2154, + "step": 39383 + }, + { + "epoch": 0.04, + "learning_rate": 4.980981484521037e-05, + "loss": 0.9783, + "step": 39384 + }, + { + "epoch": 0.04, + "learning_rate": 4.980980517480733e-05, + "loss": 0.5089, + "step": 39385 + }, + { + "epoch": 0.04, + "learning_rate": 4.980979550415939e-05, + "loss": 1.018, + "step": 39386 + }, + { + "epoch": 0.04, + "learning_rate": 4.980978583326653e-05, + "loss": 0.919, + "step": 39387 + }, + { + "epoch": 0.04, + "learning_rate": 4.980977616212876e-05, + "loss": 1.1514, + "step": 39388 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809766490746074e-05, + "loss": 1.0756, + "step": 39389 + }, + { + "epoch": 0.04, + "learning_rate": 4.980975681911848e-05, + "loss": 1.3075, + "step": 39390 + }, + { + "epoch": 0.04, + "learning_rate": 4.980974714724598e-05, + "loss": 1.3014, + "step": 39391 + }, + { + "epoch": 0.04, + "learning_rate": 4.980973747512856e-05, + "loss": 1.1239, + "step": 39392 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809727802766225e-05, + "loss": 1.6621, + "step": 39393 + }, + { + "epoch": 0.04, + "learning_rate": 4.980971813015899e-05, + "loss": 1.6739, + "step": 39394 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809708457306834e-05, + "loss": 1.6172, + "step": 39395 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809698784209774e-05, + "loss": 1.6647, + "step": 39396 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809689110867805e-05, + "loss": 1.7539, + "step": 39397 + }, + { + "epoch": 0.04, + "learning_rate": 4.980967943728092e-05, + "loss": 1.6515, + "step": 39398 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809669763449126e-05, + "loss": 1.5129, + "step": 39399 + }, + { + "epoch": 0.04, + "learning_rate": 4.980966008937242e-05, + "loss": 1.6126, + "step": 39400 + }, + { + "epoch": 0.04, + "learning_rate": 4.98096504150508e-05, + "loss": 1.5243, + "step": 39401 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809640740484274e-05, + "loss": 1.4508, + "step": 39402 + }, + { + "epoch": 0.04, + "learning_rate": 4.980963106567284e-05, + "loss": 1.6949, + "step": 39403 + }, + { + "epoch": 0.04, + "learning_rate": 4.980962139061649e-05, + "loss": 1.5922, + "step": 39404 + }, + { + "epoch": 0.04, + "learning_rate": 4.980961171531524e-05, + "loss": 1.3797, + "step": 39405 + }, + { + "epoch": 0.04, + "learning_rate": 4.980960203976907e-05, + "loss": 1.6222, + "step": 39406 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809592363977996e-05, + "loss": 1.6444, + "step": 39407 + }, + { + "epoch": 0.04, + "learning_rate": 4.980958268794201e-05, + "loss": 1.4398, + "step": 39408 + }, + { + "epoch": 0.04, + "learning_rate": 4.980957301166111e-05, + "loss": 1.5139, + "step": 39409 + }, + { + "epoch": 0.04, + "learning_rate": 4.98095633351353e-05, + "loss": 1.518, + "step": 39410 + }, + { + "epoch": 0.04, + "learning_rate": 4.980955365836459e-05, + "loss": 1.6385, + "step": 39411 + }, + { + "epoch": 0.04, + "learning_rate": 4.980954398134896e-05, + "loss": 1.43, + "step": 39412 + }, + { + "epoch": 0.04, + "learning_rate": 4.980953430408843e-05, + "loss": 1.2932, + "step": 39413 + }, + { + "epoch": 0.04, + "learning_rate": 4.980952462658299e-05, + "loss": 1.6308, + "step": 39414 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809514948832636e-05, + "loss": 1.4673, + "step": 39415 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809505270837374e-05, + "loss": 0.9639, + "step": 39416 + }, + { + "epoch": 0.04, + "learning_rate": 4.98094955925972e-05, + "loss": 0.4077, + "step": 39417 + }, + { + "epoch": 0.04, + "learning_rate": 4.980948591411212e-05, + "loss": 0.2474, + "step": 39418 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809476235382134e-05, + "loss": 0.252, + "step": 39419 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809466556407234e-05, + "loss": 0.2813, + "step": 39420 + }, + { + "epoch": 0.04, + "learning_rate": 4.980945687718743e-05, + "loss": 0.2933, + "step": 39421 + }, + { + "epoch": 0.04, + "learning_rate": 4.980944719772271e-05, + "loss": 0.2783, + "step": 39422 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809437518013095e-05, + "loss": 0.8586, + "step": 39423 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809427838058564e-05, + "loss": 1.3121, + "step": 39424 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809418157859125e-05, + "loss": 1.9334, + "step": 39425 + }, + { + "epoch": 0.04, + "learning_rate": 4.980940847741478e-05, + "loss": 1.9649, + "step": 39426 + }, + { + "epoch": 0.04, + "learning_rate": 4.980939879672552e-05, + "loss": 1.8812, + "step": 39427 + }, + { + "epoch": 0.04, + "learning_rate": 4.980938911579136e-05, + "loss": 1.8704, + "step": 39428 + }, + { + "epoch": 0.04, + "learning_rate": 4.980937943461229e-05, + "loss": 1.868, + "step": 39429 + }, + { + "epoch": 0.04, + "learning_rate": 4.98093697531883e-05, + "loss": 1.8436, + "step": 39430 + }, + { + "epoch": 0.04, + "learning_rate": 4.980936007151942e-05, + "loss": 1.8716, + "step": 39431 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809350389605626e-05, + "loss": 1.9028, + "step": 39432 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809340707446926e-05, + "loss": 1.8769, + "step": 39433 + }, + { + "epoch": 0.04, + "learning_rate": 4.980933102504332e-05, + "loss": 1.8136, + "step": 39434 + }, + { + "epoch": 0.04, + "learning_rate": 4.98093213423948e-05, + "loss": 1.8193, + "step": 39435 + }, + { + "epoch": 0.04, + "learning_rate": 4.980931165950138e-05, + "loss": 1.8495, + "step": 39436 + }, + { + "epoch": 0.04, + "learning_rate": 4.980930197636304e-05, + "loss": 1.7969, + "step": 39437 + }, + { + "epoch": 0.04, + "learning_rate": 4.980929229297981e-05, + "loss": 1.816, + "step": 39438 + }, + { + "epoch": 0.04, + "learning_rate": 4.980928260935166e-05, + "loss": 1.5572, + "step": 39439 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809272925478614e-05, + "loss": 1.0006, + "step": 39440 + }, + { + "epoch": 0.04, + "learning_rate": 4.980926324136066e-05, + "loss": 0.8301, + "step": 39441 + }, + { + "epoch": 0.04, + "learning_rate": 4.980925355699779e-05, + "loss": 0.7767, + "step": 39442 + }, + { + "epoch": 0.04, + "learning_rate": 4.980924387239002e-05, + "loss": 0.9847, + "step": 39443 + }, + { + "epoch": 0.04, + "learning_rate": 4.980923418753735e-05, + "loss": 1.017, + "step": 39444 + }, + { + "epoch": 0.04, + "learning_rate": 4.980922450243976e-05, + "loss": 0.9332, + "step": 39445 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809214817097274e-05, + "loss": 1.0278, + "step": 39446 + }, + { + "epoch": 0.04, + "learning_rate": 4.980920513150987e-05, + "loss": 1.0741, + "step": 39447 + }, + { + "epoch": 0.04, + "learning_rate": 4.980919544567757e-05, + "loss": 0.829, + "step": 39448 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809185759600366e-05, + "loss": 1.1072, + "step": 39449 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809176073278255e-05, + "loss": 1.1097, + "step": 39450 + }, + { + "epoch": 0.04, + "learning_rate": 4.980916638671123e-05, + "loss": 0.7462, + "step": 39451 + }, + { + "epoch": 0.04, + "learning_rate": 4.980915669989931e-05, + "loss": 1.1392, + "step": 39452 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809147012842475e-05, + "loss": 1.2443, + "step": 39453 + }, + { + "epoch": 0.04, + "learning_rate": 4.980913732554075e-05, + "loss": 0.981, + "step": 39454 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809127637994104e-05, + "loss": 1.0561, + "step": 39455 + }, + { + "epoch": 0.04, + "learning_rate": 4.980911795020255e-05, + "loss": 0.3015, + "step": 39456 + }, + { + "epoch": 0.04, + "learning_rate": 4.98091082621661e-05, + "loss": 0.5833, + "step": 39457 + }, + { + "epoch": 0.04, + "learning_rate": 4.980909857388474e-05, + "loss": 0.7208, + "step": 39458 + }, + { + "epoch": 0.04, + "learning_rate": 4.980908888535848e-05, + "loss": 0.7793, + "step": 39459 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809079196587314e-05, + "loss": 0.9635, + "step": 39460 + }, + { + "epoch": 0.04, + "learning_rate": 4.980906950757125e-05, + "loss": 1.03, + "step": 39461 + }, + { + "epoch": 0.04, + "learning_rate": 4.980905981831027e-05, + "loss": 1.021, + "step": 39462 + }, + { + "epoch": 0.04, + "learning_rate": 4.980905012880439e-05, + "loss": 1.1803, + "step": 39463 + }, + { + "epoch": 0.04, + "learning_rate": 4.98090404390536e-05, + "loss": 1.2164, + "step": 39464 + }, + { + "epoch": 0.04, + "learning_rate": 4.980903074905792e-05, + "loss": 1.0066, + "step": 39465 + }, + { + "epoch": 0.04, + "learning_rate": 4.980902105881732e-05, + "loss": 0.9976, + "step": 39466 + }, + { + "epoch": 0.04, + "learning_rate": 4.9809011368331824e-05, + "loss": 1.0251, + "step": 39467 + }, + { + "epoch": 0.04, + "learning_rate": 4.980900167760142e-05, + "loss": 0.9697, + "step": 39468 + }, + { + "epoch": 0.04, + "learning_rate": 4.980899198662611e-05, + "loss": 0.9595, + "step": 39469 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808982295405906e-05, + "loss": 0.7478, + "step": 39470 + }, + { + "epoch": 0.04, + "learning_rate": 4.980897260394079e-05, + "loss": 1.0769, + "step": 39471 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808962912230774e-05, + "loss": 1.0043, + "step": 39472 + }, + { + "epoch": 0.04, + "learning_rate": 4.980895322027585e-05, + "loss": 0.9089, + "step": 39473 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808943528076024e-05, + "loss": 0.966, + "step": 39474 + }, + { + "epoch": 0.04, + "learning_rate": 4.98089338356313e-05, + "loss": 1.0338, + "step": 39475 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808924142941664e-05, + "loss": 0.7414, + "step": 39476 + }, + { + "epoch": 0.04, + "learning_rate": 4.980891445000713e-05, + "loss": 1.0217, + "step": 39477 + }, + { + "epoch": 0.04, + "learning_rate": 4.980890475682769e-05, + "loss": 0.4078, + "step": 39478 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808895063403347e-05, + "loss": 0.2495, + "step": 39479 + }, + { + "epoch": 0.04, + "learning_rate": 4.980888536973411e-05, + "loss": 0.2139, + "step": 39480 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808875675819964e-05, + "loss": 0.2069, + "step": 39481 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808865981660914e-05, + "loss": 0.1812, + "step": 39482 + }, + { + "epoch": 0.04, + "learning_rate": 4.980885628725696e-05, + "loss": 0.2064, + "step": 39483 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808846592608105e-05, + "loss": 0.2731, + "step": 39484 + }, + { + "epoch": 0.04, + "learning_rate": 4.980883689771435e-05, + "loss": 0.3351, + "step": 39485 + }, + { + "epoch": 0.04, + "learning_rate": 4.980882720257569e-05, + "loss": 0.857, + "step": 39486 + }, + { + "epoch": 0.04, + "learning_rate": 4.980881750719213e-05, + "loss": 1.1341, + "step": 39487 + }, + { + "epoch": 0.04, + "learning_rate": 4.980880781156366e-05, + "loss": 0.5107, + "step": 39488 + }, + { + "epoch": 0.04, + "learning_rate": 4.98087981156903e-05, + "loss": 1.362, + "step": 39489 + }, + { + "epoch": 0.04, + "learning_rate": 4.980878841957203e-05, + "loss": 1.2048, + "step": 39490 + }, + { + "epoch": 0.04, + "learning_rate": 4.980877872320886e-05, + "loss": 1.0569, + "step": 39491 + }, + { + "epoch": 0.04, + "learning_rate": 4.980876902660079e-05, + "loss": 1.1194, + "step": 39492 + }, + { + "epoch": 0.04, + "learning_rate": 4.980875932974781e-05, + "loss": 0.8547, + "step": 39493 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808749632649935e-05, + "loss": 1.3534, + "step": 39494 + }, + { + "epoch": 0.04, + "learning_rate": 4.980873993530716e-05, + "loss": 1.2066, + "step": 39495 + }, + { + "epoch": 0.04, + "learning_rate": 4.980873023771948e-05, + "loss": 1.0963, + "step": 39496 + }, + { + "epoch": 0.04, + "learning_rate": 4.98087205398869e-05, + "loss": 0.9997, + "step": 39497 + }, + { + "epoch": 0.04, + "learning_rate": 4.980871084180943e-05, + "loss": 0.9457, + "step": 39498 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808701143487035e-05, + "loss": 0.8345, + "step": 39499 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808691444919755e-05, + "loss": 1.3071, + "step": 39500 + }, + { + "epoch": 0.04, + "eval_loss": 1.0904228687286377, + "eval_runtime": 127.8411, + "eval_samples_per_second": 10.834, + "eval_steps_per_second": 5.421, + "step": 39500 + }, + { + "epoch": 0.04, + "learning_rate": 4.980868174610757e-05, + "loss": 1.5089, + "step": 39501 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808672047050484e-05, + "loss": 1.3452, + "step": 39502 + }, + { + "epoch": 0.04, + "learning_rate": 4.98086623477485e-05, + "loss": 1.1372, + "step": 39503 + }, + { + "epoch": 0.04, + "learning_rate": 4.980865264820162e-05, + "loss": 0.6489, + "step": 39504 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808642948409826e-05, + "loss": 0.4958, + "step": 39505 + }, + { + "epoch": 0.04, + "learning_rate": 4.980863324837314e-05, + "loss": 1.0723, + "step": 39506 + }, + { + "epoch": 0.04, + "learning_rate": 4.980862354809155e-05, + "loss": 0.6679, + "step": 39507 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808613847565065e-05, + "loss": 0.4005, + "step": 39508 + }, + { + "epoch": 0.04, + "learning_rate": 4.980860414679368e-05, + "loss": 0.7643, + "step": 39509 + }, + { + "epoch": 0.04, + "learning_rate": 4.980859444577739e-05, + "loss": 1.1303, + "step": 39510 + }, + { + "epoch": 0.04, + "learning_rate": 4.98085847445162e-05, + "loss": 0.9753, + "step": 39511 + }, + { + "epoch": 0.04, + "learning_rate": 4.980857504301011e-05, + "loss": 0.8158, + "step": 39512 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808565341259126e-05, + "loss": 1.0136, + "step": 39513 + }, + { + "epoch": 0.04, + "learning_rate": 4.980855563926324e-05, + "loss": 0.9954, + "step": 39514 + }, + { + "epoch": 0.04, + "learning_rate": 4.980854593702245e-05, + "loss": 0.8018, + "step": 39515 + }, + { + "epoch": 0.04, + "learning_rate": 4.980853623453676e-05, + "loss": 1.17, + "step": 39516 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808526531806174e-05, + "loss": 1.0952, + "step": 39517 + }, + { + "epoch": 0.04, + "learning_rate": 4.980851682883069e-05, + "loss": 0.8008, + "step": 39518 + }, + { + "epoch": 0.04, + "learning_rate": 4.980850712561031e-05, + "loss": 0.8252, + "step": 39519 + }, + { + "epoch": 0.04, + "learning_rate": 4.980849742214503e-05, + "loss": 1.1353, + "step": 39520 + }, + { + "epoch": 0.04, + "learning_rate": 4.980848771843484e-05, + "loss": 1.002, + "step": 39521 + }, + { + "epoch": 0.04, + "learning_rate": 4.980847801447976e-05, + "loss": 0.9392, + "step": 39522 + }, + { + "epoch": 0.04, + "learning_rate": 4.980846831027978e-05, + "loss": 1.0779, + "step": 39523 + }, + { + "epoch": 0.04, + "learning_rate": 4.98084586058349e-05, + "loss": 0.9561, + "step": 39524 + }, + { + "epoch": 0.04, + "learning_rate": 4.980844890114512e-05, + "loss": 1.0676, + "step": 39525 + }, + { + "epoch": 0.04, + "learning_rate": 4.980843919621044e-05, + "loss": 1.288, + "step": 39526 + }, + { + "epoch": 0.04, + "learning_rate": 4.980842949103087e-05, + "loss": 0.4913, + "step": 39527 + }, + { + "epoch": 0.04, + "learning_rate": 4.98084197856064e-05, + "loss": 0.4486, + "step": 39528 + }, + { + "epoch": 0.04, + "learning_rate": 4.980841007993703e-05, + "loss": 0.3775, + "step": 39529 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808400374022755e-05, + "loss": 0.4906, + "step": 39530 + }, + { + "epoch": 0.04, + "learning_rate": 4.980839066786359e-05, + "loss": 0.5442, + "step": 39531 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808380961459524e-05, + "loss": 0.7615, + "step": 39532 + }, + { + "epoch": 0.04, + "learning_rate": 4.980837125481056e-05, + "loss": 0.9001, + "step": 39533 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808361547916695e-05, + "loss": 0.9437, + "step": 39534 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808351840777936e-05, + "loss": 1.1139, + "step": 39535 + }, + { + "epoch": 0.04, + "learning_rate": 4.980834213339428e-05, + "loss": 1.1029, + "step": 39536 + }, + { + "epoch": 0.04, + "learning_rate": 4.980833242576572e-05, + "loss": 1.0275, + "step": 39537 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808322717892274e-05, + "loss": 0.7664, + "step": 39538 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808313009773924e-05, + "loss": 0.9369, + "step": 39539 + }, + { + "epoch": 0.04, + "learning_rate": 4.980830330141068e-05, + "loss": 1.2068, + "step": 39540 + }, + { + "epoch": 0.04, + "learning_rate": 4.980829359280254e-05, + "loss": 0.9247, + "step": 39541 + }, + { + "epoch": 0.04, + "learning_rate": 4.98082838839495e-05, + "loss": 0.8073, + "step": 39542 + }, + { + "epoch": 0.04, + "learning_rate": 4.980827417485156e-05, + "loss": 0.9949, + "step": 39543 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808264465508725e-05, + "loss": 1.4867, + "step": 39544 + }, + { + "epoch": 0.04, + "learning_rate": 4.980825475592099e-05, + "loss": 0.954, + "step": 39545 + }, + { + "epoch": 0.04, + "learning_rate": 4.980824504608836e-05, + "loss": 1.0732, + "step": 39546 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808235336010846e-05, + "loss": 0.7687, + "step": 39547 + }, + { + "epoch": 0.04, + "learning_rate": 4.980822562568842e-05, + "loss": 1.0208, + "step": 39548 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808215915121104e-05, + "loss": 0.8852, + "step": 39549 + }, + { + "epoch": 0.04, + "learning_rate": 4.980820620430889e-05, + "loss": 0.6995, + "step": 39550 + }, + { + "epoch": 0.04, + "learning_rate": 4.980819649325178e-05, + "loss": 1.1055, + "step": 39551 + }, + { + "epoch": 0.04, + "learning_rate": 4.980818678194978e-05, + "loss": 0.8915, + "step": 39552 + }, + { + "epoch": 0.04, + "learning_rate": 4.980817707040287e-05, + "loss": 1.1701, + "step": 39553 + }, + { + "epoch": 0.04, + "learning_rate": 4.980816735861108e-05, + "loss": 0.9991, + "step": 39554 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808157646574385e-05, + "loss": 0.5249, + "step": 39555 + }, + { + "epoch": 0.04, + "learning_rate": 4.98081479342928e-05, + "loss": 0.7577, + "step": 39556 + }, + { + "epoch": 0.04, + "learning_rate": 4.980813822176631e-05, + "loss": 1.1473, + "step": 39557 + }, + { + "epoch": 0.04, + "learning_rate": 4.980812850899493e-05, + "loss": 0.5606, + "step": 39558 + }, + { + "epoch": 0.04, + "learning_rate": 4.980811879597866e-05, + "loss": 0.9582, + "step": 39559 + }, + { + "epoch": 0.04, + "learning_rate": 4.980810908271749e-05, + "loss": 1.4524, + "step": 39560 + }, + { + "epoch": 0.04, + "learning_rate": 4.980809936921142e-05, + "loss": 1.0736, + "step": 39561 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808089655460466e-05, + "loss": 0.8222, + "step": 39562 + }, + { + "epoch": 0.04, + "learning_rate": 4.980807994146461e-05, + "loss": 1.1277, + "step": 39563 + }, + { + "epoch": 0.04, + "learning_rate": 4.980807022722386e-05, + "loss": 0.8421, + "step": 39564 + }, + { + "epoch": 0.04, + "learning_rate": 4.980806051273821e-05, + "loss": 0.7651, + "step": 39565 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808050798007675e-05, + "loss": 0.9937, + "step": 39566 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808041083032235e-05, + "loss": 1.1369, + "step": 39567 + }, + { + "epoch": 0.04, + "learning_rate": 4.980803136781191e-05, + "loss": 0.8724, + "step": 39568 + }, + { + "epoch": 0.04, + "learning_rate": 4.980802165234668e-05, + "loss": 1.1304, + "step": 39569 + }, + { + "epoch": 0.04, + "learning_rate": 4.980801193663657e-05, + "loss": 1.1183, + "step": 39570 + }, + { + "epoch": 0.04, + "learning_rate": 4.9808002220681546e-05, + "loss": 0.9634, + "step": 39571 + }, + { + "epoch": 0.04, + "learning_rate": 4.980799250448165e-05, + "loss": 1.3035, + "step": 39572 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807982788036844e-05, + "loss": 0.8984, + "step": 39573 + }, + { + "epoch": 0.04, + "learning_rate": 4.980797307134715e-05, + "loss": 1.0317, + "step": 39574 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807963354412565e-05, + "loss": 0.9827, + "step": 39575 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807953637233084e-05, + "loss": 1.1055, + "step": 39576 + }, + { + "epoch": 0.04, + "learning_rate": 4.980794391980871e-05, + "loss": 0.8893, + "step": 39577 + }, + { + "epoch": 0.04, + "learning_rate": 4.980793420213944e-05, + "loss": 0.8415, + "step": 39578 + }, + { + "epoch": 0.04, + "learning_rate": 4.980792448422528e-05, + "loss": 1.0539, + "step": 39579 + }, + { + "epoch": 0.04, + "learning_rate": 4.980791476606622e-05, + "loss": 0.9605, + "step": 39580 + }, + { + "epoch": 0.04, + "learning_rate": 4.980790504766227e-05, + "loss": 0.1362, + "step": 39581 + }, + { + "epoch": 0.04, + "learning_rate": 4.980789532901343e-05, + "loss": 0.1407, + "step": 39582 + }, + { + "epoch": 0.04, + "learning_rate": 4.980788561011969e-05, + "loss": 0.0693, + "step": 39583 + }, + { + "epoch": 0.04, + "learning_rate": 4.980787589098106e-05, + "loss": 0.1408, + "step": 39584 + }, + { + "epoch": 0.04, + "learning_rate": 4.980786617159754e-05, + "loss": 0.1129, + "step": 39585 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807856451969124e-05, + "loss": 0.0893, + "step": 39586 + }, + { + "epoch": 0.04, + "learning_rate": 4.980784673209582e-05, + "loss": 0.0869, + "step": 39587 + }, + { + "epoch": 0.04, + "learning_rate": 4.980783701197762e-05, + "loss": 0.14, + "step": 39588 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807827291614526e-05, + "loss": 0.17, + "step": 39589 + }, + { + "epoch": 0.04, + "learning_rate": 4.980781757100654e-05, + "loss": 0.0472, + "step": 39590 + }, + { + "epoch": 0.04, + "learning_rate": 4.980780785015367e-05, + "loss": 0.1104, + "step": 39591 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807798129055894e-05, + "loss": 0.0283, + "step": 39592 + }, + { + "epoch": 0.04, + "learning_rate": 4.980778840771323e-05, + "loss": 0.1044, + "step": 39593 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807778686125684e-05, + "loss": 0.1385, + "step": 39594 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807768964293234e-05, + "loss": 0.1543, + "step": 39595 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807759242215904e-05, + "loss": 0.999, + "step": 39596 + }, + { + "epoch": 0.04, + "learning_rate": 4.980774951989367e-05, + "loss": 1.1491, + "step": 39597 + }, + { + "epoch": 0.04, + "learning_rate": 4.980773979732655e-05, + "loss": 1.1371, + "step": 39598 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807730074514535e-05, + "loss": 1.0269, + "step": 39599 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807720351457635e-05, + "loss": 0.9628, + "step": 39600 + }, + { + "epoch": 0.04, + "learning_rate": 4.980771062815584e-05, + "loss": 0.7832, + "step": 39601 + }, + { + "epoch": 0.04, + "learning_rate": 4.980770090460916e-05, + "loss": 0.6424, + "step": 39602 + }, + { + "epoch": 0.04, + "learning_rate": 4.980769118081757e-05, + "loss": 0.5132, + "step": 39603 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807681456781106e-05, + "loss": 0.7342, + "step": 39604 + }, + { + "epoch": 0.04, + "learning_rate": 4.980767173249975e-05, + "loss": 0.8218, + "step": 39605 + }, + { + "epoch": 0.04, + "learning_rate": 4.98076620079735e-05, + "loss": 1.2693, + "step": 39606 + }, + { + "epoch": 0.04, + "learning_rate": 4.980765228320235e-05, + "loss": 1.2889, + "step": 39607 + }, + { + "epoch": 0.04, + "learning_rate": 4.980764255818633e-05, + "loss": 1.0838, + "step": 39608 + }, + { + "epoch": 0.04, + "learning_rate": 4.98076328329254e-05, + "loss": 1.0751, + "step": 39609 + }, + { + "epoch": 0.04, + "learning_rate": 4.980762310741959e-05, + "loss": 1.128, + "step": 39610 + }, + { + "epoch": 0.04, + "learning_rate": 4.980761338166889e-05, + "loss": 1.0213, + "step": 39611 + }, + { + "epoch": 0.04, + "learning_rate": 4.98076036556733e-05, + "loss": 0.9926, + "step": 39612 + }, + { + "epoch": 0.04, + "learning_rate": 4.980759392943281e-05, + "loss": 0.999, + "step": 39613 + }, + { + "epoch": 0.04, + "learning_rate": 4.980758420294743e-05, + "loss": 0.9834, + "step": 39614 + }, + { + "epoch": 0.04, + "learning_rate": 4.980757447621717e-05, + "loss": 1.4501, + "step": 39615 + }, + { + "epoch": 0.04, + "learning_rate": 4.980756474924202e-05, + "loss": 1.042, + "step": 39616 + }, + { + "epoch": 0.04, + "learning_rate": 4.980755502202198e-05, + "loss": 1.2663, + "step": 39617 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807545294557046e-05, + "loss": 1.1256, + "step": 39618 + }, + { + "epoch": 0.04, + "learning_rate": 4.980753556684722e-05, + "loss": 1.1558, + "step": 39619 + }, + { + "epoch": 0.04, + "learning_rate": 4.980752583889251e-05, + "loss": 1.3595, + "step": 39620 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807516110692906e-05, + "loss": 1.0893, + "step": 39621 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807506382248415e-05, + "loss": 1.1788, + "step": 39622 + }, + { + "epoch": 0.04, + "learning_rate": 4.980749665355903e-05, + "loss": 0.8338, + "step": 39623 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807486924624766e-05, + "loss": 1.426, + "step": 39624 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807477195445606e-05, + "loss": 1.119, + "step": 39625 + }, + { + "epoch": 0.04, + "learning_rate": 4.980746746602156e-05, + "loss": 0.8227, + "step": 39626 + }, + { + "epoch": 0.04, + "learning_rate": 4.980745773635262e-05, + "loss": 1.3086, + "step": 39627 + }, + { + "epoch": 0.04, + "learning_rate": 4.98074480064388e-05, + "loss": 1.2431, + "step": 39628 + }, + { + "epoch": 0.04, + "learning_rate": 4.980743827628008e-05, + "loss": 1.1512, + "step": 39629 + }, + { + "epoch": 0.04, + "learning_rate": 4.980742854587648e-05, + "loss": 1.4715, + "step": 39630 + }, + { + "epoch": 0.04, + "learning_rate": 4.980741881522799e-05, + "loss": 1.0971, + "step": 39631 + }, + { + "epoch": 0.04, + "learning_rate": 4.980740908433461e-05, + "loss": 0.869, + "step": 39632 + }, + { + "epoch": 0.04, + "learning_rate": 4.980739935319634e-05, + "loss": 1.0467, + "step": 39633 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807389621813185e-05, + "loss": 0.7941, + "step": 39634 + }, + { + "epoch": 0.04, + "learning_rate": 4.980737989018514e-05, + "loss": 0.8166, + "step": 39635 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807370158312204e-05, + "loss": 1.2776, + "step": 39636 + }, + { + "epoch": 0.04, + "learning_rate": 4.980736042619439e-05, + "loss": 1.0115, + "step": 39637 + }, + { + "epoch": 0.04, + "learning_rate": 4.980735069383168e-05, + "loss": 1.2009, + "step": 39638 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807340961224084e-05, + "loss": 1.1516, + "step": 39639 + }, + { + "epoch": 0.04, + "learning_rate": 4.98073312283716e-05, + "loss": 0.9791, + "step": 39640 + }, + { + "epoch": 0.04, + "learning_rate": 4.980732149527423e-05, + "loss": 1.0081, + "step": 39641 + }, + { + "epoch": 0.04, + "learning_rate": 4.980731176193197e-05, + "loss": 0.9408, + "step": 39642 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807302028344825e-05, + "loss": 0.8656, + "step": 39643 + }, + { + "epoch": 0.04, + "learning_rate": 4.98072922945128e-05, + "loss": 1.0545, + "step": 39644 + }, + { + "epoch": 0.04, + "learning_rate": 4.980728256043588e-05, + "loss": 0.9576, + "step": 39645 + }, + { + "epoch": 0.04, + "learning_rate": 4.980727282611407e-05, + "loss": 0.7662, + "step": 39646 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807263091547374e-05, + "loss": 1.1132, + "step": 39647 + }, + { + "epoch": 0.04, + "learning_rate": 4.98072533567358e-05, + "loss": 0.9596, + "step": 39648 + }, + { + "epoch": 0.04, + "learning_rate": 4.980724362167933e-05, + "loss": 0.8194, + "step": 39649 + }, + { + "epoch": 0.04, + "learning_rate": 4.980723388637798e-05, + "loss": 0.945, + "step": 39650 + }, + { + "epoch": 0.04, + "learning_rate": 4.980722415083173e-05, + "loss": 1.2175, + "step": 39651 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807214415040606e-05, + "loss": 1.0356, + "step": 39652 + }, + { + "epoch": 0.04, + "learning_rate": 4.980720467900459e-05, + "loss": 1.229, + "step": 39653 + }, + { + "epoch": 0.04, + "learning_rate": 4.980719494272369e-05, + "loss": 0.7093, + "step": 39654 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807185206197905e-05, + "loss": 1.053, + "step": 39655 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807175469427236e-05, + "loss": 0.8506, + "step": 39656 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807165732411674e-05, + "loss": 1.2515, + "step": 39657 + }, + { + "epoch": 0.04, + "learning_rate": 4.980715599515123e-05, + "loss": 1.3079, + "step": 39658 + }, + { + "epoch": 0.04, + "learning_rate": 4.980714625764591e-05, + "loss": 1.1382, + "step": 39659 + }, + { + "epoch": 0.04, + "learning_rate": 4.980713651989569e-05, + "loss": 1.0446, + "step": 39660 + }, + { + "epoch": 0.04, + "learning_rate": 4.980712678190059e-05, + "loss": 0.8599, + "step": 39661 + }, + { + "epoch": 0.04, + "learning_rate": 4.98071170436606e-05, + "loss": 0.8707, + "step": 39662 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807107305175724e-05, + "loss": 0.9389, + "step": 39663 + }, + { + "epoch": 0.04, + "learning_rate": 4.980709756644597e-05, + "loss": 0.9083, + "step": 39664 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807087827471335e-05, + "loss": 1.1268, + "step": 39665 + }, + { + "epoch": 0.04, + "learning_rate": 4.98070780882518e-05, + "loss": 1.0101, + "step": 39666 + }, + { + "epoch": 0.04, + "learning_rate": 4.980706834878739e-05, + "loss": 0.7073, + "step": 39667 + }, + { + "epoch": 0.04, + "learning_rate": 4.9807058609078094e-05, + "loss": 1.0927, + "step": 39668 + }, + { + "epoch": 0.04, + "learning_rate": 4.980704886912391e-05, + "loss": 1.3372, + "step": 39669 + }, + { + "epoch": 0.04, + "learning_rate": 4.980703912892485e-05, + "loss": 1.4678, + "step": 39670 + }, + { + "epoch": 0.04, + "learning_rate": 4.98070293884809e-05, + "loss": 0.952, + "step": 39671 + }, + { + "epoch": 0.04, + "learning_rate": 4.980701964779206e-05, + "loss": 1.1444, + "step": 39672 + }, + { + "epoch": 0.04, + "learning_rate": 4.980700990685834e-05, + "loss": 0.7154, + "step": 39673 + }, + { + "epoch": 0.04, + "learning_rate": 4.980700016567973e-05, + "loss": 0.182, + "step": 39674 + }, + { + "epoch": 0.04, + "learning_rate": 4.980699042425624e-05, + "loss": 0.098, + "step": 39675 + }, + { + "epoch": 0.04, + "learning_rate": 4.980698068258787e-05, + "loss": 0.2037, + "step": 39676 + }, + { + "epoch": 0.04, + "learning_rate": 4.980697094067461e-05, + "loss": 0.1751, + "step": 39677 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806961198516475e-05, + "loss": 0.1502, + "step": 39678 + }, + { + "epoch": 0.04, + "learning_rate": 4.980695145611345e-05, + "loss": 0.14, + "step": 39679 + }, + { + "epoch": 0.04, + "learning_rate": 4.980694171346554e-05, + "loss": 0.073, + "step": 39680 + }, + { + "epoch": 0.04, + "learning_rate": 4.980693197057275e-05, + "loss": 0.0821, + "step": 39681 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806922227435074e-05, + "loss": 0.6482, + "step": 39682 + }, + { + "epoch": 0.04, + "learning_rate": 4.980691248405251e-05, + "loss": 1.0999, + "step": 39683 + }, + { + "epoch": 0.04, + "learning_rate": 4.980690274042507e-05, + "loss": 0.6375, + "step": 39684 + }, + { + "epoch": 0.04, + "learning_rate": 4.980689299655274e-05, + "loss": 0.5399, + "step": 39685 + }, + { + "epoch": 0.04, + "learning_rate": 4.980688325243553e-05, + "loss": 0.3151, + "step": 39686 + }, + { + "epoch": 0.04, + "learning_rate": 4.980687350807344e-05, + "loss": 0.9839, + "step": 39687 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806863763466463e-05, + "loss": 1.0598, + "step": 39688 + }, + { + "epoch": 0.04, + "learning_rate": 4.98068540186146e-05, + "loss": 1.0311, + "step": 39689 + }, + { + "epoch": 0.04, + "learning_rate": 4.980684427351786e-05, + "loss": 1.1698, + "step": 39690 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806834528176236e-05, + "loss": 0.9581, + "step": 39691 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806824782589737e-05, + "loss": 0.9782, + "step": 39692 + }, + { + "epoch": 0.04, + "learning_rate": 4.980681503675834e-05, + "loss": 1.6296, + "step": 39693 + }, + { + "epoch": 0.04, + "learning_rate": 4.980680529068207e-05, + "loss": 1.4184, + "step": 39694 + }, + { + "epoch": 0.04, + "learning_rate": 4.980679554436092e-05, + "loss": 0.2725, + "step": 39695 + }, + { + "epoch": 0.04, + "learning_rate": 4.980678579779488e-05, + "loss": 0.2316, + "step": 39696 + }, + { + "epoch": 0.04, + "learning_rate": 4.980677605098396e-05, + "loss": 0.3968, + "step": 39697 + }, + { + "epoch": 0.04, + "learning_rate": 4.980676630392817e-05, + "loss": 0.1634, + "step": 39698 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806756556627476e-05, + "loss": 1.0726, + "step": 39699 + }, + { + "epoch": 0.04, + "learning_rate": 4.980674680908192e-05, + "loss": 1.428, + "step": 39700 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806737061291467e-05, + "loss": 0.8447, + "step": 39701 + }, + { + "epoch": 0.04, + "learning_rate": 4.980672731325614e-05, + "loss": 1.1002, + "step": 39702 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806717564975935e-05, + "loss": 1.0821, + "step": 39703 + }, + { + "epoch": 0.04, + "learning_rate": 4.980670781645084e-05, + "loss": 0.8764, + "step": 39704 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806698067680866e-05, + "loss": 0.886, + "step": 39705 + }, + { + "epoch": 0.04, + "learning_rate": 4.980668831866602e-05, + "loss": 1.1687, + "step": 39706 + }, + { + "epoch": 0.04, + "learning_rate": 4.980667856940628e-05, + "loss": 1.0449, + "step": 39707 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806668819901667e-05, + "loss": 1.1332, + "step": 39708 + }, + { + "epoch": 0.04, + "learning_rate": 4.980665907015217e-05, + "loss": 1.1815, + "step": 39709 + }, + { + "epoch": 0.04, + "learning_rate": 4.980664932015779e-05, + "loss": 0.9185, + "step": 39710 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806639569918534e-05, + "loss": 1.1374, + "step": 39711 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806629819434396e-05, + "loss": 0.9978, + "step": 39712 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806620068705377e-05, + "loss": 0.6502, + "step": 39713 + }, + { + "epoch": 0.04, + "learning_rate": 4.980661031773148e-05, + "loss": 0.4418, + "step": 39714 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806600566512696e-05, + "loss": 0.3249, + "step": 39715 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806590815049035e-05, + "loss": 0.3394, + "step": 39716 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806581063340493e-05, + "loss": 0.19, + "step": 39717 + }, + { + "epoch": 0.04, + "learning_rate": 4.980657131138707e-05, + "loss": 0.1253, + "step": 39718 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806561559188775e-05, + "loss": 0.0901, + "step": 39719 + }, + { + "epoch": 0.04, + "learning_rate": 4.980655180674559e-05, + "loss": 0.6536, + "step": 39720 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806542054057534e-05, + "loss": 1.2669, + "step": 39721 + }, + { + "epoch": 0.04, + "learning_rate": 4.980653230112459e-05, + "loss": 1.1506, + "step": 39722 + }, + { + "epoch": 0.04, + "learning_rate": 4.980652254794677e-05, + "loss": 1.3095, + "step": 39723 + }, + { + "epoch": 0.04, + "learning_rate": 4.980651279452407e-05, + "loss": 0.9535, + "step": 39724 + }, + { + "epoch": 0.04, + "learning_rate": 4.980650304085649e-05, + "loss": 1.2392, + "step": 39725 + }, + { + "epoch": 0.04, + "learning_rate": 4.980649328694403e-05, + "loss": 1.1935, + "step": 39726 + }, + { + "epoch": 0.04, + "learning_rate": 4.98064835327867e-05, + "loss": 1.2554, + "step": 39727 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806473778384475e-05, + "loss": 1.0312, + "step": 39728 + }, + { + "epoch": 0.04, + "learning_rate": 4.980646402373739e-05, + "loss": 0.7624, + "step": 39729 + }, + { + "epoch": 0.04, + "learning_rate": 4.980645426884541e-05, + "loss": 1.0175, + "step": 39730 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806444513708554e-05, + "loss": 1.3839, + "step": 39731 + }, + { + "epoch": 0.04, + "learning_rate": 4.980643475832682e-05, + "loss": 1.2868, + "step": 39732 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806425002700205e-05, + "loss": 1.2763, + "step": 39733 + }, + { + "epoch": 0.04, + "learning_rate": 4.980641524682872e-05, + "loss": 1.1918, + "step": 39734 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806405490712355e-05, + "loss": 1.1339, + "step": 39735 + }, + { + "epoch": 0.04, + "learning_rate": 4.980639573435111e-05, + "loss": 0.9954, + "step": 39736 + }, + { + "epoch": 0.04, + "learning_rate": 4.980638597774498e-05, + "loss": 0.9955, + "step": 39737 + }, + { + "epoch": 0.04, + "learning_rate": 4.980637622089398e-05, + "loss": 1.1512, + "step": 39738 + }, + { + "epoch": 0.04, + "learning_rate": 4.98063664637981e-05, + "loss": 0.7312, + "step": 39739 + }, + { + "epoch": 0.04, + "learning_rate": 4.980635670645734e-05, + "loss": 0.7837, + "step": 39740 + }, + { + "epoch": 0.04, + "learning_rate": 4.98063469488717e-05, + "loss": 0.9313, + "step": 39741 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806337191041186e-05, + "loss": 1.2674, + "step": 39742 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806327432965795e-05, + "loss": 1.0388, + "step": 39743 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806317674645525e-05, + "loss": 0.9276, + "step": 39744 + }, + { + "epoch": 0.04, + "learning_rate": 4.980630791608037e-05, + "loss": 0.9225, + "step": 39745 + }, + { + "epoch": 0.04, + "learning_rate": 4.980629815727035e-05, + "loss": 1.0504, + "step": 39746 + }, + { + "epoch": 0.04, + "learning_rate": 4.980628839821545e-05, + "loss": 1.074, + "step": 39747 + }, + { + "epoch": 0.04, + "learning_rate": 4.980627863891567e-05, + "loss": 0.9533, + "step": 39748 + }, + { + "epoch": 0.04, + "learning_rate": 4.980626887937101e-05, + "loss": 1.1157, + "step": 39749 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806259119581475e-05, + "loss": 0.9622, + "step": 39750 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806249359547067e-05, + "loss": 1.1137, + "step": 39751 + }, + { + "epoch": 0.04, + "learning_rate": 4.980623959926778e-05, + "loss": 1.2651, + "step": 39752 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806229838743615e-05, + "loss": 1.3131, + "step": 39753 + }, + { + "epoch": 0.04, + "learning_rate": 4.980622007797457e-05, + "loss": 1.2567, + "step": 39754 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806210316960655e-05, + "loss": 1.0471, + "step": 39755 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806200555701865e-05, + "loss": 1.26, + "step": 39756 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806190794198193e-05, + "loss": 0.6757, + "step": 39757 + }, + { + "epoch": 0.04, + "learning_rate": 4.980618103244965e-05, + "loss": 0.576, + "step": 39758 + }, + { + "epoch": 0.04, + "learning_rate": 4.980617127045622e-05, + "loss": 0.9231, + "step": 39759 + }, + { + "epoch": 0.04, + "learning_rate": 4.980616150821792e-05, + "loss": 1.0475, + "step": 39760 + }, + { + "epoch": 0.04, + "learning_rate": 4.980615174573475e-05, + "loss": 1.0177, + "step": 39761 + }, + { + "epoch": 0.04, + "learning_rate": 4.98061419830067e-05, + "loss": 1.0858, + "step": 39762 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806132220033774e-05, + "loss": 1.0365, + "step": 39763 + }, + { + "epoch": 0.04, + "learning_rate": 4.980612245681597e-05, + "loss": 0.9833, + "step": 39764 + }, + { + "epoch": 0.04, + "learning_rate": 4.980611269335329e-05, + "loss": 0.9814, + "step": 39765 + }, + { + "epoch": 0.04, + "learning_rate": 4.980610292964574e-05, + "loss": 0.9961, + "step": 39766 + }, + { + "epoch": 0.04, + "learning_rate": 4.980609316569331e-05, + "loss": 0.8658, + "step": 39767 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806083401496007e-05, + "loss": 0.9336, + "step": 39768 + }, + { + "epoch": 0.04, + "learning_rate": 4.980607363705383e-05, + "loss": 1.0476, + "step": 39769 + }, + { + "epoch": 0.04, + "learning_rate": 4.980606387236678e-05, + "loss": 1.0702, + "step": 39770 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806054107434845e-05, + "loss": 0.7545, + "step": 39771 + }, + { + "epoch": 0.04, + "learning_rate": 4.980604434225804e-05, + "loss": 0.5542, + "step": 39772 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806034576836366e-05, + "loss": 1.3612, + "step": 39773 + }, + { + "epoch": 0.04, + "learning_rate": 4.980602481116981e-05, + "loss": 1.0093, + "step": 39774 + }, + { + "epoch": 0.04, + "learning_rate": 4.980601504525838e-05, + "loss": 0.9272, + "step": 39775 + }, + { + "epoch": 0.04, + "learning_rate": 4.980600527910208e-05, + "loss": 1.1514, + "step": 39776 + }, + { + "epoch": 0.04, + "learning_rate": 4.98059955127009e-05, + "loss": 0.9447, + "step": 39777 + }, + { + "epoch": 0.04, + "learning_rate": 4.980598574605485e-05, + "loss": 0.6046, + "step": 39778 + }, + { + "epoch": 0.04, + "learning_rate": 4.980597597916392e-05, + "loss": 0.9977, + "step": 39779 + }, + { + "epoch": 0.04, + "learning_rate": 4.980596621202812e-05, + "loss": 0.7683, + "step": 39780 + }, + { + "epoch": 0.04, + "learning_rate": 4.980595644464745e-05, + "loss": 1.1903, + "step": 39781 + }, + { + "epoch": 0.04, + "learning_rate": 4.98059466770219e-05, + "loss": 1.1113, + "step": 39782 + }, + { + "epoch": 0.04, + "learning_rate": 4.980593690915148e-05, + "loss": 1.0493, + "step": 39783 + }, + { + "epoch": 0.04, + "learning_rate": 4.980592714103619e-05, + "loss": 1.0784, + "step": 39784 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805917372676015e-05, + "loss": 0.9485, + "step": 39785 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805907604070975e-05, + "loss": 1.2104, + "step": 39786 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805897835221055e-05, + "loss": 1.325, + "step": 39787 + }, + { + "epoch": 0.04, + "learning_rate": 4.980588806612627e-05, + "loss": 1.2289, + "step": 39788 + }, + { + "epoch": 0.04, + "learning_rate": 4.98058782967866e-05, + "loss": 1.2455, + "step": 39789 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805868527202064e-05, + "loss": 1.1528, + "step": 39790 + }, + { + "epoch": 0.04, + "learning_rate": 4.980585875737266e-05, + "loss": 1.0837, + "step": 39791 + }, + { + "epoch": 0.04, + "learning_rate": 4.980584898729837e-05, + "loss": 1.0239, + "step": 39792 + }, + { + "epoch": 0.04, + "learning_rate": 4.980583921697922e-05, + "loss": 1.4868, + "step": 39793 + }, + { + "epoch": 0.04, + "learning_rate": 4.980582944641519e-05, + "loss": 1.1437, + "step": 39794 + }, + { + "epoch": 0.04, + "learning_rate": 4.980581967560629e-05, + "loss": 1.1375, + "step": 39795 + }, + { + "epoch": 0.04, + "learning_rate": 4.980580990455252e-05, + "loss": 1.2235, + "step": 39796 + }, + { + "epoch": 0.04, + "learning_rate": 4.980580013325388e-05, + "loss": 1.2088, + "step": 39797 + }, + { + "epoch": 0.04, + "learning_rate": 4.980579036171036e-05, + "loss": 0.7889, + "step": 39798 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805780589921966e-05, + "loss": 0.8961, + "step": 39799 + }, + { + "epoch": 0.04, + "learning_rate": 4.98057708178887e-05, + "loss": 0.4882, + "step": 39800 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805761045610566e-05, + "loss": 1.0117, + "step": 39801 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805751273087565e-05, + "loss": 1.1287, + "step": 39802 + }, + { + "epoch": 0.04, + "learning_rate": 4.980574150031968e-05, + "loss": 0.8852, + "step": 39803 + }, + { + "epoch": 0.04, + "learning_rate": 4.980573172730693e-05, + "loss": 1.0397, + "step": 39804 + }, + { + "epoch": 0.04, + "learning_rate": 4.980572195404931e-05, + "loss": 1.0921, + "step": 39805 + }, + { + "epoch": 0.04, + "learning_rate": 4.980571218054682e-05, + "loss": 1.281, + "step": 39806 + }, + { + "epoch": 0.04, + "learning_rate": 4.980570240679945e-05, + "loss": 1.1871, + "step": 39807 + }, + { + "epoch": 0.04, + "learning_rate": 4.980569263280721e-05, + "loss": 0.9497, + "step": 39808 + }, + { + "epoch": 0.04, + "learning_rate": 4.98056828585701e-05, + "loss": 1.1334, + "step": 39809 + }, + { + "epoch": 0.04, + "learning_rate": 4.980567308408812e-05, + "loss": 0.9606, + "step": 39810 + }, + { + "epoch": 0.04, + "learning_rate": 4.980566330936127e-05, + "loss": 1.1414, + "step": 39811 + }, + { + "epoch": 0.04, + "learning_rate": 4.980565353438955e-05, + "loss": 0.7484, + "step": 39812 + }, + { + "epoch": 0.04, + "learning_rate": 4.980564375917296e-05, + "loss": 0.6242, + "step": 39813 + }, + { + "epoch": 0.04, + "learning_rate": 4.98056339837115e-05, + "loss": 0.7956, + "step": 39814 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805624208005165e-05, + "loss": 0.6091, + "step": 39815 + }, + { + "epoch": 0.04, + "learning_rate": 4.980561443205396e-05, + "loss": 0.4182, + "step": 39816 + }, + { + "epoch": 0.04, + "learning_rate": 4.980560465585788e-05, + "loss": 0.5593, + "step": 39817 + }, + { + "epoch": 0.04, + "learning_rate": 4.980559487941694e-05, + "loss": 0.9537, + "step": 39818 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805585102731126e-05, + "loss": 0.9216, + "step": 39819 + }, + { + "epoch": 0.04, + "learning_rate": 4.980557532580044e-05, + "loss": 1.0556, + "step": 39820 + }, + { + "epoch": 0.04, + "learning_rate": 4.980556554862488e-05, + "loss": 0.9017, + "step": 39821 + }, + { + "epoch": 0.04, + "learning_rate": 4.980555577120445e-05, + "loss": 1.1458, + "step": 39822 + }, + { + "epoch": 0.04, + "learning_rate": 4.980554599353915e-05, + "loss": 0.3722, + "step": 39823 + }, + { + "epoch": 0.04, + "learning_rate": 4.980553621562899e-05, + "loss": 0.7794, + "step": 39824 + }, + { + "epoch": 0.04, + "learning_rate": 4.980552643747395e-05, + "loss": 0.983, + "step": 39825 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805516659074046e-05, + "loss": 0.7596, + "step": 39826 + }, + { + "epoch": 0.04, + "learning_rate": 4.980550688042927e-05, + "loss": 1.1471, + "step": 39827 + }, + { + "epoch": 0.04, + "learning_rate": 4.980549710153962e-05, + "loss": 1.0333, + "step": 39828 + }, + { + "epoch": 0.04, + "learning_rate": 4.980548732240511e-05, + "loss": 1.338, + "step": 39829 + }, + { + "epoch": 0.04, + "learning_rate": 4.980547754302573e-05, + "loss": 1.0228, + "step": 39830 + }, + { + "epoch": 0.04, + "learning_rate": 4.980546776340147e-05, + "loss": 1.2134, + "step": 39831 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805457983532346e-05, + "loss": 1.0184, + "step": 39832 + }, + { + "epoch": 0.04, + "learning_rate": 4.980544820341836e-05, + "loss": 0.7001, + "step": 39833 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805438423059496e-05, + "loss": 0.9346, + "step": 39834 + }, + { + "epoch": 0.04, + "learning_rate": 4.980542864245577e-05, + "loss": 1.0392, + "step": 39835 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805418861607164e-05, + "loss": 1.2257, + "step": 39836 + }, + { + "epoch": 0.04, + "learning_rate": 4.98054090805137e-05, + "loss": 1.232, + "step": 39837 + }, + { + "epoch": 0.04, + "learning_rate": 4.980539929917537e-05, + "loss": 0.9506, + "step": 39838 + }, + { + "epoch": 0.04, + "learning_rate": 4.980538951759216e-05, + "loss": 0.9749, + "step": 39839 + }, + { + "epoch": 0.04, + "learning_rate": 4.980537973576409e-05, + "loss": 1.703, + "step": 39840 + }, + { + "epoch": 0.04, + "learning_rate": 4.980536995369115e-05, + "loss": 1.1668, + "step": 39841 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805360171373346e-05, + "loss": 1.026, + "step": 39842 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805350388810667e-05, + "loss": 1.0058, + "step": 39843 + }, + { + "epoch": 0.04, + "learning_rate": 4.980534060600313e-05, + "loss": 1.246, + "step": 39844 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805330822950714e-05, + "loss": 1.4204, + "step": 39845 + }, + { + "epoch": 0.04, + "learning_rate": 4.980532103965343e-05, + "loss": 1.0409, + "step": 39846 + }, + { + "epoch": 0.04, + "learning_rate": 4.980531125611128e-05, + "loss": 0.9456, + "step": 39847 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805301472324265e-05, + "loss": 0.7395, + "step": 39848 + }, + { + "epoch": 0.04, + "learning_rate": 4.980529168829239e-05, + "loss": 0.7324, + "step": 39849 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805281904015636e-05, + "loss": 0.9092, + "step": 39850 + }, + { + "epoch": 0.04, + "learning_rate": 4.980527211949402e-05, + "loss": 0.7404, + "step": 39851 + }, + { + "epoch": 0.04, + "learning_rate": 4.980526233472753e-05, + "loss": 0.6173, + "step": 39852 + }, + { + "epoch": 0.04, + "learning_rate": 4.980525254971618e-05, + "loss": 1.1041, + "step": 39853 + }, + { + "epoch": 0.04, + "learning_rate": 4.980524276445996e-05, + "loss": 1.1045, + "step": 39854 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805232978958874e-05, + "loss": 1.0316, + "step": 39855 + }, + { + "epoch": 0.04, + "learning_rate": 4.980522319321292e-05, + "loss": 1.0486, + "step": 39856 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805213407222105e-05, + "loss": 0.8043, + "step": 39857 + }, + { + "epoch": 0.04, + "learning_rate": 4.980520362098642e-05, + "loss": 0.5091, + "step": 39858 + }, + { + "epoch": 0.04, + "learning_rate": 4.980519383450586e-05, + "loss": 1.0089, + "step": 39859 + }, + { + "epoch": 0.04, + "learning_rate": 4.980518404778045e-05, + "loss": 1.1165, + "step": 39860 + }, + { + "epoch": 0.04, + "learning_rate": 4.980517426081016e-05, + "loss": 0.9318, + "step": 39861 + }, + { + "epoch": 0.04, + "learning_rate": 4.980516447359501e-05, + "loss": 1.3488, + "step": 39862 + }, + { + "epoch": 0.04, + "learning_rate": 4.980515468613499e-05, + "loss": 1.393, + "step": 39863 + }, + { + "epoch": 0.04, + "learning_rate": 4.980514489843011e-05, + "loss": 1.2074, + "step": 39864 + }, + { + "epoch": 0.04, + "learning_rate": 4.980513511048036e-05, + "loss": 0.8507, + "step": 39865 + }, + { + "epoch": 0.04, + "learning_rate": 4.980512532228575e-05, + "loss": 0.9746, + "step": 39866 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805115533846265e-05, + "loss": 1.0322, + "step": 39867 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805105745161917e-05, + "loss": 1.1499, + "step": 39868 + }, + { + "epoch": 0.04, + "learning_rate": 4.980509595623271e-05, + "loss": 0.809, + "step": 39869 + }, + { + "epoch": 0.04, + "learning_rate": 4.980508616705863e-05, + "loss": 1.0846, + "step": 39870 + }, + { + "epoch": 0.04, + "learning_rate": 4.980507637763968e-05, + "loss": 1.2244, + "step": 39871 + }, + { + "epoch": 0.04, + "learning_rate": 4.980506658797588e-05, + "loss": 1.4287, + "step": 39872 + }, + { + "epoch": 0.04, + "learning_rate": 4.980505679806721e-05, + "loss": 1.0867, + "step": 39873 + }, + { + "epoch": 0.04, + "learning_rate": 4.980504700791366e-05, + "loss": 1.0142, + "step": 39874 + }, + { + "epoch": 0.04, + "learning_rate": 4.980503721751526e-05, + "loss": 0.8707, + "step": 39875 + }, + { + "epoch": 0.04, + "learning_rate": 4.9805027426871994e-05, + "loss": 1.3318, + "step": 39876 + }, + { + "epoch": 0.04, + "learning_rate": 4.980501763598386e-05, + "loss": 1.0133, + "step": 39877 + }, + { + "epoch": 0.04, + "learning_rate": 4.980500784485086e-05, + "loss": 0.9691, + "step": 39878 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804998053472996e-05, + "loss": 0.8251, + "step": 39879 + }, + { + "epoch": 0.04, + "learning_rate": 4.980498826185027e-05, + "loss": 0.4781, + "step": 39880 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804978469982677e-05, + "loss": 1.0287, + "step": 39881 + }, + { + "epoch": 0.04, + "learning_rate": 4.980496867787022e-05, + "loss": 1.0725, + "step": 39882 + }, + { + "epoch": 0.04, + "learning_rate": 4.980495888551291e-05, + "loss": 0.9222, + "step": 39883 + }, + { + "epoch": 0.04, + "learning_rate": 4.980494909291072e-05, + "loss": 0.835, + "step": 39884 + }, + { + "epoch": 0.04, + "learning_rate": 4.980493930006367e-05, + "loss": 0.6236, + "step": 39885 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804929506971764e-05, + "loss": 1.095, + "step": 39886 + }, + { + "epoch": 0.04, + "learning_rate": 4.980491971363499e-05, + "loss": 1.2357, + "step": 39887 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804909920053346e-05, + "loss": 0.9702, + "step": 39888 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804900126226844e-05, + "loss": 1.3954, + "step": 39889 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804890332155474e-05, + "loss": 1.0667, + "step": 39890 + }, + { + "epoch": 0.04, + "learning_rate": 4.980488053783925e-05, + "loss": 1.1576, + "step": 39891 + }, + { + "epoch": 0.04, + "learning_rate": 4.980487074327815e-05, + "loss": 1.0314, + "step": 39892 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804860948472195e-05, + "loss": 0.8599, + "step": 39893 + }, + { + "epoch": 0.04, + "learning_rate": 4.980485115342138e-05, + "loss": 0.8577, + "step": 39894 + }, + { + "epoch": 0.04, + "learning_rate": 4.98048413581257e-05, + "loss": 0.9501, + "step": 39895 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804831562585154e-05, + "loss": 1.1824, + "step": 39896 + }, + { + "epoch": 0.04, + "learning_rate": 4.980482176679974e-05, + "loss": 1.1088, + "step": 39897 + }, + { + "epoch": 0.04, + "learning_rate": 4.980481197076947e-05, + "loss": 1.1328, + "step": 39898 + }, + { + "epoch": 0.04, + "learning_rate": 4.980480217449434e-05, + "loss": 0.8645, + "step": 39899 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804792377974344e-05, + "loss": 1.2619, + "step": 39900 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804782581209484e-05, + "loss": 1.0967, + "step": 39901 + }, + { + "epoch": 0.04, + "learning_rate": 4.980477278419976e-05, + "loss": 0.8888, + "step": 39902 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804762986945176e-05, + "loss": 1.0697, + "step": 39903 + }, + { + "epoch": 0.04, + "learning_rate": 4.980475318944573e-05, + "loss": 1.0638, + "step": 39904 + }, + { + "epoch": 0.04, + "learning_rate": 4.980474339170143e-05, + "loss": 0.9915, + "step": 39905 + }, + { + "epoch": 0.04, + "learning_rate": 4.980473359371225e-05, + "loss": 1.1577, + "step": 39906 + }, + { + "epoch": 0.04, + "learning_rate": 4.980472379547822e-05, + "loss": 1.4436, + "step": 39907 + }, + { + "epoch": 0.04, + "learning_rate": 4.980471399699933e-05, + "loss": 2.2532, + "step": 39908 + }, + { + "epoch": 0.04, + "learning_rate": 4.980470419827558e-05, + "loss": 1.0512, + "step": 39909 + }, + { + "epoch": 0.04, + "learning_rate": 4.980469439930696e-05, + "loss": 0.8714, + "step": 39910 + }, + { + "epoch": 0.04, + "learning_rate": 4.980468460009348e-05, + "loss": 0.9124, + "step": 39911 + }, + { + "epoch": 0.04, + "learning_rate": 4.980467480063514e-05, + "loss": 0.9924, + "step": 39912 + }, + { + "epoch": 0.04, + "learning_rate": 4.980466500093194e-05, + "loss": 0.6864, + "step": 39913 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804655200983874e-05, + "loss": 0.6734, + "step": 39914 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804645400790956e-05, + "loss": 0.6543, + "step": 39915 + }, + { + "epoch": 0.04, + "learning_rate": 4.980463560035317e-05, + "loss": 0.8771, + "step": 39916 + }, + { + "epoch": 0.04, + "learning_rate": 4.980462579967053e-05, + "loss": 0.7412, + "step": 39917 + }, + { + "epoch": 0.04, + "learning_rate": 4.980461599874302e-05, + "loss": 0.8485, + "step": 39918 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804606197570656e-05, + "loss": 1.0743, + "step": 39919 + }, + { + "epoch": 0.04, + "learning_rate": 4.980459639615342e-05, + "loss": 0.9703, + "step": 39920 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804586594491344e-05, + "loss": 1.0341, + "step": 39921 + }, + { + "epoch": 0.04, + "learning_rate": 4.980457679258439e-05, + "loss": 1.0085, + "step": 39922 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804566990432584e-05, + "loss": 1.0665, + "step": 39923 + }, + { + "epoch": 0.04, + "learning_rate": 4.980455718803592e-05, + "loss": 1.0677, + "step": 39924 + }, + { + "epoch": 0.04, + "learning_rate": 4.980454738539438e-05, + "loss": 1.0629, + "step": 39925 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804537582507995e-05, + "loss": 0.875, + "step": 39926 + }, + { + "epoch": 0.04, + "learning_rate": 4.980452777937675e-05, + "loss": 0.8644, + "step": 39927 + }, + { + "epoch": 0.04, + "learning_rate": 4.980451797600064e-05, + "loss": 0.8147, + "step": 39928 + }, + { + "epoch": 0.04, + "learning_rate": 4.980450817237967e-05, + "loss": 1.1121, + "step": 39929 + }, + { + "epoch": 0.04, + "learning_rate": 4.980449836851384e-05, + "loss": 0.9854, + "step": 39930 + }, + { + "epoch": 0.04, + "learning_rate": 4.980448856440315e-05, + "loss": 0.9022, + "step": 39931 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804478760047604e-05, + "loss": 0.9454, + "step": 39932 + }, + { + "epoch": 0.04, + "learning_rate": 4.98044689554472e-05, + "loss": 1.1218, + "step": 39933 + }, + { + "epoch": 0.04, + "learning_rate": 4.980445915060193e-05, + "loss": 0.8561, + "step": 39934 + }, + { + "epoch": 0.04, + "learning_rate": 4.980444934551181e-05, + "loss": 0.9451, + "step": 39935 + }, + { + "epoch": 0.04, + "learning_rate": 4.980443954017683e-05, + "loss": 0.9667, + "step": 39936 + }, + { + "epoch": 0.04, + "learning_rate": 4.980442973459698e-05, + "loss": 0.6333, + "step": 39937 + }, + { + "epoch": 0.04, + "learning_rate": 4.980441992877228e-05, + "loss": 0.9019, + "step": 39938 + }, + { + "epoch": 0.04, + "learning_rate": 4.980441012270272e-05, + "loss": 0.9139, + "step": 39939 + }, + { + "epoch": 0.04, + "learning_rate": 4.98044003163883e-05, + "loss": 1.0522, + "step": 39940 + }, + { + "epoch": 0.04, + "learning_rate": 4.980439050982902e-05, + "loss": 0.9855, + "step": 39941 + }, + { + "epoch": 0.04, + "learning_rate": 4.980438070302489e-05, + "loss": 1.1599, + "step": 39942 + }, + { + "epoch": 0.04, + "learning_rate": 4.980437089597589e-05, + "loss": 0.8775, + "step": 39943 + }, + { + "epoch": 0.04, + "learning_rate": 4.980436108868204e-05, + "loss": 1.0334, + "step": 39944 + }, + { + "epoch": 0.04, + "learning_rate": 4.980435128114333e-05, + "loss": 1.1788, + "step": 39945 + }, + { + "epoch": 0.04, + "learning_rate": 4.980434147335976e-05, + "loss": 0.8706, + "step": 39946 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804331665331334e-05, + "loss": 0.9382, + "step": 39947 + }, + { + "epoch": 0.04, + "learning_rate": 4.980432185705805e-05, + "loss": 1.2384, + "step": 39948 + }, + { + "epoch": 0.04, + "learning_rate": 4.98043120485399e-05, + "loss": 0.4394, + "step": 39949 + }, + { + "epoch": 0.04, + "learning_rate": 4.98043022397769e-05, + "loss": 0.6767, + "step": 39950 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804292430769046e-05, + "loss": 1.095, + "step": 39951 + }, + { + "epoch": 0.04, + "learning_rate": 4.980428262151633e-05, + "loss": 1.0734, + "step": 39952 + }, + { + "epoch": 0.04, + "learning_rate": 4.980427281201876e-05, + "loss": 1.1458, + "step": 39953 + }, + { + "epoch": 0.04, + "learning_rate": 4.980426300227632e-05, + "loss": 1.0335, + "step": 39954 + }, + { + "epoch": 0.04, + "learning_rate": 4.980425319228904e-05, + "loss": 0.9548, + "step": 39955 + }, + { + "epoch": 0.04, + "learning_rate": 4.980424338205689e-05, + "loss": 1.0335, + "step": 39956 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804233571579896e-05, + "loss": 1.0044, + "step": 39957 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804223760858034e-05, + "loss": 0.553, + "step": 39958 + }, + { + "epoch": 0.04, + "learning_rate": 4.980421394989132e-05, + "loss": 0.9166, + "step": 39959 + }, + { + "epoch": 0.04, + "learning_rate": 4.980420413867974e-05, + "loss": 0.8217, + "step": 39960 + }, + { + "epoch": 0.04, + "learning_rate": 4.980419432722331e-05, + "loss": 0.8729, + "step": 39961 + }, + { + "epoch": 0.04, + "learning_rate": 4.980418451552203e-05, + "loss": 1.0885, + "step": 39962 + }, + { + "epoch": 0.04, + "learning_rate": 4.980417470357589e-05, + "loss": 1.3459, + "step": 39963 + }, + { + "epoch": 0.04, + "learning_rate": 4.980416489138489e-05, + "loss": 1.2026, + "step": 39964 + }, + { + "epoch": 0.04, + "learning_rate": 4.980415507894903e-05, + "loss": 0.8631, + "step": 39965 + }, + { + "epoch": 0.04, + "learning_rate": 4.980414526626832e-05, + "loss": 1.1893, + "step": 39966 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804135453342756e-05, + "loss": 1.1731, + "step": 39967 + }, + { + "epoch": 0.04, + "learning_rate": 4.980412564017233e-05, + "loss": 0.9397, + "step": 39968 + }, + { + "epoch": 0.04, + "learning_rate": 4.980411582675705e-05, + "loss": 1.0412, + "step": 39969 + }, + { + "epoch": 0.04, + "learning_rate": 4.980410601309692e-05, + "loss": 1.2476, + "step": 39970 + }, + { + "epoch": 0.04, + "learning_rate": 4.980409619919193e-05, + "loss": 1.2759, + "step": 39971 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804086385042084e-05, + "loss": 1.0286, + "step": 39972 + }, + { + "epoch": 0.04, + "learning_rate": 4.980407657064738e-05, + "loss": 1.0243, + "step": 39973 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804066756007826e-05, + "loss": 0.9346, + "step": 39974 + }, + { + "epoch": 0.04, + "learning_rate": 4.980405694112341e-05, + "loss": 0.8766, + "step": 39975 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804047125994147e-05, + "loss": 0.5845, + "step": 39976 + }, + { + "epoch": 0.04, + "learning_rate": 4.980403731062002e-05, + "loss": 0.9896, + "step": 39977 + }, + { + "epoch": 0.04, + "learning_rate": 4.980402749500104e-05, + "loss": 0.9506, + "step": 39978 + }, + { + "epoch": 0.04, + "learning_rate": 4.980401767913721e-05, + "loss": 0.8245, + "step": 39979 + }, + { + "epoch": 0.04, + "learning_rate": 4.9804007863028526e-05, + "loss": 0.564, + "step": 39980 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803998046674986e-05, + "loss": 0.5005, + "step": 39981 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803988230076585e-05, + "loss": 0.9579, + "step": 39982 + }, + { + "epoch": 0.04, + "learning_rate": 4.980397841323333e-05, + "loss": 0.5331, + "step": 39983 + }, + { + "epoch": 0.04, + "learning_rate": 4.980396859614523e-05, + "loss": 0.4729, + "step": 39984 + }, + { + "epoch": 0.04, + "learning_rate": 4.980395877881227e-05, + "loss": 0.65, + "step": 39985 + }, + { + "epoch": 0.04, + "learning_rate": 4.980394896123446e-05, + "loss": 0.3968, + "step": 39986 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803939143411785e-05, + "loss": 0.5404, + "step": 39987 + }, + { + "epoch": 0.04, + "learning_rate": 4.980392932534427e-05, + "loss": 0.618, + "step": 39988 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803919507031894e-05, + "loss": 0.3823, + "step": 39989 + }, + { + "epoch": 0.04, + "learning_rate": 4.980390968847466e-05, + "loss": 0.5021, + "step": 39990 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803899869672576e-05, + "loss": 0.6555, + "step": 39991 + }, + { + "epoch": 0.04, + "learning_rate": 4.980389005062563e-05, + "loss": 0.8617, + "step": 39992 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803880231333844e-05, + "loss": 0.8095, + "step": 39993 + }, + { + "epoch": 0.04, + "learning_rate": 4.980387041179721e-05, + "loss": 0.851, + "step": 39994 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803860592015704e-05, + "loss": 0.7962, + "step": 39995 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803850771989354e-05, + "loss": 0.7576, + "step": 39996 + }, + { + "epoch": 0.04, + "learning_rate": 4.980384095171815e-05, + "loss": 0.7449, + "step": 39997 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803831131202094e-05, + "loss": 0.8515, + "step": 39998 + }, + { + "epoch": 0.04, + "learning_rate": 4.980382131044118e-05, + "loss": 1.0649, + "step": 39999 + }, + { + "epoch": 0.04, + "learning_rate": 4.980381148943542e-05, + "loss": 0.9707, + "step": 40000 + }, + { + "epoch": 0.04, + "eval_loss": 1.1041456460952759, + "eval_runtime": 126.6089, + "eval_samples_per_second": 10.939, + "eval_steps_per_second": 5.474, + "step": 40000 + }, + { + "epoch": 0.04, + "learning_rate": 4.98038016681848e-05, + "loss": 0.8351, + "step": 40001 + }, + { + "epoch": 0.04, + "learning_rate": 4.980379184668934e-05, + "loss": 1.0063, + "step": 40002 + }, + { + "epoch": 0.04, + "learning_rate": 4.980378202494901e-05, + "loss": 1.0335, + "step": 40003 + }, + { + "epoch": 0.04, + "learning_rate": 4.980377220296384e-05, + "loss": 0.9556, + "step": 40004 + }, + { + "epoch": 0.04, + "learning_rate": 4.980376238073382e-05, + "loss": 0.969, + "step": 40005 + }, + { + "epoch": 0.04, + "learning_rate": 4.980375255825894e-05, + "loss": 1.1269, + "step": 40006 + }, + { + "epoch": 0.04, + "learning_rate": 4.980374273553921e-05, + "loss": 1.0262, + "step": 40007 + }, + { + "epoch": 0.04, + "learning_rate": 4.980373291257462e-05, + "loss": 0.6993, + "step": 40008 + }, + { + "epoch": 0.04, + "learning_rate": 4.980372308936519e-05, + "loss": 0.7884, + "step": 40009 + }, + { + "epoch": 0.04, + "learning_rate": 4.98037132659109e-05, + "loss": 1.1212, + "step": 40010 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803703442211765e-05, + "loss": 1.0038, + "step": 40011 + }, + { + "epoch": 0.04, + "learning_rate": 4.980369361826778e-05, + "loss": 0.8559, + "step": 40012 + }, + { + "epoch": 0.04, + "learning_rate": 4.980368379407893e-05, + "loss": 1.1532, + "step": 40013 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803673969645234e-05, + "loss": 1.2441, + "step": 40014 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803664144966694e-05, + "loss": 0.9196, + "step": 40015 + }, + { + "epoch": 0.04, + "learning_rate": 4.98036543200433e-05, + "loss": 0.8118, + "step": 40016 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803644494875055e-05, + "loss": 0.8715, + "step": 40017 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803634669461955e-05, + "loss": 1.2817, + "step": 40018 + }, + { + "epoch": 0.04, + "learning_rate": 4.980362484380401e-05, + "loss": 0.8654, + "step": 40019 + }, + { + "epoch": 0.04, + "learning_rate": 4.980361501790121e-05, + "loss": 1.3229, + "step": 40020 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803605191753554e-05, + "loss": 1.1641, + "step": 40021 + }, + { + "epoch": 0.04, + "learning_rate": 4.980359536536105e-05, + "loss": 1.0475, + "step": 40022 + }, + { + "epoch": 0.04, + "learning_rate": 4.98035855387237e-05, + "loss": 1.2802, + "step": 40023 + }, + { + "epoch": 0.04, + "learning_rate": 4.98035757118415e-05, + "loss": 1.2998, + "step": 40024 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803565884714444e-05, + "loss": 0.9992, + "step": 40025 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803556057342536e-05, + "loss": 1.0157, + "step": 40026 + }, + { + "epoch": 0.04, + "learning_rate": 4.980354622972578e-05, + "loss": 0.9564, + "step": 40027 + }, + { + "epoch": 0.04, + "learning_rate": 4.980353640186418e-05, + "loss": 0.7224, + "step": 40028 + }, + { + "epoch": 0.04, + "learning_rate": 4.980352657375773e-05, + "loss": 0.9645, + "step": 40029 + }, + { + "epoch": 0.04, + "learning_rate": 4.980351674540642e-05, + "loss": 1.0334, + "step": 40030 + }, + { + "epoch": 0.04, + "learning_rate": 4.980350691681027e-05, + "loss": 0.7898, + "step": 40031 + }, + { + "epoch": 0.04, + "learning_rate": 4.980349708796926e-05, + "loss": 0.9632, + "step": 40032 + }, + { + "epoch": 0.04, + "learning_rate": 4.980348725888341e-05, + "loss": 0.9754, + "step": 40033 + }, + { + "epoch": 0.04, + "learning_rate": 4.980347742955271e-05, + "loss": 0.6593, + "step": 40034 + }, + { + "epoch": 0.04, + "learning_rate": 4.980346759997716e-05, + "loss": 1.0488, + "step": 40035 + }, + { + "epoch": 0.04, + "learning_rate": 4.980345777015676e-05, + "loss": 0.7422, + "step": 40036 + }, + { + "epoch": 0.04, + "learning_rate": 4.980344794009151e-05, + "loss": 0.8106, + "step": 40037 + }, + { + "epoch": 0.04, + "learning_rate": 4.98034381097814e-05, + "loss": 1.2368, + "step": 40038 + }, + { + "epoch": 0.04, + "learning_rate": 4.980342827922645e-05, + "loss": 1.4599, + "step": 40039 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803418448426656e-05, + "loss": 0.9751, + "step": 40040 + }, + { + "epoch": 0.04, + "learning_rate": 4.980340861738201e-05, + "loss": 0.9305, + "step": 40041 + }, + { + "epoch": 0.04, + "learning_rate": 4.980339878609252e-05, + "loss": 1.2623, + "step": 40042 + }, + { + "epoch": 0.04, + "learning_rate": 4.980338895455817e-05, + "loss": 1.3362, + "step": 40043 + }, + { + "epoch": 0.04, + "learning_rate": 4.980337912277897e-05, + "loss": 1.8253, + "step": 40044 + }, + { + "epoch": 0.04, + "learning_rate": 4.980336929075493e-05, + "loss": 2.0186, + "step": 40045 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803359458486045e-05, + "loss": 1.986, + "step": 40046 + }, + { + "epoch": 0.04, + "learning_rate": 4.980334962597231e-05, + "loss": 0.9248, + "step": 40047 + }, + { + "epoch": 0.04, + "learning_rate": 4.980333979321372e-05, + "loss": 0.5971, + "step": 40048 + }, + { + "epoch": 0.04, + "learning_rate": 4.980332996021028e-05, + "loss": 0.4241, + "step": 40049 + }, + { + "epoch": 0.04, + "learning_rate": 4.980332012696199e-05, + "loss": 0.7349, + "step": 40050 + }, + { + "epoch": 0.04, + "learning_rate": 4.980331029346886e-05, + "loss": 0.9617, + "step": 40051 + }, + { + "epoch": 0.04, + "learning_rate": 4.980330045973089e-05, + "loss": 1.1847, + "step": 40052 + }, + { + "epoch": 0.04, + "learning_rate": 4.980329062574806e-05, + "loss": 1.2285, + "step": 40053 + }, + { + "epoch": 0.04, + "learning_rate": 4.980328079152039e-05, + "loss": 0.6677, + "step": 40054 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803270957047866e-05, + "loss": 1.1706, + "step": 40055 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803261122330495e-05, + "loss": 0.8298, + "step": 40056 + }, + { + "epoch": 0.04, + "learning_rate": 4.980325128736828e-05, + "loss": 1.5489, + "step": 40057 + }, + { + "epoch": 0.04, + "learning_rate": 4.980324145216122e-05, + "loss": 1.0558, + "step": 40058 + }, + { + "epoch": 0.04, + "learning_rate": 4.98032316167093e-05, + "loss": 0.866, + "step": 40059 + }, + { + "epoch": 0.04, + "learning_rate": 4.980322178101255e-05, + "loss": 1.0489, + "step": 40060 + }, + { + "epoch": 0.04, + "learning_rate": 4.980321194507094e-05, + "loss": 1.2543, + "step": 40061 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803202108884496e-05, + "loss": 1.102, + "step": 40062 + }, + { + "epoch": 0.04, + "learning_rate": 4.980319227245319e-05, + "loss": 0.9019, + "step": 40063 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803182435777054e-05, + "loss": 0.8962, + "step": 40064 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803172598856056e-05, + "loss": 0.8035, + "step": 40065 + }, + { + "epoch": 0.04, + "learning_rate": 4.980316276169022e-05, + "loss": 1.1187, + "step": 40066 + }, + { + "epoch": 0.04, + "learning_rate": 4.980315292427953e-05, + "loss": 0.8389, + "step": 40067 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803143086624e-05, + "loss": 1.0741, + "step": 40068 + }, + { + "epoch": 0.04, + "learning_rate": 4.980313324872363e-05, + "loss": 1.0117, + "step": 40069 + }, + { + "epoch": 0.04, + "learning_rate": 4.98031234105784e-05, + "loss": 1.0591, + "step": 40070 + }, + { + "epoch": 0.04, + "learning_rate": 4.980311357218833e-05, + "loss": 0.9564, + "step": 40071 + }, + { + "epoch": 0.04, + "learning_rate": 4.980310373355342e-05, + "loss": 0.5827, + "step": 40072 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803093894673656e-05, + "loss": 1.1534, + "step": 40073 + }, + { + "epoch": 0.04, + "learning_rate": 4.980308405554905e-05, + "loss": 0.7235, + "step": 40074 + }, + { + "epoch": 0.04, + "learning_rate": 4.98030742161796e-05, + "loss": 0.4214, + "step": 40075 + }, + { + "epoch": 0.04, + "learning_rate": 4.98030643765653e-05, + "loss": 0.5204, + "step": 40076 + }, + { + "epoch": 0.04, + "learning_rate": 4.980305453670616e-05, + "loss": 0.663, + "step": 40077 + }, + { + "epoch": 0.04, + "learning_rate": 4.980304469660218e-05, + "loss": 0.6855, + "step": 40078 + }, + { + "epoch": 0.04, + "learning_rate": 4.980303485625334e-05, + "loss": 1.1267, + "step": 40079 + }, + { + "epoch": 0.04, + "learning_rate": 4.980302501565966e-05, + "loss": 1.0533, + "step": 40080 + }, + { + "epoch": 0.04, + "learning_rate": 4.9803015174821134e-05, + "loss": 1.3505, + "step": 40081 + }, + { + "epoch": 0.04, + "learning_rate": 4.980300533373777e-05, + "loss": 1.1863, + "step": 40082 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802995492409557e-05, + "loss": 1.0581, + "step": 40083 + }, + { + "epoch": 0.04, + "learning_rate": 4.98029856508365e-05, + "loss": 0.6891, + "step": 40084 + }, + { + "epoch": 0.04, + "learning_rate": 4.980297580901859e-05, + "loss": 1.1461, + "step": 40085 + }, + { + "epoch": 0.04, + "learning_rate": 4.980296596695585e-05, + "loss": 1.0273, + "step": 40086 + }, + { + "epoch": 0.04, + "learning_rate": 4.980295612464826e-05, + "loss": 1.0165, + "step": 40087 + }, + { + "epoch": 0.04, + "learning_rate": 4.980294628209582e-05, + "loss": 1.0943, + "step": 40088 + }, + { + "epoch": 0.04, + "learning_rate": 4.980293643929854e-05, + "loss": 1.0806, + "step": 40089 + }, + { + "epoch": 0.04, + "learning_rate": 4.980292659625642e-05, + "loss": 0.5791, + "step": 40090 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802916752969445e-05, + "loss": 0.8668, + "step": 40091 + }, + { + "epoch": 0.04, + "learning_rate": 4.980290690943763e-05, + "loss": 0.8294, + "step": 40092 + }, + { + "epoch": 0.04, + "learning_rate": 4.980289706566098e-05, + "loss": 0.6656, + "step": 40093 + }, + { + "epoch": 0.04, + "learning_rate": 4.980288722163948e-05, + "loss": 0.5729, + "step": 40094 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802877377373135e-05, + "loss": 0.6142, + "step": 40095 + }, + { + "epoch": 0.04, + "learning_rate": 4.980286753286195e-05, + "loss": 1.0247, + "step": 40096 + }, + { + "epoch": 0.04, + "learning_rate": 4.980285768810592e-05, + "loss": 0.7716, + "step": 40097 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802847843105044e-05, + "loss": 1.0167, + "step": 40098 + }, + { + "epoch": 0.04, + "learning_rate": 4.980283799785933e-05, + "loss": 1.02, + "step": 40099 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802828152368766e-05, + "loss": 1.0109, + "step": 40100 + }, + { + "epoch": 0.04, + "learning_rate": 4.980281830663337e-05, + "loss": 1.0328, + "step": 40101 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802808460653114e-05, + "loss": 1.1585, + "step": 40102 + }, + { + "epoch": 0.04, + "learning_rate": 4.980279861442803e-05, + "loss": 1.1695, + "step": 40103 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802788767958096e-05, + "loss": 0.2007, + "step": 40104 + }, + { + "epoch": 0.04, + "learning_rate": 4.980277892124332e-05, + "loss": 0.1856, + "step": 40105 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802769074283704e-05, + "loss": 1.1141, + "step": 40106 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802759227079245e-05, + "loss": 0.9583, + "step": 40107 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802749379629946e-05, + "loss": 0.9166, + "step": 40108 + }, + { + "epoch": 0.04, + "learning_rate": 4.980273953193579e-05, + "loss": 1.0047, + "step": 40109 + }, + { + "epoch": 0.04, + "learning_rate": 4.980272968399681e-05, + "loss": 1.1154, + "step": 40110 + }, + { + "epoch": 0.04, + "learning_rate": 4.980271983581298e-05, + "loss": 0.9844, + "step": 40111 + }, + { + "epoch": 0.04, + "learning_rate": 4.980270998738431e-05, + "loss": 1.0784, + "step": 40112 + }, + { + "epoch": 0.04, + "learning_rate": 4.98027001387108e-05, + "loss": 1.011, + "step": 40113 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802690289792444e-05, + "loss": 1.1918, + "step": 40114 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802680440629245e-05, + "loss": 0.8127, + "step": 40115 + }, + { + "epoch": 0.04, + "learning_rate": 4.980267059122121e-05, + "loss": 0.7379, + "step": 40116 + }, + { + "epoch": 0.04, + "learning_rate": 4.980266074156833e-05, + "loss": 0.9316, + "step": 40117 + }, + { + "epoch": 0.04, + "learning_rate": 4.980265089167061e-05, + "loss": 0.986, + "step": 40118 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802641041528056e-05, + "loss": 0.8, + "step": 40119 + }, + { + "epoch": 0.04, + "learning_rate": 4.980263119114065e-05, + "loss": 0.8796, + "step": 40120 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802621340508404e-05, + "loss": 0.9183, + "step": 40121 + }, + { + "epoch": 0.04, + "learning_rate": 4.980261148963132e-05, + "loss": 1.052, + "step": 40122 + }, + { + "epoch": 0.04, + "learning_rate": 4.980260163850939e-05, + "loss": 0.9185, + "step": 40123 + }, + { + "epoch": 0.04, + "learning_rate": 4.980259178714263e-05, + "loss": 1.1649, + "step": 40124 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802581935531015e-05, + "loss": 0.9559, + "step": 40125 + }, + { + "epoch": 0.04, + "learning_rate": 4.980257208367457e-05, + "loss": 1.0646, + "step": 40126 + }, + { + "epoch": 0.04, + "learning_rate": 4.980256223157328e-05, + "loss": 0.8924, + "step": 40127 + }, + { + "epoch": 0.04, + "learning_rate": 4.980255237922715e-05, + "loss": 0.7681, + "step": 40128 + }, + { + "epoch": 0.04, + "learning_rate": 4.980254252663617e-05, + "loss": 0.9565, + "step": 40129 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802532673800375e-05, + "loss": 1.1585, + "step": 40130 + }, + { + "epoch": 0.04, + "learning_rate": 4.980252282071972e-05, + "loss": 1.2575, + "step": 40131 + }, + { + "epoch": 0.04, + "learning_rate": 4.980251296739423e-05, + "loss": 0.9026, + "step": 40132 + }, + { + "epoch": 0.04, + "learning_rate": 4.980250311382389e-05, + "loss": 1.2882, + "step": 40133 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802493260008734e-05, + "loss": 1.1957, + "step": 40134 + }, + { + "epoch": 0.04, + "learning_rate": 4.980248340594872e-05, + "loss": 0.679, + "step": 40135 + }, + { + "epoch": 0.04, + "learning_rate": 4.980247355164387e-05, + "loss": 0.8389, + "step": 40136 + }, + { + "epoch": 0.04, + "learning_rate": 4.980246369709418e-05, + "loss": 0.6929, + "step": 40137 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802453842299654e-05, + "loss": 1.0598, + "step": 40138 + }, + { + "epoch": 0.04, + "learning_rate": 4.980244398726028e-05, + "loss": 1.0423, + "step": 40139 + }, + { + "epoch": 0.04, + "learning_rate": 4.980243413197607e-05, + "loss": 0.6124, + "step": 40140 + }, + { + "epoch": 0.04, + "learning_rate": 4.980242427644703e-05, + "loss": 0.3361, + "step": 40141 + }, + { + "epoch": 0.04, + "learning_rate": 4.980241442067314e-05, + "loss": 0.9174, + "step": 40142 + }, + { + "epoch": 0.04, + "learning_rate": 4.980240456465442e-05, + "loss": 1.2573, + "step": 40143 + }, + { + "epoch": 0.04, + "learning_rate": 4.980239470839085e-05, + "loss": 1.1555, + "step": 40144 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802384851882454e-05, + "loss": 0.9096, + "step": 40145 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802374995129216e-05, + "loss": 0.8689, + "step": 40146 + }, + { + "epoch": 0.04, + "learning_rate": 4.980236513813114e-05, + "loss": 0.8914, + "step": 40147 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802355280888214e-05, + "loss": 0.3023, + "step": 40148 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802345423400456e-05, + "loss": 0.8141, + "step": 40149 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802335565667865e-05, + "loss": 0.8951, + "step": 40150 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802325707690435e-05, + "loss": 1.0116, + "step": 40151 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802315849468164e-05, + "loss": 0.622, + "step": 40152 + }, + { + "epoch": 0.04, + "learning_rate": 4.980230599100105e-05, + "loss": 0.9495, + "step": 40153 + }, + { + "epoch": 0.04, + "learning_rate": 4.98022961322891e-05, + "loss": 0.8977, + "step": 40154 + }, + { + "epoch": 0.04, + "learning_rate": 4.980228627333232e-05, + "loss": 0.8625, + "step": 40155 + }, + { + "epoch": 0.04, + "learning_rate": 4.98022764141307e-05, + "loss": 1.3008, + "step": 40156 + }, + { + "epoch": 0.04, + "learning_rate": 4.980226655468424e-05, + "loss": 1.2325, + "step": 40157 + }, + { + "epoch": 0.04, + "learning_rate": 4.980225669499294e-05, + "loss": 1.0094, + "step": 40158 + }, + { + "epoch": 0.04, + "learning_rate": 4.980224683505681e-05, + "loss": 0.9082, + "step": 40159 + }, + { + "epoch": 0.04, + "learning_rate": 4.980223697487584e-05, + "loss": 1.0506, + "step": 40160 + }, + { + "epoch": 0.04, + "learning_rate": 4.980222711445003e-05, + "loss": 0.9291, + "step": 40161 + }, + { + "epoch": 0.04, + "learning_rate": 4.980221725377938e-05, + "loss": 1.2304, + "step": 40162 + }, + { + "epoch": 0.04, + "learning_rate": 4.98022073928639e-05, + "loss": 1.1269, + "step": 40163 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802197531703575e-05, + "loss": 1.0724, + "step": 40164 + }, + { + "epoch": 0.04, + "learning_rate": 4.980218767029842e-05, + "loss": 0.9201, + "step": 40165 + }, + { + "epoch": 0.04, + "learning_rate": 4.980217780864842e-05, + "loss": 0.9911, + "step": 40166 + }, + { + "epoch": 0.04, + "learning_rate": 4.980216794675359e-05, + "loss": 1.0427, + "step": 40167 + }, + { + "epoch": 0.04, + "learning_rate": 4.980215808461392e-05, + "loss": 0.8543, + "step": 40168 + }, + { + "epoch": 0.04, + "learning_rate": 4.980214822222942e-05, + "loss": 1.3675, + "step": 40169 + }, + { + "epoch": 0.04, + "learning_rate": 4.980213835960008e-05, + "loss": 0.901, + "step": 40170 + }, + { + "epoch": 0.04, + "learning_rate": 4.980212849672591e-05, + "loss": 0.9215, + "step": 40171 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802118633606894e-05, + "loss": 1.0075, + "step": 40172 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802108770243045e-05, + "loss": 0.9186, + "step": 40173 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802098906634356e-05, + "loss": 0.93, + "step": 40174 + }, + { + "epoch": 0.04, + "learning_rate": 4.980208904278084e-05, + "loss": 1.2567, + "step": 40175 + }, + { + "epoch": 0.04, + "learning_rate": 4.980207917868248e-05, + "loss": 1.186, + "step": 40176 + }, + { + "epoch": 0.04, + "learning_rate": 4.980206931433929e-05, + "loss": 1.2373, + "step": 40177 + }, + { + "epoch": 0.04, + "learning_rate": 4.980205944975126e-05, + "loss": 1.1856, + "step": 40178 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802049584918394e-05, + "loss": 1.1495, + "step": 40179 + }, + { + "epoch": 0.04, + "learning_rate": 4.98020397198407e-05, + "loss": 1.2619, + "step": 40180 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802029854518165e-05, + "loss": 0.9719, + "step": 40181 + }, + { + "epoch": 0.04, + "learning_rate": 4.98020199889508e-05, + "loss": 0.9717, + "step": 40182 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802010123138596e-05, + "loss": 1.1512, + "step": 40183 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802000257081556e-05, + "loss": 0.8778, + "step": 40184 + }, + { + "epoch": 0.04, + "learning_rate": 4.980199039077968e-05, + "loss": 0.8933, + "step": 40185 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801980524232974e-05, + "loss": 1.3386, + "step": 40186 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801970657441434e-05, + "loss": 0.9141, + "step": 40187 + }, + { + "epoch": 0.04, + "learning_rate": 4.980196079040505e-05, + "loss": 1.013, + "step": 40188 + }, + { + "epoch": 0.04, + "learning_rate": 4.980195092312384e-05, + "loss": 0.8862, + "step": 40189 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801941055597786e-05, + "loss": 0.9935, + "step": 40190 + }, + { + "epoch": 0.04, + "learning_rate": 4.980193118782691e-05, + "loss": 0.8461, + "step": 40191 + }, + { + "epoch": 0.04, + "learning_rate": 4.980192131981119e-05, + "loss": 0.7871, + "step": 40192 + }, + { + "epoch": 0.04, + "learning_rate": 4.980191145155065e-05, + "loss": 1.0288, + "step": 40193 + }, + { + "epoch": 0.04, + "learning_rate": 4.980190158304526e-05, + "loss": 1.2097, + "step": 40194 + }, + { + "epoch": 0.04, + "learning_rate": 4.980189171429505e-05, + "loss": 1.0057, + "step": 40195 + }, + { + "epoch": 0.04, + "learning_rate": 4.980188184529999e-05, + "loss": 0.9988, + "step": 40196 + }, + { + "epoch": 0.04, + "learning_rate": 4.980187197606011e-05, + "loss": 1.1743, + "step": 40197 + }, + { + "epoch": 0.04, + "learning_rate": 4.980186210657539e-05, + "loss": 1.2058, + "step": 40198 + }, + { + "epoch": 0.04, + "learning_rate": 4.980185223684584e-05, + "loss": 1.0773, + "step": 40199 + }, + { + "epoch": 0.04, + "learning_rate": 4.980184236687145e-05, + "loss": 1.2282, + "step": 40200 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801832496652235e-05, + "loss": 0.8915, + "step": 40201 + }, + { + "epoch": 0.04, + "learning_rate": 4.980182262618818e-05, + "loss": 1.1978, + "step": 40202 + }, + { + "epoch": 0.04, + "learning_rate": 4.980181275547929e-05, + "loss": 1.1206, + "step": 40203 + }, + { + "epoch": 0.04, + "learning_rate": 4.980180288452558e-05, + "loss": 0.9227, + "step": 40204 + }, + { + "epoch": 0.04, + "learning_rate": 4.980179301332702e-05, + "loss": 0.8176, + "step": 40205 + }, + { + "epoch": 0.04, + "learning_rate": 4.980178314188364e-05, + "loss": 1.083, + "step": 40206 + }, + { + "epoch": 0.04, + "learning_rate": 4.980177327019543e-05, + "loss": 1.114, + "step": 40207 + }, + { + "epoch": 0.04, + "learning_rate": 4.980176339826238e-05, + "loss": 0.8839, + "step": 40208 + }, + { + "epoch": 0.04, + "learning_rate": 4.980175352608449e-05, + "loss": 1.0438, + "step": 40209 + }, + { + "epoch": 0.04, + "learning_rate": 4.980174365366178e-05, + "loss": 0.5715, + "step": 40210 + }, + { + "epoch": 0.04, + "learning_rate": 4.980173378099423e-05, + "loss": 1.7546, + "step": 40211 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801723908081855e-05, + "loss": 0.8007, + "step": 40212 + }, + { + "epoch": 0.04, + "learning_rate": 4.980171403492464e-05, + "loss": 0.7488, + "step": 40213 + }, + { + "epoch": 0.04, + "learning_rate": 4.98017041615226e-05, + "loss": 1.1464, + "step": 40214 + }, + { + "epoch": 0.04, + "learning_rate": 4.980169428787572e-05, + "loss": 0.8603, + "step": 40215 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801684413984016e-05, + "loss": 1.2161, + "step": 40216 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801674539847474e-05, + "loss": 1.1111, + "step": 40217 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801664665466105e-05, + "loss": 1.0981, + "step": 40218 + }, + { + "epoch": 0.04, + "learning_rate": 4.98016547908399e-05, + "loss": 1.1458, + "step": 40219 + }, + { + "epoch": 0.04, + "learning_rate": 4.980164491596887e-05, + "loss": 1.1462, + "step": 40220 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801635040853006e-05, + "loss": 1.4483, + "step": 40221 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801625165492304e-05, + "loss": 1.0551, + "step": 40222 + }, + { + "epoch": 0.04, + "learning_rate": 4.980161528988678e-05, + "loss": 1.0357, + "step": 40223 + }, + { + "epoch": 0.04, + "learning_rate": 4.980160541403642e-05, + "loss": 1.1434, + "step": 40224 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801595537941234e-05, + "loss": 1.1042, + "step": 40225 + }, + { + "epoch": 0.04, + "learning_rate": 4.980158566160121e-05, + "loss": 1.0783, + "step": 40226 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801575785016365e-05, + "loss": 0.8386, + "step": 40227 + }, + { + "epoch": 0.04, + "learning_rate": 4.980156590818668e-05, + "loss": 0.655, + "step": 40228 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801556031112165e-05, + "loss": 0.9964, + "step": 40229 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801546153792825e-05, + "loss": 0.7931, + "step": 40230 + }, + { + "epoch": 0.04, + "learning_rate": 4.980153627622865e-05, + "loss": 0.5834, + "step": 40231 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801526398419645e-05, + "loss": 1.0704, + "step": 40232 + }, + { + "epoch": 0.04, + "learning_rate": 4.980151652036581e-05, + "loss": 1.0943, + "step": 40233 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801506642067146e-05, + "loss": 0.7844, + "step": 40234 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801496763523654e-05, + "loss": 0.8188, + "step": 40235 + }, + { + "epoch": 0.04, + "learning_rate": 4.980148688473533e-05, + "loss": 1.0977, + "step": 40236 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801477005702176e-05, + "loss": 0.681, + "step": 40237 + }, + { + "epoch": 0.04, + "learning_rate": 4.980146712642419e-05, + "loss": 0.8482, + "step": 40238 + }, + { + "epoch": 0.04, + "learning_rate": 4.980145724690137e-05, + "loss": 1.202, + "step": 40239 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801447367133735e-05, + "loss": 1.2455, + "step": 40240 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801437487121264e-05, + "loss": 0.9425, + "step": 40241 + }, + { + "epoch": 0.04, + "learning_rate": 4.980142760686396e-05, + "loss": 0.6116, + "step": 40242 + }, + { + "epoch": 0.04, + "learning_rate": 4.980141772636183e-05, + "loss": 1.1579, + "step": 40243 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801407845614865e-05, + "loss": 1.0084, + "step": 40244 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801397964623074e-05, + "loss": 0.8425, + "step": 40245 + }, + { + "epoch": 0.04, + "learning_rate": 4.980138808338646e-05, + "loss": 1.0874, + "step": 40246 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801378201905015e-05, + "loss": 1.0933, + "step": 40247 + }, + { + "epoch": 0.04, + "learning_rate": 4.980136832017874e-05, + "loss": 1.0146, + "step": 40248 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801358438207636e-05, + "loss": 0.9702, + "step": 40249 + }, + { + "epoch": 0.04, + "learning_rate": 4.98013485559917e-05, + "loss": 0.9263, + "step": 40250 + }, + { + "epoch": 0.04, + "learning_rate": 4.980133867353094e-05, + "loss": 1.0773, + "step": 40251 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801328790825356e-05, + "loss": 1.0713, + "step": 40252 + }, + { + "epoch": 0.04, + "learning_rate": 4.980131890787493e-05, + "loss": 0.8643, + "step": 40253 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801309024679686e-05, + "loss": 1.1966, + "step": 40254 + }, + { + "epoch": 0.04, + "learning_rate": 4.980129914123961e-05, + "loss": 1.1583, + "step": 40255 + }, + { + "epoch": 0.04, + "learning_rate": 4.98012892575547e-05, + "loss": 0.3594, + "step": 40256 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801279373624976e-05, + "loss": 0.3692, + "step": 40257 + }, + { + "epoch": 0.04, + "learning_rate": 4.980126948945042e-05, + "loss": 0.3091, + "step": 40258 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801259605031035e-05, + "loss": 0.3099, + "step": 40259 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801249720366815e-05, + "loss": 0.2864, + "step": 40260 + }, + { + "epoch": 0.04, + "learning_rate": 4.980123983545778e-05, + "loss": 0.8747, + "step": 40261 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801229950303915e-05, + "loss": 1.2832, + "step": 40262 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801220064905216e-05, + "loss": 1.0106, + "step": 40263 + }, + { + "epoch": 0.04, + "learning_rate": 4.980121017926169e-05, + "loss": 1.1235, + "step": 40264 + }, + { + "epoch": 0.04, + "learning_rate": 4.980120029337334e-05, + "loss": 0.9057, + "step": 40265 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801190407240166e-05, + "loss": 0.9119, + "step": 40266 + }, + { + "epoch": 0.04, + "learning_rate": 4.980118052086217e-05, + "loss": 1.0879, + "step": 40267 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801170634239336e-05, + "loss": 0.7549, + "step": 40268 + }, + { + "epoch": 0.04, + "learning_rate": 4.980116074737168e-05, + "loss": 0.9418, + "step": 40269 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801150860259194e-05, + "loss": 1.2752, + "step": 40270 + }, + { + "epoch": 0.04, + "learning_rate": 4.980114097290188e-05, + "loss": 1.2406, + "step": 40271 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801131085299746e-05, + "loss": 1.1016, + "step": 40272 + }, + { + "epoch": 0.04, + "learning_rate": 4.980112119745278e-05, + "loss": 1.14, + "step": 40273 + }, + { + "epoch": 0.04, + "learning_rate": 4.980111130936099e-05, + "loss": 1.1683, + "step": 40274 + }, + { + "epoch": 0.04, + "learning_rate": 4.980110142102438e-05, + "loss": 1.1489, + "step": 40275 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801091532442934e-05, + "loss": 1.3125, + "step": 40276 + }, + { + "epoch": 0.04, + "learning_rate": 4.980108164361667e-05, + "loss": 1.4885, + "step": 40277 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801071754545576e-05, + "loss": 1.2222, + "step": 40278 + }, + { + "epoch": 0.04, + "learning_rate": 4.980106186522966e-05, + "loss": 1.3618, + "step": 40279 + }, + { + "epoch": 0.04, + "learning_rate": 4.980105197566891e-05, + "loss": 1.348, + "step": 40280 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801042085863345e-05, + "loss": 1.324, + "step": 40281 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801032195812944e-05, + "loss": 1.0962, + "step": 40282 + }, + { + "epoch": 0.04, + "learning_rate": 4.980102230551773e-05, + "loss": 0.9156, + "step": 40283 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801012414977676e-05, + "loss": 0.9025, + "step": 40284 + }, + { + "epoch": 0.04, + "learning_rate": 4.980100252419281e-05, + "loss": 1.0756, + "step": 40285 + }, + { + "epoch": 0.04, + "learning_rate": 4.980099263316311e-05, + "loss": 1.2162, + "step": 40286 + }, + { + "epoch": 0.04, + "learning_rate": 4.980098274188859e-05, + "loss": 0.7657, + "step": 40287 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800972850369244e-05, + "loss": 0.346, + "step": 40288 + }, + { + "epoch": 0.04, + "learning_rate": 4.980096295860508e-05, + "loss": 0.9511, + "step": 40289 + }, + { + "epoch": 0.04, + "learning_rate": 4.980095306659608e-05, + "loss": 1.0045, + "step": 40290 + }, + { + "epoch": 0.04, + "learning_rate": 4.980094317434226e-05, + "loss": 1.0135, + "step": 40291 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800933281843616e-05, + "loss": 1.0791, + "step": 40292 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800923389100145e-05, + "loss": 1.1477, + "step": 40293 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800913496111854e-05, + "loss": 0.7018, + "step": 40294 + }, + { + "epoch": 0.04, + "learning_rate": 4.980090360287874e-05, + "loss": 0.6978, + "step": 40295 + }, + { + "epoch": 0.04, + "learning_rate": 4.98008937094008e-05, + "loss": 1.1565, + "step": 40296 + }, + { + "epoch": 0.04, + "learning_rate": 4.980088381567804e-05, + "loss": 1.1785, + "step": 40297 + }, + { + "epoch": 0.04, + "learning_rate": 4.980087392171045e-05, + "loss": 0.9073, + "step": 40298 + }, + { + "epoch": 0.04, + "learning_rate": 4.980086402749803e-05, + "loss": 1.0832, + "step": 40299 + }, + { + "epoch": 0.04, + "learning_rate": 4.980085413304079e-05, + "loss": 1.4058, + "step": 40300 + }, + { + "epoch": 0.04, + "learning_rate": 4.980084423833873e-05, + "loss": 1.134, + "step": 40301 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800834343391845e-05, + "loss": 0.7869, + "step": 40302 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800824448200144e-05, + "loss": 0.704, + "step": 40303 + }, + { + "epoch": 0.04, + "learning_rate": 4.980081455276361e-05, + "loss": 0.7443, + "step": 40304 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800804657082255e-05, + "loss": 0.5222, + "step": 40305 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800794761156074e-05, + "loss": 1.124, + "step": 40306 + }, + { + "epoch": 0.04, + "learning_rate": 4.980078486498508e-05, + "loss": 1.0752, + "step": 40307 + }, + { + "epoch": 0.04, + "learning_rate": 4.980077496856926e-05, + "loss": 0.9497, + "step": 40308 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800765071908615e-05, + "loss": 1.2164, + "step": 40309 + }, + { + "epoch": 0.04, + "learning_rate": 4.980075517500314e-05, + "loss": 1.1242, + "step": 40310 + }, + { + "epoch": 0.04, + "learning_rate": 4.980074527785286e-05, + "loss": 0.9608, + "step": 40311 + }, + { + "epoch": 0.04, + "learning_rate": 4.980073538045774e-05, + "loss": 1.2742, + "step": 40312 + }, + { + "epoch": 0.04, + "learning_rate": 4.980072548281781e-05, + "loss": 1.3697, + "step": 40313 + }, + { + "epoch": 0.04, + "learning_rate": 4.980071558493305e-05, + "loss": 1.0308, + "step": 40314 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800705686803465e-05, + "loss": 0.7338, + "step": 40315 + }, + { + "epoch": 0.04, + "learning_rate": 4.980069578842906e-05, + "loss": 0.8847, + "step": 40316 + }, + { + "epoch": 0.04, + "learning_rate": 4.980068588980984e-05, + "loss": 0.7842, + "step": 40317 + }, + { + "epoch": 0.04, + "learning_rate": 4.98006759909458e-05, + "loss": 1.1488, + "step": 40318 + }, + { + "epoch": 0.04, + "learning_rate": 4.980066609183693e-05, + "loss": 1.1526, + "step": 40319 + }, + { + "epoch": 0.04, + "learning_rate": 4.980065619248324e-05, + "loss": 1.0138, + "step": 40320 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800646292884726e-05, + "loss": 0.9225, + "step": 40321 + }, + { + "epoch": 0.04, + "learning_rate": 4.980063639304139e-05, + "loss": 0.9303, + "step": 40322 + }, + { + "epoch": 0.04, + "learning_rate": 4.980062649295324e-05, + "loss": 1.0293, + "step": 40323 + }, + { + "epoch": 0.04, + "learning_rate": 4.980061659262026e-05, + "loss": 1.1079, + "step": 40324 + }, + { + "epoch": 0.04, + "learning_rate": 4.980060669204247e-05, + "loss": 1.171, + "step": 40325 + }, + { + "epoch": 0.04, + "learning_rate": 4.980059679121985e-05, + "loss": 1.3404, + "step": 40326 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800586890152414e-05, + "loss": 1.1918, + "step": 40327 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800576988840154e-05, + "loss": 0.9194, + "step": 40328 + }, + { + "epoch": 0.04, + "learning_rate": 4.980056708728307e-05, + "loss": 0.9174, + "step": 40329 + }, + { + "epoch": 0.04, + "learning_rate": 4.980055718548117e-05, + "loss": 1.8803, + "step": 40330 + }, + { + "epoch": 0.04, + "learning_rate": 4.980054728343444e-05, + "loss": 1.1949, + "step": 40331 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800537381142905e-05, + "loss": 1.2113, + "step": 40332 + }, + { + "epoch": 0.04, + "learning_rate": 4.980052747860654e-05, + "loss": 0.9933, + "step": 40333 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800517575825356e-05, + "loss": 0.9518, + "step": 40334 + }, + { + "epoch": 0.04, + "learning_rate": 4.980050767279936e-05, + "loss": 0.5709, + "step": 40335 + }, + { + "epoch": 0.04, + "learning_rate": 4.980049776952853e-05, + "loss": 1.0836, + "step": 40336 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800487866012885e-05, + "loss": 0.8889, + "step": 40337 + }, + { + "epoch": 0.04, + "learning_rate": 4.980047796225242e-05, + "loss": 0.5545, + "step": 40338 + }, + { + "epoch": 0.04, + "learning_rate": 4.980046805824714e-05, + "loss": 1.0021, + "step": 40339 + }, + { + "epoch": 0.04, + "learning_rate": 4.980045815399703e-05, + "loss": 0.7766, + "step": 40340 + }, + { + "epoch": 0.04, + "learning_rate": 4.980044824950211e-05, + "loss": 0.6365, + "step": 40341 + }, + { + "epoch": 0.04, + "learning_rate": 4.980043834476237e-05, + "loss": 0.8995, + "step": 40342 + }, + { + "epoch": 0.04, + "learning_rate": 4.98004284397778e-05, + "loss": 1.1682, + "step": 40343 + }, + { + "epoch": 0.04, + "learning_rate": 4.980041853454842e-05, + "loss": 1.2797, + "step": 40344 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800408629074226e-05, + "loss": 1.1498, + "step": 40345 + }, + { + "epoch": 0.04, + "learning_rate": 4.98003987233552e-05, + "loss": 1.1213, + "step": 40346 + }, + { + "epoch": 0.04, + "learning_rate": 4.980038881739136e-05, + "loss": 0.7712, + "step": 40347 + }, + { + "epoch": 0.04, + "learning_rate": 4.980037891118271e-05, + "loss": 1.1566, + "step": 40348 + }, + { + "epoch": 0.04, + "learning_rate": 4.980036900472923e-05, + "loss": 0.8583, + "step": 40349 + }, + { + "epoch": 0.04, + "learning_rate": 4.980035909803094e-05, + "loss": 0.8445, + "step": 40350 + }, + { + "epoch": 0.04, + "learning_rate": 4.980034919108781e-05, + "loss": 0.639, + "step": 40351 + }, + { + "epoch": 0.04, + "learning_rate": 4.980033928389989e-05, + "loss": 0.7378, + "step": 40352 + }, + { + "epoch": 0.04, + "learning_rate": 4.980032937646714e-05, + "loss": 0.6765, + "step": 40353 + }, + { + "epoch": 0.04, + "learning_rate": 4.980031946878956e-05, + "loss": 0.9341, + "step": 40354 + }, + { + "epoch": 0.04, + "learning_rate": 4.980030956086718e-05, + "loss": 0.9038, + "step": 40355 + }, + { + "epoch": 0.04, + "learning_rate": 4.980029965269997e-05, + "loss": 0.7714, + "step": 40356 + }, + { + "epoch": 0.04, + "learning_rate": 4.980028974428794e-05, + "loss": 1.2605, + "step": 40357 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800279835631105e-05, + "loss": 1.0448, + "step": 40358 + }, + { + "epoch": 0.04, + "learning_rate": 4.980026992672944e-05, + "loss": 0.8777, + "step": 40359 + }, + { + "epoch": 0.04, + "learning_rate": 4.980026001758296e-05, + "loss": 0.8664, + "step": 40360 + }, + { + "epoch": 0.04, + "learning_rate": 4.980025010819166e-05, + "loss": 1.0218, + "step": 40361 + }, + { + "epoch": 0.04, + "learning_rate": 4.980024019855555e-05, + "loss": 1.0082, + "step": 40362 + }, + { + "epoch": 0.04, + "learning_rate": 4.980023028867462e-05, + "loss": 1.1168, + "step": 40363 + }, + { + "epoch": 0.04, + "learning_rate": 4.980022037854887e-05, + "loss": 1.2178, + "step": 40364 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800210468178304e-05, + "loss": 0.904, + "step": 40365 + }, + { + "epoch": 0.04, + "learning_rate": 4.980020055756292e-05, + "loss": 1.2466, + "step": 40366 + }, + { + "epoch": 0.04, + "learning_rate": 4.980019064670272e-05, + "loss": 1.0779, + "step": 40367 + }, + { + "epoch": 0.04, + "learning_rate": 4.98001807355977e-05, + "loss": 1.0573, + "step": 40368 + }, + { + "epoch": 0.04, + "learning_rate": 4.980017082424787e-05, + "loss": 1.2056, + "step": 40369 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800160912653215e-05, + "loss": 0.9002, + "step": 40370 + }, + { + "epoch": 0.04, + "learning_rate": 4.980015100081375e-05, + "loss": 0.7154, + "step": 40371 + }, + { + "epoch": 0.04, + "learning_rate": 4.980014108872947e-05, + "loss": 0.9177, + "step": 40372 + }, + { + "epoch": 0.04, + "learning_rate": 4.980013117640037e-05, + "loss": 0.8781, + "step": 40373 + }, + { + "epoch": 0.04, + "learning_rate": 4.980012126382645e-05, + "loss": 1.0062, + "step": 40374 + }, + { + "epoch": 0.04, + "learning_rate": 4.980011135100771e-05, + "loss": 1.2545, + "step": 40375 + }, + { + "epoch": 0.04, + "learning_rate": 4.980010143794416e-05, + "loss": 1.4403, + "step": 40376 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800091524635795e-05, + "loss": 1.2413, + "step": 40377 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800081611082615e-05, + "loss": 1.0187, + "step": 40378 + }, + { + "epoch": 0.04, + "learning_rate": 4.980007169728461e-05, + "loss": 1.1179, + "step": 40379 + }, + { + "epoch": 0.04, + "learning_rate": 4.98000617832418e-05, + "loss": 1.3806, + "step": 40380 + }, + { + "epoch": 0.04, + "learning_rate": 4.980005186895417e-05, + "loss": 1.0002, + "step": 40381 + }, + { + "epoch": 0.04, + "learning_rate": 4.9800041954421725e-05, + "loss": 0.7196, + "step": 40382 + }, + { + "epoch": 0.04, + "learning_rate": 4.980003203964446e-05, + "loss": 0.7002, + "step": 40383 + }, + { + "epoch": 0.04, + "learning_rate": 4.980002212462238e-05, + "loss": 0.9976, + "step": 40384 + }, + { + "epoch": 0.04, + "learning_rate": 4.980001220935549e-05, + "loss": 1.2453, + "step": 40385 + }, + { + "epoch": 0.04, + "learning_rate": 4.980000229384379e-05, + "loss": 1.298, + "step": 40386 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799992378087267e-05, + "loss": 0.9038, + "step": 40387 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799982462085924e-05, + "loss": 0.2244, + "step": 40388 + }, + { + "epoch": 0.04, + "learning_rate": 4.979997254583978e-05, + "loss": 0.1102, + "step": 40389 + }, + { + "epoch": 0.04, + "learning_rate": 4.979996262934881e-05, + "loss": 0.128, + "step": 40390 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799952712613026e-05, + "loss": 0.3284, + "step": 40391 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799942795632425e-05, + "loss": 0.9555, + "step": 40392 + }, + { + "epoch": 0.04, + "learning_rate": 4.979993287840702e-05, + "loss": 0.8643, + "step": 40393 + }, + { + "epoch": 0.04, + "learning_rate": 4.979992296093679e-05, + "loss": 1.0874, + "step": 40394 + }, + { + "epoch": 0.04, + "learning_rate": 4.979991304322175e-05, + "loss": 1.2404, + "step": 40395 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799903125261894e-05, + "loss": 0.8376, + "step": 40396 + }, + { + "epoch": 0.04, + "learning_rate": 4.979989320705722e-05, + "loss": 1.0915, + "step": 40397 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799883288607745e-05, + "loss": 1.096, + "step": 40398 + }, + { + "epoch": 0.04, + "learning_rate": 4.979987336991345e-05, + "loss": 1.199, + "step": 40399 + }, + { + "epoch": 0.04, + "learning_rate": 4.979986345097434e-05, + "loss": 1.1232, + "step": 40400 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799853531790414e-05, + "loss": 1.208, + "step": 40401 + }, + { + "epoch": 0.04, + "learning_rate": 4.979984361236167e-05, + "loss": 1.1252, + "step": 40402 + }, + { + "epoch": 0.04, + "learning_rate": 4.979983369268812e-05, + "loss": 0.9586, + "step": 40403 + }, + { + "epoch": 0.04, + "learning_rate": 4.979982377276976e-05, + "loss": 1.1601, + "step": 40404 + }, + { + "epoch": 0.04, + "learning_rate": 4.979981385260658e-05, + "loss": 0.8218, + "step": 40405 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799803932198585e-05, + "loss": 1.3614, + "step": 40406 + }, + { + "epoch": 0.04, + "learning_rate": 4.979979401154578e-05, + "loss": 0.9328, + "step": 40407 + }, + { + "epoch": 0.04, + "learning_rate": 4.979978409064816e-05, + "loss": 0.8974, + "step": 40408 + }, + { + "epoch": 0.04, + "learning_rate": 4.979977416950573e-05, + "loss": 1.2297, + "step": 40409 + }, + { + "epoch": 0.04, + "learning_rate": 4.979976424811849e-05, + "loss": 0.8565, + "step": 40410 + }, + { + "epoch": 0.04, + "learning_rate": 4.979975432648643e-05, + "loss": 1.0016, + "step": 40411 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799744404609564e-05, + "loss": 1.1097, + "step": 40412 + }, + { + "epoch": 0.04, + "learning_rate": 4.979973448248787e-05, + "loss": 1.0564, + "step": 40413 + }, + { + "epoch": 0.04, + "learning_rate": 4.979972456012138e-05, + "loss": 1.2441, + "step": 40414 + }, + { + "epoch": 0.04, + "learning_rate": 4.979971463751007e-05, + "loss": 1.1635, + "step": 40415 + }, + { + "epoch": 0.04, + "learning_rate": 4.979970471465395e-05, + "loss": 1.0216, + "step": 40416 + }, + { + "epoch": 0.04, + "learning_rate": 4.979969479155302e-05, + "loss": 1.0714, + "step": 40417 + }, + { + "epoch": 0.04, + "learning_rate": 4.979968486820727e-05, + "loss": 1.1885, + "step": 40418 + }, + { + "epoch": 0.04, + "learning_rate": 4.979967494461672e-05, + "loss": 1.1723, + "step": 40419 + }, + { + "epoch": 0.04, + "learning_rate": 4.979966502078135e-05, + "loss": 1.201, + "step": 40420 + }, + { + "epoch": 0.04, + "learning_rate": 4.979965509670117e-05, + "loss": 1.1066, + "step": 40421 + }, + { + "epoch": 0.04, + "learning_rate": 4.979964517237618e-05, + "loss": 1.0195, + "step": 40422 + }, + { + "epoch": 0.04, + "learning_rate": 4.979963524780637e-05, + "loss": 1.7522, + "step": 40423 + }, + { + "epoch": 0.04, + "learning_rate": 4.979962532299176e-05, + "loss": 1.5405, + "step": 40424 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799615397932336e-05, + "loss": 1.2195, + "step": 40425 + }, + { + "epoch": 0.04, + "learning_rate": 4.97996054726281e-05, + "loss": 1.036, + "step": 40426 + }, + { + "epoch": 0.04, + "learning_rate": 4.979959554707905e-05, + "loss": 0.9554, + "step": 40427 + }, + { + "epoch": 0.04, + "learning_rate": 4.979958562128519e-05, + "loss": 1.166, + "step": 40428 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799575695246515e-05, + "loss": 1.0108, + "step": 40429 + }, + { + "epoch": 0.04, + "learning_rate": 4.979956576896303e-05, + "loss": 0.7945, + "step": 40430 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799555842434744e-05, + "loss": 0.8358, + "step": 40431 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799545915661637e-05, + "loss": 0.968, + "step": 40432 + }, + { + "epoch": 0.04, + "learning_rate": 4.979953598864372e-05, + "loss": 1.2343, + "step": 40433 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799526061380996e-05, + "loss": 0.8089, + "step": 40434 + }, + { + "epoch": 0.04, + "learning_rate": 4.979951613387346e-05, + "loss": 1.2418, + "step": 40435 + }, + { + "epoch": 0.04, + "learning_rate": 4.979950620612112e-05, + "loss": 1.0267, + "step": 40436 + }, + { + "epoch": 0.04, + "learning_rate": 4.979949627812396e-05, + "loss": 1.0541, + "step": 40437 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799486349881994e-05, + "loss": 1.0007, + "step": 40438 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799476421395216e-05, + "loss": 1.124, + "step": 40439 + }, + { + "epoch": 0.04, + "learning_rate": 4.979946649266363e-05, + "loss": 1.1527, + "step": 40440 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799456563687236e-05, + "loss": 0.9214, + "step": 40441 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799446634466027e-05, + "loss": 0.7335, + "step": 40442 + }, + { + "epoch": 0.04, + "learning_rate": 4.979943670500001e-05, + "loss": 0.113, + "step": 40443 + }, + { + "epoch": 0.04, + "learning_rate": 4.979942677528919e-05, + "loss": 0.1031, + "step": 40444 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799416845333555e-05, + "loss": 0.137, + "step": 40445 + }, + { + "epoch": 0.04, + "learning_rate": 4.979940691513311e-05, + "loss": 0.0892, + "step": 40446 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799396984687855e-05, + "loss": 0.0601, + "step": 40447 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799387053997795e-05, + "loss": 0.5432, + "step": 40448 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799377123062916e-05, + "loss": 0.9166, + "step": 40449 + }, + { + "epoch": 0.04, + "learning_rate": 4.979936719188324e-05, + "loss": 1.3575, + "step": 40450 + }, + { + "epoch": 0.04, + "learning_rate": 4.979935726045875e-05, + "loss": 1.0591, + "step": 40451 + }, + { + "epoch": 0.04, + "learning_rate": 4.979934732878945e-05, + "loss": 1.0793, + "step": 40452 + }, + { + "epoch": 0.04, + "learning_rate": 4.979933739687535e-05, + "loss": 1.1634, + "step": 40453 + }, + { + "epoch": 0.04, + "learning_rate": 4.979932746471643e-05, + "loss": 1.2072, + "step": 40454 + }, + { + "epoch": 0.04, + "learning_rate": 4.979931753231271e-05, + "loss": 1.4106, + "step": 40455 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799307599664167e-05, + "loss": 1.0665, + "step": 40456 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799297666770825e-05, + "loss": 1.1354, + "step": 40457 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799287733632685e-05, + "loss": 1.0043, + "step": 40458 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799277800249725e-05, + "loss": 1.1989, + "step": 40459 + }, + { + "epoch": 0.04, + "learning_rate": 4.979926786662196e-05, + "loss": 0.8771, + "step": 40460 + }, + { + "epoch": 0.04, + "learning_rate": 4.979925793274938e-05, + "loss": 0.9233, + "step": 40461 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799247998632e-05, + "loss": 0.8634, + "step": 40462 + }, + { + "epoch": 0.04, + "learning_rate": 4.979923806426982e-05, + "loss": 1.248, + "step": 40463 + }, + { + "epoch": 0.04, + "learning_rate": 4.979922812966282e-05, + "loss": 0.9448, + "step": 40464 + }, + { + "epoch": 0.04, + "learning_rate": 4.979921819481102e-05, + "loss": 0.8783, + "step": 40465 + }, + { + "epoch": 0.04, + "learning_rate": 4.97992082597144e-05, + "loss": 1.0168, + "step": 40466 + }, + { + "epoch": 0.04, + "learning_rate": 4.979919832437298e-05, + "loss": 1.4717, + "step": 40467 + }, + { + "epoch": 0.04, + "learning_rate": 4.979918838878676e-05, + "loss": 1.3347, + "step": 40468 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799178452955716e-05, + "loss": 1.3556, + "step": 40469 + }, + { + "epoch": 0.04, + "learning_rate": 4.979916851687988e-05, + "loss": 0.9168, + "step": 40470 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799158580559236e-05, + "loss": 0.8621, + "step": 40471 + }, + { + "epoch": 0.04, + "learning_rate": 4.979914864399378e-05, + "loss": 1.1182, + "step": 40472 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799138707183525e-05, + "loss": 0.9528, + "step": 40473 + }, + { + "epoch": 0.04, + "learning_rate": 4.979912877012845e-05, + "loss": 1.0736, + "step": 40474 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799118832828576e-05, + "loss": 0.8429, + "step": 40475 + }, + { + "epoch": 0.04, + "learning_rate": 4.97991088952839e-05, + "loss": 1.0234, + "step": 40476 + }, + { + "epoch": 0.04, + "learning_rate": 4.979909895749441e-05, + "loss": 1.0059, + "step": 40477 + }, + { + "epoch": 0.04, + "learning_rate": 4.979908901946012e-05, + "loss": 0.7953, + "step": 40478 + }, + { + "epoch": 0.04, + "learning_rate": 4.979907908118102e-05, + "loss": 1.4784, + "step": 40479 + }, + { + "epoch": 0.04, + "learning_rate": 4.979906914265711e-05, + "loss": 1.1617, + "step": 40480 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799059203888396e-05, + "loss": 1.5323, + "step": 40481 + }, + { + "epoch": 0.04, + "learning_rate": 4.979904926487488e-05, + "loss": 1.0616, + "step": 40482 + }, + { + "epoch": 0.04, + "learning_rate": 4.979903932561656e-05, + "loss": 1.1777, + "step": 40483 + }, + { + "epoch": 0.04, + "learning_rate": 4.979902938611343e-05, + "loss": 1.1689, + "step": 40484 + }, + { + "epoch": 0.04, + "learning_rate": 4.97990194463655e-05, + "loss": 0.7069, + "step": 40485 + }, + { + "epoch": 0.04, + "learning_rate": 4.979900950637276e-05, + "loss": 0.5363, + "step": 40486 + }, + { + "epoch": 0.04, + "learning_rate": 4.979899956613522e-05, + "loss": 1.0924, + "step": 40487 + }, + { + "epoch": 0.04, + "learning_rate": 4.979898962565286e-05, + "loss": 1.2736, + "step": 40488 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798979684925706e-05, + "loss": 0.9107, + "step": 40489 + }, + { + "epoch": 0.04, + "learning_rate": 4.979896974395375e-05, + "loss": 0.9895, + "step": 40490 + }, + { + "epoch": 0.04, + "learning_rate": 4.979895980273698e-05, + "loss": 1.1239, + "step": 40491 + }, + { + "epoch": 0.04, + "learning_rate": 4.979894986127541e-05, + "loss": 1.2821, + "step": 40492 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798939919569036e-05, + "loss": 0.9863, + "step": 40493 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798929977617856e-05, + "loss": 1.0842, + "step": 40494 + }, + { + "epoch": 0.04, + "learning_rate": 4.979892003542187e-05, + "loss": 0.9367, + "step": 40495 + }, + { + "epoch": 0.04, + "learning_rate": 4.979891009298108e-05, + "loss": 1.2342, + "step": 40496 + }, + { + "epoch": 0.04, + "learning_rate": 4.979890015029549e-05, + "loss": 0.9419, + "step": 40497 + }, + { + "epoch": 0.04, + "learning_rate": 4.979889020736509e-05, + "loss": 1.6446, + "step": 40498 + }, + { + "epoch": 0.04, + "learning_rate": 4.979888026418989e-05, + "loss": 0.8643, + "step": 40499 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798870320769886e-05, + "loss": 1.1664, + "step": 40500 + }, + { + "epoch": 0.04, + "eval_loss": 1.0778478384017944, + "eval_runtime": 126.3161, + "eval_samples_per_second": 10.965, + "eval_steps_per_second": 5.486, + "step": 40500 + }, + { + "epoch": 0.04, + "learning_rate": 4.979886037710507e-05, + "loss": 1.0081, + "step": 40501 + }, + { + "epoch": 0.04, + "learning_rate": 4.979885043319546e-05, + "loss": 1.2385, + "step": 40502 + }, + { + "epoch": 0.04, + "learning_rate": 4.979884048904104e-05, + "loss": 1.307, + "step": 40503 + }, + { + "epoch": 0.04, + "learning_rate": 4.979883054464182e-05, + "loss": 1.4312, + "step": 40504 + }, + { + "epoch": 0.04, + "learning_rate": 4.97988205999978e-05, + "loss": 0.8789, + "step": 40505 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798810655108965e-05, + "loss": 0.9337, + "step": 40506 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798800709975334e-05, + "loss": 1.0624, + "step": 40507 + }, + { + "epoch": 0.04, + "learning_rate": 4.97987907645969e-05, + "loss": 0.5341, + "step": 40508 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798780818973667e-05, + "loss": 0.835, + "step": 40509 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798770873105624e-05, + "loss": 0.9066, + "step": 40510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798760926992775e-05, + "loss": 0.2816, + "step": 40511 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798750980635134e-05, + "loss": 0.6443, + "step": 40512 + }, + { + "epoch": 0.04, + "learning_rate": 4.979874103403268e-05, + "loss": 1.0581, + "step": 40513 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798731087185426e-05, + "loss": 0.4819, + "step": 40514 + }, + { + "epoch": 0.04, + "learning_rate": 4.979872114009337e-05, + "loss": 0.2877, + "step": 40515 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798711192756514e-05, + "loss": 0.7907, + "step": 40516 + }, + { + "epoch": 0.04, + "learning_rate": 4.979870124517485e-05, + "loss": 0.8198, + "step": 40517 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798691297348385e-05, + "loss": 0.9639, + "step": 40518 + }, + { + "epoch": 0.04, + "learning_rate": 4.979868134927712e-05, + "loss": 1.0129, + "step": 40519 + }, + { + "epoch": 0.04, + "learning_rate": 4.979867140096106e-05, + "loss": 0.4883, + "step": 40520 + }, + { + "epoch": 0.04, + "learning_rate": 4.979866145240018e-05, + "loss": 1.0144, + "step": 40521 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798651503594515e-05, + "loss": 1.0059, + "step": 40522 + }, + { + "epoch": 0.04, + "learning_rate": 4.979864155454405e-05, + "loss": 1.0204, + "step": 40523 + }, + { + "epoch": 0.04, + "learning_rate": 4.979863160524877e-05, + "loss": 0.8635, + "step": 40524 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798621655708695e-05, + "loss": 0.9062, + "step": 40525 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798611705923816e-05, + "loss": 1.1596, + "step": 40526 + }, + { + "epoch": 0.04, + "learning_rate": 4.979860175589414e-05, + "loss": 1.2606, + "step": 40527 + }, + { + "epoch": 0.04, + "learning_rate": 4.979859180561965e-05, + "loss": 0.5067, + "step": 40528 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798581855100376e-05, + "loss": 1.1451, + "step": 40529 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798571904336287e-05, + "loss": 1.1187, + "step": 40530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798561953327404e-05, + "loss": 0.8118, + "step": 40531 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798552002073716e-05, + "loss": 1.1882, + "step": 40532 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798542050575235e-05, + "loss": 0.9043, + "step": 40533 + }, + { + "epoch": 0.04, + "learning_rate": 4.979853209883195e-05, + "loss": 1.0109, + "step": 40534 + }, + { + "epoch": 0.04, + "learning_rate": 4.979852214684386e-05, + "loss": 1.2129, + "step": 40535 + }, + { + "epoch": 0.04, + "learning_rate": 4.979851219461097e-05, + "loss": 1.1097, + "step": 40536 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798502242133285e-05, + "loss": 1.1431, + "step": 40537 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798492289410795e-05, + "loss": 0.9305, + "step": 40538 + }, + { + "epoch": 0.04, + "learning_rate": 4.979848233644351e-05, + "loss": 0.9603, + "step": 40539 + }, + { + "epoch": 0.04, + "learning_rate": 4.979847238323142e-05, + "loss": 1.3143, + "step": 40540 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798462429774526e-05, + "loss": 0.868, + "step": 40541 + }, + { + "epoch": 0.04, + "learning_rate": 4.979845247607284e-05, + "loss": 0.9027, + "step": 40542 + }, + { + "epoch": 0.04, + "learning_rate": 4.979844252212635e-05, + "loss": 0.7703, + "step": 40543 + }, + { + "epoch": 0.04, + "learning_rate": 4.979843256793506e-05, + "loss": 0.9659, + "step": 40544 + }, + { + "epoch": 0.04, + "learning_rate": 4.979842261349897e-05, + "loss": 0.9336, + "step": 40545 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798412658818086e-05, + "loss": 1.3644, + "step": 40546 + }, + { + "epoch": 0.04, + "learning_rate": 4.97984027038924e-05, + "loss": 0.9922, + "step": 40547 + }, + { + "epoch": 0.04, + "learning_rate": 4.979839274872191e-05, + "loss": 1.4754, + "step": 40548 + }, + { + "epoch": 0.04, + "learning_rate": 4.979838279330662e-05, + "loss": 0.8188, + "step": 40549 + }, + { + "epoch": 0.04, + "learning_rate": 4.979837283764653e-05, + "loss": 1.0754, + "step": 40550 + }, + { + "epoch": 0.04, + "learning_rate": 4.979836288174166e-05, + "loss": 0.9152, + "step": 40551 + }, + { + "epoch": 0.04, + "learning_rate": 4.979835292559197e-05, + "loss": 0.7892, + "step": 40552 + }, + { + "epoch": 0.04, + "learning_rate": 4.979834296919749e-05, + "loss": 1.0841, + "step": 40553 + }, + { + "epoch": 0.04, + "learning_rate": 4.979833301255821e-05, + "loss": 1.0044, + "step": 40554 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798323055674124e-05, + "loss": 0.9743, + "step": 40555 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798313098545246e-05, + "loss": 1.1729, + "step": 40556 + }, + { + "epoch": 0.04, + "learning_rate": 4.979830314117157e-05, + "loss": 0.8815, + "step": 40557 + }, + { + "epoch": 0.04, + "learning_rate": 4.979829318355309e-05, + "loss": 0.85, + "step": 40558 + }, + { + "epoch": 0.04, + "learning_rate": 4.979828322568982e-05, + "loss": 1.141, + "step": 40559 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798273267581754e-05, + "loss": 0.942, + "step": 40560 + }, + { + "epoch": 0.04, + "learning_rate": 4.979826330922888e-05, + "loss": 1.0155, + "step": 40561 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798253350631205e-05, + "loss": 1.1933, + "step": 40562 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798243391788745e-05, + "loss": 0.9039, + "step": 40563 + }, + { + "epoch": 0.04, + "learning_rate": 4.979823343270148e-05, + "loss": 1.0931, + "step": 40564 + }, + { + "epoch": 0.04, + "learning_rate": 4.979822347336942e-05, + "loss": 1.1469, + "step": 40565 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798213513792557e-05, + "loss": 0.9339, + "step": 40566 + }, + { + "epoch": 0.04, + "learning_rate": 4.97982035539709e-05, + "loss": 0.6482, + "step": 40567 + }, + { + "epoch": 0.04, + "learning_rate": 4.979819359390445e-05, + "loss": 1.0677, + "step": 40568 + }, + { + "epoch": 0.04, + "learning_rate": 4.97981836335932e-05, + "loss": 1.0192, + "step": 40569 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798173673037153e-05, + "loss": 1.0792, + "step": 40570 + }, + { + "epoch": 0.04, + "learning_rate": 4.97981637122363e-05, + "loss": 0.9144, + "step": 40571 + }, + { + "epoch": 0.04, + "learning_rate": 4.979815375119066e-05, + "loss": 1.0269, + "step": 40572 + }, + { + "epoch": 0.04, + "learning_rate": 4.979814378990022e-05, + "loss": 0.901, + "step": 40573 + }, + { + "epoch": 0.04, + "learning_rate": 4.979813382836498e-05, + "loss": 0.7365, + "step": 40574 + }, + { + "epoch": 0.04, + "learning_rate": 4.979812386658495e-05, + "loss": 0.9899, + "step": 40575 + }, + { + "epoch": 0.04, + "learning_rate": 4.979811390456012e-05, + "loss": 1.0944, + "step": 40576 + }, + { + "epoch": 0.04, + "learning_rate": 4.97981039422905e-05, + "loss": 1.3782, + "step": 40577 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798093979776076e-05, + "loss": 1.0068, + "step": 40578 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798084017016854e-05, + "loss": 0.7401, + "step": 40579 + }, + { + "epoch": 0.04, + "learning_rate": 4.979807405401284e-05, + "loss": 1.0764, + "step": 40580 + }, + { + "epoch": 0.04, + "learning_rate": 4.979806409076403e-05, + "loss": 0.2329, + "step": 40581 + }, + { + "epoch": 0.04, + "learning_rate": 4.979805412727042e-05, + "loss": 0.657, + "step": 40582 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798044163532016e-05, + "loss": 0.649, + "step": 40583 + }, + { + "epoch": 0.04, + "learning_rate": 4.979803419954881e-05, + "loss": 1.1981, + "step": 40584 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798024235320815e-05, + "loss": 0.8992, + "step": 40585 + }, + { + "epoch": 0.04, + "learning_rate": 4.979801427084803e-05, + "loss": 0.5391, + "step": 40586 + }, + { + "epoch": 0.04, + "learning_rate": 4.9798004306130444e-05, + "loss": 0.8977, + "step": 40587 + }, + { + "epoch": 0.04, + "learning_rate": 4.979799434116806e-05, + "loss": 1.1953, + "step": 40588 + }, + { + "epoch": 0.04, + "learning_rate": 4.979798437596088e-05, + "loss": 1.4581, + "step": 40589 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797974410508916e-05, + "loss": 1.1743, + "step": 40590 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797964444812144e-05, + "loss": 1.1606, + "step": 40591 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797954478870586e-05, + "loss": 1.4195, + "step": 40592 + }, + { + "epoch": 0.04, + "learning_rate": 4.979794451268422e-05, + "loss": 0.9894, + "step": 40593 + }, + { + "epoch": 0.04, + "learning_rate": 4.979793454625307e-05, + "loss": 1.1225, + "step": 40594 + }, + { + "epoch": 0.04, + "learning_rate": 4.979792457957713e-05, + "loss": 0.8619, + "step": 40595 + }, + { + "epoch": 0.04, + "learning_rate": 4.979791461265638e-05, + "loss": 0.9453, + "step": 40596 + }, + { + "epoch": 0.04, + "learning_rate": 4.979790464549085e-05, + "loss": 0.6413, + "step": 40597 + }, + { + "epoch": 0.04, + "learning_rate": 4.979789467808051e-05, + "loss": 0.7261, + "step": 40598 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797884710425394e-05, + "loss": 0.9137, + "step": 40599 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797874742525475e-05, + "loss": 1.0982, + "step": 40600 + }, + { + "epoch": 0.04, + "learning_rate": 4.979786477438076e-05, + "loss": 1.0322, + "step": 40601 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797854805991254e-05, + "loss": 1.1399, + "step": 40602 + }, + { + "epoch": 0.04, + "learning_rate": 4.979784483735695e-05, + "loss": 1.2355, + "step": 40603 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797834868477856e-05, + "loss": 0.9711, + "step": 40604 + }, + { + "epoch": 0.04, + "learning_rate": 4.979782489935396e-05, + "loss": 0.8249, + "step": 40605 + }, + { + "epoch": 0.04, + "learning_rate": 4.979781492998528e-05, + "loss": 1.0791, + "step": 40606 + }, + { + "epoch": 0.04, + "learning_rate": 4.979780496037181e-05, + "loss": 0.9677, + "step": 40607 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797794990513535e-05, + "loss": 0.9108, + "step": 40608 + }, + { + "epoch": 0.04, + "learning_rate": 4.979778502041047e-05, + "loss": 1.0888, + "step": 40609 + }, + { + "epoch": 0.04, + "learning_rate": 4.979777505006261e-05, + "loss": 0.4806, + "step": 40610 + }, + { + "epoch": 0.04, + "learning_rate": 4.979776507946996e-05, + "loss": 1.021, + "step": 40611 + }, + { + "epoch": 0.04, + "learning_rate": 4.979775510863252e-05, + "loss": 0.9507, + "step": 40612 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797745137550286e-05, + "loss": 1.0948, + "step": 40613 + }, + { + "epoch": 0.04, + "learning_rate": 4.979773516622326e-05, + "loss": 0.9403, + "step": 40614 + }, + { + "epoch": 0.04, + "learning_rate": 4.979772519465143e-05, + "loss": 1.2827, + "step": 40615 + }, + { + "epoch": 0.04, + "learning_rate": 4.979771522283482e-05, + "loss": 1.0337, + "step": 40616 + }, + { + "epoch": 0.04, + "learning_rate": 4.979770525077342e-05, + "loss": 1.2199, + "step": 40617 + }, + { + "epoch": 0.04, + "learning_rate": 4.979769527846722e-05, + "loss": 1.224, + "step": 40618 + }, + { + "epoch": 0.04, + "learning_rate": 4.979768530591622e-05, + "loss": 0.9633, + "step": 40619 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797675333120444e-05, + "loss": 1.0227, + "step": 40620 + }, + { + "epoch": 0.04, + "learning_rate": 4.979766536007986e-05, + "loss": 1.2224, + "step": 40621 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797655386794495e-05, + "loss": 1.2391, + "step": 40622 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797645413264335e-05, + "loss": 0.6712, + "step": 40623 + }, + { + "epoch": 0.04, + "learning_rate": 4.979763543948938e-05, + "loss": 0.7211, + "step": 40624 + }, + { + "epoch": 0.04, + "learning_rate": 4.979762546546964e-05, + "loss": 0.9912, + "step": 40625 + }, + { + "epoch": 0.04, + "learning_rate": 4.97976154912051e-05, + "loss": 0.8863, + "step": 40626 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797605516695776e-05, + "loss": 0.8564, + "step": 40627 + }, + { + "epoch": 0.04, + "learning_rate": 4.979759554194165e-05, + "loss": 1.4876, + "step": 40628 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797585566942745e-05, + "loss": 1.2812, + "step": 40629 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797575591699044e-05, + "loss": 2.071, + "step": 40630 + }, + { + "epoch": 0.04, + "learning_rate": 4.979756561621055e-05, + "loss": 1.1933, + "step": 40631 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797555640477264e-05, + "loss": 1.0193, + "step": 40632 + }, + { + "epoch": 0.04, + "learning_rate": 4.979754566449919e-05, + "loss": 0.9944, + "step": 40633 + }, + { + "epoch": 0.04, + "learning_rate": 4.979753568827633e-05, + "loss": 1.0738, + "step": 40634 + }, + { + "epoch": 0.04, + "learning_rate": 4.979752571180867e-05, + "loss": 1.2131, + "step": 40635 + }, + { + "epoch": 0.04, + "learning_rate": 4.979751573509622e-05, + "loss": 1.0412, + "step": 40636 + }, + { + "epoch": 0.04, + "learning_rate": 4.979750575813899e-05, + "loss": 1.1766, + "step": 40637 + }, + { + "epoch": 0.04, + "learning_rate": 4.979749578093695e-05, + "loss": 1.3725, + "step": 40638 + }, + { + "epoch": 0.04, + "learning_rate": 4.979748580349013e-05, + "loss": 0.8553, + "step": 40639 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797475825798526e-05, + "loss": 1.1618, + "step": 40640 + }, + { + "epoch": 0.04, + "learning_rate": 4.979746584786212e-05, + "loss": 0.8989, + "step": 40641 + }, + { + "epoch": 0.04, + "learning_rate": 4.979745586968094e-05, + "loss": 0.6319, + "step": 40642 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797445891254954e-05, + "loss": 0.9079, + "step": 40643 + }, + { + "epoch": 0.04, + "learning_rate": 4.979743591258419e-05, + "loss": 0.8157, + "step": 40644 + }, + { + "epoch": 0.04, + "learning_rate": 4.979742593366863e-05, + "loss": 1.0585, + "step": 40645 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797415954508275e-05, + "loss": 1.171, + "step": 40646 + }, + { + "epoch": 0.04, + "learning_rate": 4.979740597510314e-05, + "loss": 0.948, + "step": 40647 + }, + { + "epoch": 0.04, + "learning_rate": 4.979739599545321e-05, + "loss": 1.1799, + "step": 40648 + }, + { + "epoch": 0.04, + "learning_rate": 4.979738601555849e-05, + "loss": 1.1419, + "step": 40649 + }, + { + "epoch": 0.04, + "learning_rate": 4.979737603541899e-05, + "loss": 0.9446, + "step": 40650 + }, + { + "epoch": 0.04, + "learning_rate": 4.979736605503469e-05, + "loss": 1.2455, + "step": 40651 + }, + { + "epoch": 0.04, + "learning_rate": 4.97973560744056e-05, + "loss": 0.6968, + "step": 40652 + }, + { + "epoch": 0.04, + "learning_rate": 4.979734609353173e-05, + "loss": 0.8307, + "step": 40653 + }, + { + "epoch": 0.04, + "learning_rate": 4.979733611241306e-05, + "loss": 0.8944, + "step": 40654 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797326131049607e-05, + "loss": 1.485, + "step": 40655 + }, + { + "epoch": 0.04, + "learning_rate": 4.979731614944137e-05, + "loss": 0.6272, + "step": 40656 + }, + { + "epoch": 0.04, + "learning_rate": 4.979730616758834e-05, + "loss": 1.0061, + "step": 40657 + }, + { + "epoch": 0.04, + "learning_rate": 4.979729618549052e-05, + "loss": 0.7641, + "step": 40658 + }, + { + "epoch": 0.04, + "learning_rate": 4.979728620314792e-05, + "loss": 0.8742, + "step": 40659 + }, + { + "epoch": 0.04, + "learning_rate": 4.979727622056052e-05, + "loss": 0.8394, + "step": 40660 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797266237728333e-05, + "loss": 0.9611, + "step": 40661 + }, + { + "epoch": 0.04, + "learning_rate": 4.979725625465137e-05, + "loss": 0.9958, + "step": 40662 + }, + { + "epoch": 0.04, + "learning_rate": 4.97972462713296e-05, + "loss": 0.9443, + "step": 40663 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797236287763055e-05, + "loss": 0.9862, + "step": 40664 + }, + { + "epoch": 0.04, + "learning_rate": 4.979722630395172e-05, + "loss": 0.7253, + "step": 40665 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797216319895594e-05, + "loss": 0.5387, + "step": 40666 + }, + { + "epoch": 0.04, + "learning_rate": 4.979720633559468e-05, + "loss": 1.2267, + "step": 40667 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797196351048984e-05, + "loss": 1.148, + "step": 40668 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797186366258496e-05, + "loss": 1.2058, + "step": 40669 + }, + { + "epoch": 0.04, + "learning_rate": 4.979717638122322e-05, + "loss": 0.8128, + "step": 40670 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797166395943164e-05, + "loss": 1.1351, + "step": 40671 + }, + { + "epoch": 0.04, + "learning_rate": 4.979715641041832e-05, + "loss": 1.1355, + "step": 40672 + }, + { + "epoch": 0.04, + "learning_rate": 4.979714642464868e-05, + "loss": 1.1695, + "step": 40673 + }, + { + "epoch": 0.04, + "learning_rate": 4.979713643863426e-05, + "loss": 1.1367, + "step": 40674 + }, + { + "epoch": 0.04, + "learning_rate": 4.979712645237505e-05, + "loss": 0.841, + "step": 40675 + }, + { + "epoch": 0.04, + "learning_rate": 4.979711646587105e-05, + "loss": 0.9031, + "step": 40676 + }, + { + "epoch": 0.04, + "learning_rate": 4.979710647912228e-05, + "loss": 1.1988, + "step": 40677 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797096492128704e-05, + "loss": 1.1432, + "step": 40678 + }, + { + "epoch": 0.04, + "learning_rate": 4.979708650489035e-05, + "loss": 1.3378, + "step": 40679 + }, + { + "epoch": 0.04, + "learning_rate": 4.97970765174072e-05, + "loss": 1.2303, + "step": 40680 + }, + { + "epoch": 0.04, + "learning_rate": 4.979706652967928e-05, + "loss": 1.0918, + "step": 40681 + }, + { + "epoch": 0.04, + "learning_rate": 4.979705654170656e-05, + "loss": 1.2434, + "step": 40682 + }, + { + "epoch": 0.04, + "learning_rate": 4.979704655348906e-05, + "loss": 1.1039, + "step": 40683 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797036565026775e-05, + "loss": 1.1193, + "step": 40684 + }, + { + "epoch": 0.04, + "learning_rate": 4.979702657631971e-05, + "loss": 1.0276, + "step": 40685 + }, + { + "epoch": 0.04, + "learning_rate": 4.979701658736785e-05, + "loss": 0.9569, + "step": 40686 + }, + { + "epoch": 0.04, + "learning_rate": 4.9797006598171206e-05, + "loss": 1.0207, + "step": 40687 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796996608729775e-05, + "loss": 1.0177, + "step": 40688 + }, + { + "epoch": 0.04, + "learning_rate": 4.979698661904356e-05, + "loss": 0.5871, + "step": 40689 + }, + { + "epoch": 0.04, + "learning_rate": 4.979697662911256e-05, + "loss": 0.2233, + "step": 40690 + }, + { + "epoch": 0.04, + "learning_rate": 4.979696663893677e-05, + "loss": 0.182, + "step": 40691 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796956648516204e-05, + "loss": 0.716, + "step": 40692 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796946657850846e-05, + "loss": 1.1881, + "step": 40693 + }, + { + "epoch": 0.04, + "learning_rate": 4.979693666694071e-05, + "loss": 1.7227, + "step": 40694 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796926675785784e-05, + "loss": 1.0681, + "step": 40695 + }, + { + "epoch": 0.04, + "learning_rate": 4.979691668438607e-05, + "loss": 0.8764, + "step": 40696 + }, + { + "epoch": 0.04, + "learning_rate": 4.979690669274157e-05, + "loss": 1.2415, + "step": 40697 + }, + { + "epoch": 0.04, + "learning_rate": 4.979689670085229e-05, + "loss": 1.4504, + "step": 40698 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796886708718226e-05, + "loss": 0.8389, + "step": 40699 + }, + { + "epoch": 0.04, + "learning_rate": 4.979687671633938e-05, + "loss": 1.1211, + "step": 40700 + }, + { + "epoch": 0.04, + "learning_rate": 4.979686672371575e-05, + "loss": 0.8011, + "step": 40701 + }, + { + "epoch": 0.04, + "learning_rate": 4.979685673084733e-05, + "loss": 1.0731, + "step": 40702 + }, + { + "epoch": 0.04, + "learning_rate": 4.979684673773413e-05, + "loss": 1.0304, + "step": 40703 + }, + { + "epoch": 0.04, + "learning_rate": 4.979683674437614e-05, + "loss": 0.8665, + "step": 40704 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796826750773374e-05, + "loss": 0.9637, + "step": 40705 + }, + { + "epoch": 0.04, + "learning_rate": 4.979681675692582e-05, + "loss": 0.3584, + "step": 40706 + }, + { + "epoch": 0.04, + "learning_rate": 4.979680676283348e-05, + "loss": 0.1462, + "step": 40707 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796796768496364e-05, + "loss": 0.1672, + "step": 40708 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796786773914453e-05, + "loss": 0.8188, + "step": 40709 + }, + { + "epoch": 0.04, + "learning_rate": 4.979677677908777e-05, + "loss": 1.0545, + "step": 40710 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796766784016296e-05, + "loss": 0.8262, + "step": 40711 + }, + { + "epoch": 0.04, + "learning_rate": 4.979675678870004e-05, + "loss": 1.1985, + "step": 40712 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796746793139e-05, + "loss": 0.9251, + "step": 40713 + }, + { + "epoch": 0.04, + "learning_rate": 4.979673679733319e-05, + "loss": 1.0197, + "step": 40714 + }, + { + "epoch": 0.04, + "learning_rate": 4.979672680128258e-05, + "loss": 0.4224, + "step": 40715 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796716804987195e-05, + "loss": 0.8796, + "step": 40716 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796706808447026e-05, + "loss": 0.9384, + "step": 40717 + }, + { + "epoch": 0.04, + "learning_rate": 4.979669681166208e-05, + "loss": 0.9805, + "step": 40718 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796686814632343e-05, + "loss": 0.7653, + "step": 40719 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796676817357824e-05, + "loss": 0.9247, + "step": 40720 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796666819838525e-05, + "loss": 0.9989, + "step": 40721 + }, + { + "epoch": 0.04, + "learning_rate": 4.979665682207445e-05, + "loss": 0.8841, + "step": 40722 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796646824065584e-05, + "loss": 0.997, + "step": 40723 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796636825811935e-05, + "loss": 0.8537, + "step": 40724 + }, + { + "epoch": 0.04, + "learning_rate": 4.979662682731351e-05, + "loss": 0.9708, + "step": 40725 + }, + { + "epoch": 0.04, + "learning_rate": 4.97966168285703e-05, + "loss": 0.8826, + "step": 40726 + }, + { + "epoch": 0.04, + "learning_rate": 4.979660682958231e-05, + "loss": 1.089, + "step": 40727 + }, + { + "epoch": 0.04, + "learning_rate": 4.979659683034954e-05, + "loss": 0.7987, + "step": 40728 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796586830871984e-05, + "loss": 1.0305, + "step": 40729 + }, + { + "epoch": 0.04, + "learning_rate": 4.979657683114964e-05, + "loss": 1.0633, + "step": 40730 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796566831182525e-05, + "loss": 0.7876, + "step": 40731 + }, + { + "epoch": 0.04, + "learning_rate": 4.979655683097063e-05, + "loss": 0.7105, + "step": 40732 + }, + { + "epoch": 0.04, + "learning_rate": 4.979654683051395e-05, + "loss": 1.1563, + "step": 40733 + }, + { + "epoch": 0.04, + "learning_rate": 4.979653682981249e-05, + "loss": 0.6082, + "step": 40734 + }, + { + "epoch": 0.04, + "learning_rate": 4.979652682886625e-05, + "loss": 1.0015, + "step": 40735 + }, + { + "epoch": 0.04, + "learning_rate": 4.979651682767523e-05, + "loss": 1.0619, + "step": 40736 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796506826239427e-05, + "loss": 1.1714, + "step": 40737 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796496824558845e-05, + "loss": 0.6498, + "step": 40738 + }, + { + "epoch": 0.04, + "learning_rate": 4.979648682263348e-05, + "loss": 1.2718, + "step": 40739 + }, + { + "epoch": 0.04, + "learning_rate": 4.979647682046333e-05, + "loss": 1.0274, + "step": 40740 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796466818048405e-05, + "loss": 1.2354, + "step": 40741 + }, + { + "epoch": 0.04, + "learning_rate": 4.97964568153887e-05, + "loss": 0.9614, + "step": 40742 + }, + { + "epoch": 0.04, + "learning_rate": 4.979644681248422e-05, + "loss": 0.9431, + "step": 40743 + }, + { + "epoch": 0.04, + "learning_rate": 4.979643680933495e-05, + "loss": 1.0266, + "step": 40744 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796426805940907e-05, + "loss": 0.9752, + "step": 40745 + }, + { + "epoch": 0.04, + "learning_rate": 4.979641680230208e-05, + "loss": 1.1483, + "step": 40746 + }, + { + "epoch": 0.04, + "learning_rate": 4.979640679841847e-05, + "loss": 0.7543, + "step": 40747 + }, + { + "epoch": 0.04, + "learning_rate": 4.979639679429009e-05, + "loss": 0.8442, + "step": 40748 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796386789916924e-05, + "loss": 0.8936, + "step": 40749 + }, + { + "epoch": 0.04, + "learning_rate": 4.979637678529898e-05, + "loss": 0.8388, + "step": 40750 + }, + { + "epoch": 0.04, + "learning_rate": 4.979636678043625e-05, + "loss": 1.0148, + "step": 40751 + }, + { + "epoch": 0.04, + "learning_rate": 4.979635677532876e-05, + "loss": 0.9627, + "step": 40752 + }, + { + "epoch": 0.04, + "learning_rate": 4.979634676997648e-05, + "loss": 1.2127, + "step": 40753 + }, + { + "epoch": 0.04, + "learning_rate": 4.979633676437941e-05, + "loss": 1.0287, + "step": 40754 + }, + { + "epoch": 0.04, + "learning_rate": 4.979632675853757e-05, + "loss": 1.0668, + "step": 40755 + }, + { + "epoch": 0.04, + "learning_rate": 4.979631675245096e-05, + "loss": 2.0564, + "step": 40756 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796306746119555e-05, + "loss": 1.3779, + "step": 40757 + }, + { + "epoch": 0.04, + "learning_rate": 4.979629673954338e-05, + "loss": 1.2846, + "step": 40758 + }, + { + "epoch": 0.04, + "learning_rate": 4.979628673272243e-05, + "loss": 1.086, + "step": 40759 + }, + { + "epoch": 0.04, + "learning_rate": 4.979627672565669e-05, + "loss": 1.4195, + "step": 40760 + }, + { + "epoch": 0.04, + "learning_rate": 4.979626671834618e-05, + "loss": 1.0227, + "step": 40761 + }, + { + "epoch": 0.04, + "learning_rate": 4.979625671079089e-05, + "loss": 1.2238, + "step": 40762 + }, + { + "epoch": 0.04, + "learning_rate": 4.979624670299082e-05, + "loss": 1.0829, + "step": 40763 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796236694945974e-05, + "loss": 1.0518, + "step": 40764 + }, + { + "epoch": 0.04, + "learning_rate": 4.979622668665635e-05, + "loss": 1.047, + "step": 40765 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796216678121946e-05, + "loss": 1.1603, + "step": 40766 + }, + { + "epoch": 0.04, + "learning_rate": 4.979620666934277e-05, + "loss": 0.9955, + "step": 40767 + }, + { + "epoch": 0.04, + "learning_rate": 4.979619666031881e-05, + "loss": 0.8709, + "step": 40768 + }, + { + "epoch": 0.04, + "learning_rate": 4.979618665105007e-05, + "loss": 1.0375, + "step": 40769 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796176641536565e-05, + "loss": 0.8333, + "step": 40770 + }, + { + "epoch": 0.04, + "learning_rate": 4.979616663177828e-05, + "loss": 0.9025, + "step": 40771 + }, + { + "epoch": 0.04, + "learning_rate": 4.97961566217752e-05, + "loss": 0.8363, + "step": 40772 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796146611527364e-05, + "loss": 0.5365, + "step": 40773 + }, + { + "epoch": 0.04, + "learning_rate": 4.979613660103474e-05, + "loss": 0.7728, + "step": 40774 + }, + { + "epoch": 0.04, + "learning_rate": 4.979612659029734e-05, + "loss": 0.6163, + "step": 40775 + }, + { + "epoch": 0.04, + "learning_rate": 4.979611657931517e-05, + "loss": 0.5488, + "step": 40776 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796106568088205e-05, + "loss": 0.7245, + "step": 40777 + }, + { + "epoch": 0.04, + "learning_rate": 4.979609655661648e-05, + "loss": 0.7005, + "step": 40778 + }, + { + "epoch": 0.04, + "learning_rate": 4.979608654489998e-05, + "loss": 0.9557, + "step": 40779 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796076532938694e-05, + "loss": 1.1047, + "step": 40780 + }, + { + "epoch": 0.04, + "learning_rate": 4.979606652073263e-05, + "loss": 0.6328, + "step": 40781 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796056508281794e-05, + "loss": 0.0855, + "step": 40782 + }, + { + "epoch": 0.04, + "learning_rate": 4.979604649558619e-05, + "loss": 0.0651, + "step": 40783 + }, + { + "epoch": 0.04, + "learning_rate": 4.9796036482645805e-05, + "loss": 0.2659, + "step": 40784 + }, + { + "epoch": 0.04, + "learning_rate": 4.979602646946063e-05, + "loss": 0.9159, + "step": 40785 + }, + { + "epoch": 0.04, + "learning_rate": 4.97960164560307e-05, + "loss": 0.9664, + "step": 40786 + }, + { + "epoch": 0.04, + "learning_rate": 4.979600644235598e-05, + "loss": 0.6937, + "step": 40787 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795996428436486e-05, + "loss": 0.851, + "step": 40788 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795986414272224e-05, + "loss": 1.1629, + "step": 40789 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795976399863184e-05, + "loss": 1.0847, + "step": 40790 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795966385209364e-05, + "loss": 1.027, + "step": 40791 + }, + { + "epoch": 0.04, + "learning_rate": 4.979595637031077e-05, + "loss": 0.847, + "step": 40792 + }, + { + "epoch": 0.04, + "learning_rate": 4.97959463551674e-05, + "loss": 0.9571, + "step": 40793 + }, + { + "epoch": 0.04, + "learning_rate": 4.979593633977926e-05, + "loss": 0.937, + "step": 40794 + }, + { + "epoch": 0.04, + "learning_rate": 4.979592632414634e-05, + "loss": 1.0992, + "step": 40795 + }, + { + "epoch": 0.04, + "learning_rate": 4.979591630826865e-05, + "loss": 1.083, + "step": 40796 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795906292146175e-05, + "loss": 1.2643, + "step": 40797 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795896275778936e-05, + "loss": 1.0904, + "step": 40798 + }, + { + "epoch": 0.04, + "learning_rate": 4.979588625916692e-05, + "loss": 0.9749, + "step": 40799 + }, + { + "epoch": 0.04, + "learning_rate": 4.979587624231012e-05, + "loss": 0.9231, + "step": 40800 + }, + { + "epoch": 0.04, + "learning_rate": 4.979586622520856e-05, + "loss": 0.9092, + "step": 40801 + }, + { + "epoch": 0.04, + "learning_rate": 4.979585620786222e-05, + "loss": 0.7054, + "step": 40802 + }, + { + "epoch": 0.04, + "learning_rate": 4.97958461902711e-05, + "loss": 1.0353, + "step": 40803 + }, + { + "epoch": 0.04, + "learning_rate": 4.979583617243522e-05, + "loss": 1.3114, + "step": 40804 + }, + { + "epoch": 0.04, + "learning_rate": 4.979582615435455e-05, + "loss": 1.3163, + "step": 40805 + }, + { + "epoch": 0.04, + "learning_rate": 4.979581613602912e-05, + "loss": 0.8458, + "step": 40806 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795806117458904e-05, + "loss": 1.0279, + "step": 40807 + }, + { + "epoch": 0.04, + "learning_rate": 4.979579609864392e-05, + "loss": 0.8615, + "step": 40808 + }, + { + "epoch": 0.04, + "learning_rate": 4.979578607958416e-05, + "loss": 0.9482, + "step": 40809 + }, + { + "epoch": 0.04, + "learning_rate": 4.979577606027963e-05, + "loss": 0.9214, + "step": 40810 + }, + { + "epoch": 0.04, + "learning_rate": 4.979576604073032e-05, + "loss": 1.188, + "step": 40811 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795756020936244e-05, + "loss": 1.1021, + "step": 40812 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795746000897395e-05, + "loss": 1.0787, + "step": 40813 + }, + { + "epoch": 0.04, + "learning_rate": 4.979573598061377e-05, + "loss": 0.9858, + "step": 40814 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795725960085374e-05, + "loss": 1.016, + "step": 40815 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795715939312195e-05, + "loss": 1.0312, + "step": 40816 + }, + { + "epoch": 0.04, + "learning_rate": 4.979570591829426e-05, + "loss": 0.8616, + "step": 40817 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795695897031535e-05, + "loss": 1.1016, + "step": 40818 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795685875524046e-05, + "loss": 1.268, + "step": 40819 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795675853771785e-05, + "loss": 0.9939, + "step": 40820 + }, + { + "epoch": 0.04, + "learning_rate": 4.979566583177475e-05, + "loss": 0.9836, + "step": 40821 + }, + { + "epoch": 0.04, + "learning_rate": 4.979565580953294e-05, + "loss": 0.8793, + "step": 40822 + }, + { + "epoch": 0.04, + "learning_rate": 4.979564578704636e-05, + "loss": 1.5872, + "step": 40823 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795635764315014e-05, + "loss": 1.3929, + "step": 40824 + }, + { + "epoch": 0.04, + "learning_rate": 4.979562574133889e-05, + "loss": 1.4169, + "step": 40825 + }, + { + "epoch": 0.04, + "learning_rate": 4.979561571811799e-05, + "loss": 1.0343, + "step": 40826 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795605694652325e-05, + "loss": 1.2037, + "step": 40827 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795595670941887e-05, + "loss": 1.0374, + "step": 40828 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795585646986676e-05, + "loss": 1.1486, + "step": 40829 + }, + { + "epoch": 0.04, + "learning_rate": 4.979557562278669e-05, + "loss": 0.868, + "step": 40830 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795565598341945e-05, + "loss": 0.9121, + "step": 40831 + }, + { + "epoch": 0.04, + "learning_rate": 4.979555557365241e-05, + "loss": 1.0718, + "step": 40832 + }, + { + "epoch": 0.04, + "learning_rate": 4.979554554871811e-05, + "loss": 0.8771, + "step": 40833 + }, + { + "epoch": 0.04, + "learning_rate": 4.979553552353905e-05, + "loss": 1.5028, + "step": 40834 + }, + { + "epoch": 0.04, + "learning_rate": 4.979552549811522e-05, + "loss": 1.1552, + "step": 40835 + }, + { + "epoch": 0.04, + "learning_rate": 4.97955154724466e-05, + "loss": 0.9703, + "step": 40836 + }, + { + "epoch": 0.04, + "learning_rate": 4.979550544653322e-05, + "loss": 0.8517, + "step": 40837 + }, + { + "epoch": 0.04, + "learning_rate": 4.979549542037507e-05, + "loss": 0.9362, + "step": 40838 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795485393972154e-05, + "loss": 0.8521, + "step": 40839 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795475367324456e-05, + "loss": 0.5898, + "step": 40840 + }, + { + "epoch": 0.04, + "learning_rate": 4.979546534043199e-05, + "loss": 0.7565, + "step": 40841 + }, + { + "epoch": 0.04, + "learning_rate": 4.979545531329476e-05, + "loss": 0.6696, + "step": 40842 + }, + { + "epoch": 0.04, + "learning_rate": 4.979544528591276e-05, + "loss": 0.8595, + "step": 40843 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795435258285984e-05, + "loss": 1.0117, + "step": 40844 + }, + { + "epoch": 0.04, + "learning_rate": 4.979542523041444e-05, + "loss": 0.9242, + "step": 40845 + }, + { + "epoch": 0.04, + "learning_rate": 4.979541520229812e-05, + "loss": 0.4601, + "step": 40846 + }, + { + "epoch": 0.04, + "learning_rate": 4.979540517393704e-05, + "loss": 1.0551, + "step": 40847 + }, + { + "epoch": 0.04, + "learning_rate": 4.979539514533118e-05, + "loss": 1.2189, + "step": 40848 + }, + { + "epoch": 0.04, + "learning_rate": 4.979538511648056e-05, + "loss": 1.1547, + "step": 40849 + }, + { + "epoch": 0.04, + "learning_rate": 4.979537508738516e-05, + "loss": 1.0173, + "step": 40850 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795365058045005e-05, + "loss": 1.1752, + "step": 40851 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795355028460074e-05, + "loss": 1.1225, + "step": 40852 + }, + { + "epoch": 0.04, + "learning_rate": 4.979534499863038e-05, + "loss": 0.8081, + "step": 40853 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795334968555904e-05, + "loss": 0.9704, + "step": 40854 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795324938236664e-05, + "loss": 1.064, + "step": 40855 + }, + { + "epoch": 0.04, + "learning_rate": 4.979531490767265e-05, + "loss": 1.2471, + "step": 40856 + }, + { + "epoch": 0.04, + "learning_rate": 4.979530487686388e-05, + "loss": 0.8457, + "step": 40857 + }, + { + "epoch": 0.04, + "learning_rate": 4.979529484581033e-05, + "loss": 0.9813, + "step": 40858 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795284814512015e-05, + "loss": 1.0597, + "step": 40859 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795274782968935e-05, + "loss": 0.857, + "step": 40860 + }, + { + "epoch": 0.04, + "learning_rate": 4.979526475118108e-05, + "loss": 0.5628, + "step": 40861 + }, + { + "epoch": 0.04, + "learning_rate": 4.979525471914846e-05, + "loss": 0.6094, + "step": 40862 + }, + { + "epoch": 0.04, + "learning_rate": 4.979524468687107e-05, + "loss": 1.1697, + "step": 40863 + }, + { + "epoch": 0.04, + "learning_rate": 4.979523465434892e-05, + "loss": 0.8584, + "step": 40864 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795224621581995e-05, + "loss": 0.8765, + "step": 40865 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795214588570295e-05, + "loss": 1.021, + "step": 40866 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795204555313836e-05, + "loss": 0.8593, + "step": 40867 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795194521812605e-05, + "loss": 0.5363, + "step": 40868 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795184488066615e-05, + "loss": 1.1051, + "step": 40869 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795174454075846e-05, + "loss": 0.7458, + "step": 40870 + }, + { + "epoch": 0.04, + "learning_rate": 4.979516441984031e-05, + "loss": 1.0879, + "step": 40871 + }, + { + "epoch": 0.04, + "learning_rate": 4.979515438536002e-05, + "loss": 1.6994, + "step": 40872 + }, + { + "epoch": 0.04, + "learning_rate": 4.979514435063495e-05, + "loss": 0.9293, + "step": 40873 + }, + { + "epoch": 0.04, + "learning_rate": 4.979513431566512e-05, + "loss": 0.9087, + "step": 40874 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795124280450514e-05, + "loss": 0.8977, + "step": 40875 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795114244991146e-05, + "loss": 1.1037, + "step": 40876 + }, + { + "epoch": 0.04, + "learning_rate": 4.979510420928701e-05, + "loss": 0.9806, + "step": 40877 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795094173338114e-05, + "loss": 1.0297, + "step": 40878 + }, + { + "epoch": 0.04, + "learning_rate": 4.979508413714444e-05, + "loss": 1.2711, + "step": 40879 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795074100706006e-05, + "loss": 0.8142, + "step": 40880 + }, + { + "epoch": 0.04, + "learning_rate": 4.979506406402281e-05, + "loss": 0.9367, + "step": 40881 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795054027094844e-05, + "loss": 2.0353, + "step": 40882 + }, + { + "epoch": 0.04, + "learning_rate": 4.979504398992211e-05, + "loss": 1.5174, + "step": 40883 + }, + { + "epoch": 0.04, + "learning_rate": 4.979503395250461e-05, + "loss": 0.8455, + "step": 40884 + }, + { + "epoch": 0.04, + "learning_rate": 4.979502391484234e-05, + "loss": 0.4455, + "step": 40885 + }, + { + "epoch": 0.04, + "learning_rate": 4.9795013876935307e-05, + "loss": 1.2014, + "step": 40886 + }, + { + "epoch": 0.04, + "learning_rate": 4.979500383878351e-05, + "loss": 0.933, + "step": 40887 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794993800386945e-05, + "loss": 0.9475, + "step": 40888 + }, + { + "epoch": 0.04, + "learning_rate": 4.979498376174562e-05, + "loss": 1.127, + "step": 40889 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794973722859516e-05, + "loss": 0.7486, + "step": 40890 + }, + { + "epoch": 0.04, + "learning_rate": 4.979496368372866e-05, + "loss": 1.0076, + "step": 40891 + }, + { + "epoch": 0.04, + "learning_rate": 4.979495364435303e-05, + "loss": 1.1111, + "step": 40892 + }, + { + "epoch": 0.04, + "learning_rate": 4.979494360473264e-05, + "loss": 0.7144, + "step": 40893 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794933564867486e-05, + "loss": 0.8178, + "step": 40894 + }, + { + "epoch": 0.04, + "learning_rate": 4.979492352475756e-05, + "loss": 0.6377, + "step": 40895 + }, + { + "epoch": 0.04, + "learning_rate": 4.979491348440287e-05, + "loss": 0.7227, + "step": 40896 + }, + { + "epoch": 0.04, + "learning_rate": 4.979490344380342e-05, + "loss": 0.8179, + "step": 40897 + }, + { + "epoch": 0.04, + "learning_rate": 4.979489340295921e-05, + "loss": 0.6143, + "step": 40898 + }, + { + "epoch": 0.04, + "learning_rate": 4.979488336187023e-05, + "loss": 0.3968, + "step": 40899 + }, + { + "epoch": 0.04, + "learning_rate": 4.979487332053648e-05, + "loss": 0.2904, + "step": 40900 + }, + { + "epoch": 0.04, + "learning_rate": 4.979486327895797e-05, + "loss": 0.2312, + "step": 40901 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794853237134685e-05, + "loss": 0.1676, + "step": 40902 + }, + { + "epoch": 0.04, + "learning_rate": 4.979484319506666e-05, + "loss": 0.4419, + "step": 40903 + }, + { + "epoch": 0.04, + "learning_rate": 4.979483315275385e-05, + "loss": 0.8777, + "step": 40904 + }, + { + "epoch": 0.04, + "learning_rate": 4.979482311019628e-05, + "loss": 1.0197, + "step": 40905 + }, + { + "epoch": 0.04, + "learning_rate": 4.979481306739395e-05, + "loss": 1.1007, + "step": 40906 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794803024346845e-05, + "loss": 1.0292, + "step": 40907 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794792981055e-05, + "loss": 0.8755, + "step": 40908 + }, + { + "epoch": 0.04, + "learning_rate": 4.979478293751837e-05, + "loss": 1.2952, + "step": 40909 + }, + { + "epoch": 0.04, + "learning_rate": 4.979477289373698e-05, + "loss": 1.0761, + "step": 40910 + }, + { + "epoch": 0.04, + "learning_rate": 4.979476284971083e-05, + "loss": 1.0876, + "step": 40911 + }, + { + "epoch": 0.04, + "learning_rate": 4.979475280543992e-05, + "loss": 1.0971, + "step": 40912 + }, + { + "epoch": 0.04, + "learning_rate": 4.979474276092424e-05, + "loss": 1.36, + "step": 40913 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794732716163796e-05, + "loss": 1.4432, + "step": 40914 + }, + { + "epoch": 0.04, + "learning_rate": 4.979472267115859e-05, + "loss": 0.7877, + "step": 40915 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794712625908626e-05, + "loss": 0.8794, + "step": 40916 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794702580413896e-05, + "loss": 0.9831, + "step": 40917 + }, + { + "epoch": 0.04, + "learning_rate": 4.97946925346744e-05, + "loss": 0.5918, + "step": 40918 + }, + { + "epoch": 0.04, + "learning_rate": 4.979468248869015e-05, + "loss": 1.021, + "step": 40919 + }, + { + "epoch": 0.04, + "learning_rate": 4.979467244246113e-05, + "loss": 0.9207, + "step": 40920 + }, + { + "epoch": 0.04, + "learning_rate": 4.979466239598734e-05, + "loss": 0.9763, + "step": 40921 + }, + { + "epoch": 0.04, + "learning_rate": 4.97946523492688e-05, + "loss": 1.0186, + "step": 40922 + }, + { + "epoch": 0.04, + "learning_rate": 4.97946423023055e-05, + "loss": 1.014, + "step": 40923 + }, + { + "epoch": 0.04, + "learning_rate": 4.979463225509743e-05, + "loss": 1.7229, + "step": 40924 + }, + { + "epoch": 0.04, + "learning_rate": 4.97946222076446e-05, + "loss": 4.5377, + "step": 40925 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794612159947006e-05, + "loss": 3.909, + "step": 40926 + }, + { + "epoch": 0.04, + "learning_rate": 4.979460211200465e-05, + "loss": 1.2429, + "step": 40927 + }, + { + "epoch": 0.04, + "learning_rate": 4.979459206381753e-05, + "loss": 0.9186, + "step": 40928 + }, + { + "epoch": 0.04, + "learning_rate": 4.979458201538566e-05, + "loss": 0.9709, + "step": 40929 + }, + { + "epoch": 0.04, + "learning_rate": 4.979457196670902e-05, + "loss": 1.4263, + "step": 40930 + }, + { + "epoch": 0.04, + "learning_rate": 4.979456191778762e-05, + "loss": 1.1351, + "step": 40931 + }, + { + "epoch": 0.04, + "learning_rate": 4.979455186862145e-05, + "loss": 1.4749, + "step": 40932 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794541819210535e-05, + "loss": 1.1656, + "step": 40933 + }, + { + "epoch": 0.04, + "learning_rate": 4.979453176955485e-05, + "loss": 0.9613, + "step": 40934 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794521719654394e-05, + "loss": 0.9406, + "step": 40935 + }, + { + "epoch": 0.04, + "learning_rate": 4.979451166950919e-05, + "loss": 1.0009, + "step": 40936 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794501619119226e-05, + "loss": 1.135, + "step": 40937 + }, + { + "epoch": 0.04, + "learning_rate": 4.979449156848449e-05, + "loss": 1.0691, + "step": 40938 + }, + { + "epoch": 0.04, + "learning_rate": 4.979448151760501e-05, + "loss": 1.1483, + "step": 40939 + }, + { + "epoch": 0.04, + "learning_rate": 4.979447146648075e-05, + "loss": 1.0791, + "step": 40940 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794461415111744e-05, + "loss": 1.2825, + "step": 40941 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794451363497974e-05, + "loss": 0.9814, + "step": 40942 + }, + { + "epoch": 0.04, + "learning_rate": 4.979444131163944e-05, + "loss": 0.9849, + "step": 40943 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794431259536144e-05, + "loss": 1.0751, + "step": 40944 + }, + { + "epoch": 0.04, + "learning_rate": 4.979442120718809e-05, + "loss": 1.2128, + "step": 40945 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794411154595286e-05, + "loss": 1.6338, + "step": 40946 + }, + { + "epoch": 0.04, + "learning_rate": 4.979440110175771e-05, + "loss": 1.326, + "step": 40947 + }, + { + "epoch": 0.04, + "learning_rate": 4.979439104867538e-05, + "loss": 1.1183, + "step": 40948 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794380995348285e-05, + "loss": 0.8999, + "step": 40949 + }, + { + "epoch": 0.04, + "learning_rate": 4.979437094177644e-05, + "loss": 1.1321, + "step": 40950 + }, + { + "epoch": 0.04, + "learning_rate": 4.979436088795982e-05, + "loss": 1.1497, + "step": 40951 + }, + { + "epoch": 0.04, + "learning_rate": 4.979435083389845e-05, + "loss": 1.0581, + "step": 40952 + }, + { + "epoch": 0.04, + "learning_rate": 4.979434077959232e-05, + "loss": 0.9533, + "step": 40953 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794330725041427e-05, + "loss": 1.0292, + "step": 40954 + }, + { + "epoch": 0.04, + "learning_rate": 4.979432067024578e-05, + "loss": 1.0483, + "step": 40955 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794310615205375e-05, + "loss": 1.1424, + "step": 40956 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794300559920205e-05, + "loss": 0.9158, + "step": 40957 + }, + { + "epoch": 0.04, + "learning_rate": 4.979429050439028e-05, + "loss": 0.9945, + "step": 40958 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794280448615595e-05, + "loss": 1.3328, + "step": 40959 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794270392596156e-05, + "loss": 1.2378, + "step": 40960 + }, + { + "epoch": 0.04, + "learning_rate": 4.979426033633195e-05, + "loss": 0.9737, + "step": 40961 + }, + { + "epoch": 0.04, + "learning_rate": 4.979425027982299e-05, + "loss": 1.0008, + "step": 40962 + }, + { + "epoch": 0.04, + "learning_rate": 4.979424022306927e-05, + "loss": 0.9552, + "step": 40963 + }, + { + "epoch": 0.04, + "learning_rate": 4.979423016607079e-05, + "loss": 1.2045, + "step": 40964 + }, + { + "epoch": 0.04, + "learning_rate": 4.979422010882756e-05, + "loss": 1.003, + "step": 40965 + }, + { + "epoch": 0.04, + "learning_rate": 4.979421005133956e-05, + "loss": 0.8391, + "step": 40966 + }, + { + "epoch": 0.04, + "learning_rate": 4.979419999360682e-05, + "loss": 1.1561, + "step": 40967 + }, + { + "epoch": 0.04, + "learning_rate": 4.97941899356293e-05, + "loss": 0.929, + "step": 40968 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794179877407035e-05, + "loss": 0.7954, + "step": 40969 + }, + { + "epoch": 0.04, + "learning_rate": 4.979416981894001e-05, + "loss": 0.9071, + "step": 40970 + }, + { + "epoch": 0.04, + "learning_rate": 4.979415976022823e-05, + "loss": 1.4266, + "step": 40971 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794149701271695e-05, + "loss": 1.3173, + "step": 40972 + }, + { + "epoch": 0.04, + "learning_rate": 4.979413964207039e-05, + "loss": 0.8479, + "step": 40973 + }, + { + "epoch": 0.04, + "learning_rate": 4.979412958262434e-05, + "loss": 1.2729, + "step": 40974 + }, + { + "epoch": 0.04, + "learning_rate": 4.979411952293353e-05, + "loss": 1.157, + "step": 40975 + }, + { + "epoch": 0.04, + "learning_rate": 4.979410946299796e-05, + "loss": 1.0183, + "step": 40976 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794099402817627e-05, + "loss": 1.0359, + "step": 40977 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794089342392545e-05, + "loss": 1.1713, + "step": 40978 + }, + { + "epoch": 0.04, + "learning_rate": 4.979407928172271e-05, + "loss": 0.9039, + "step": 40979 + }, + { + "epoch": 0.04, + "learning_rate": 4.979406922080811e-05, + "loss": 0.9003, + "step": 40980 + }, + { + "epoch": 0.04, + "learning_rate": 4.979405915964876e-05, + "loss": 2.0488, + "step": 40981 + }, + { + "epoch": 0.04, + "learning_rate": 4.979404909824465e-05, + "loss": 1.7172, + "step": 40982 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794039036595785e-05, + "loss": 1.0283, + "step": 40983 + }, + { + "epoch": 0.04, + "learning_rate": 4.9794028974702166e-05, + "loss": 1.0857, + "step": 40984 + }, + { + "epoch": 0.04, + "learning_rate": 4.979401891256379e-05, + "loss": 1.1042, + "step": 40985 + }, + { + "epoch": 0.04, + "learning_rate": 4.979400885018065e-05, + "loss": 1.1553, + "step": 40986 + }, + { + "epoch": 0.04, + "learning_rate": 4.979399878755276e-05, + "loss": 1.2124, + "step": 40987 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793988724680114e-05, + "loss": 1.229, + "step": 40988 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793978661562715e-05, + "loss": 0.9783, + "step": 40989 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793968598200557e-05, + "loss": 0.8406, + "step": 40990 + }, + { + "epoch": 0.04, + "learning_rate": 4.979395853459365e-05, + "loss": 0.8865, + "step": 40991 + }, + { + "epoch": 0.04, + "learning_rate": 4.979394847074197e-05, + "loss": 0.9932, + "step": 40992 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793938406645544e-05, + "loss": 1.0827, + "step": 40993 + }, + { + "epoch": 0.04, + "learning_rate": 4.979392834230437e-05, + "loss": 0.8926, + "step": 40994 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793918277718434e-05, + "loss": 1.0067, + "step": 40995 + }, + { + "epoch": 0.04, + "learning_rate": 4.979390821288774e-05, + "loss": 0.9064, + "step": 40996 + }, + { + "epoch": 0.04, + "learning_rate": 4.97938981478123e-05, + "loss": 1.0775, + "step": 40997 + }, + { + "epoch": 0.04, + "learning_rate": 4.97938880824921e-05, + "loss": 1.1318, + "step": 40998 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793878016927145e-05, + "loss": 0.9529, + "step": 40999 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793867951117434e-05, + "loss": 1.0216, + "step": 41000 + }, + { + "epoch": 0.04, + "eval_loss": 1.0827760696411133, + "eval_runtime": 130.4454, + "eval_samples_per_second": 10.617, + "eval_steps_per_second": 5.313, + "step": 41000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793857885062965e-05, + "loss": 0.939, + "step": 41001 + }, + { + "epoch": 0.04, + "learning_rate": 4.979384781876375e-05, + "loss": 1.2295, + "step": 41002 + }, + { + "epoch": 0.04, + "learning_rate": 4.979383775221978e-05, + "loss": 1.3058, + "step": 41003 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793827685431046e-05, + "loss": 1.3647, + "step": 41004 + }, + { + "epoch": 0.04, + "learning_rate": 4.979381761839757e-05, + "loss": 1.0407, + "step": 41005 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793807551119334e-05, + "loss": 1.1236, + "step": 41006 + }, + { + "epoch": 0.04, + "learning_rate": 4.979379748359635e-05, + "loss": 1.0465, + "step": 41007 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793787415828595e-05, + "loss": 0.9899, + "step": 41008 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793777347816104e-05, + "loss": 0.9079, + "step": 41009 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793767279558854e-05, + "loss": 0.8553, + "step": 41010 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793757211056846e-05, + "loss": 1.5017, + "step": 41011 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793747142310086e-05, + "loss": 1.4316, + "step": 41012 + }, + { + "epoch": 0.04, + "learning_rate": 4.979373707331858e-05, + "loss": 1.1193, + "step": 41013 + }, + { + "epoch": 0.04, + "learning_rate": 4.979372700408231e-05, + "loss": 1.1392, + "step": 41014 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793716934601295e-05, + "loss": 1.0666, + "step": 41015 + }, + { + "epoch": 0.04, + "learning_rate": 4.979370686487552e-05, + "loss": 0.9475, + "step": 41016 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793696794905e-05, + "loss": 0.9462, + "step": 41017 + }, + { + "epoch": 0.04, + "learning_rate": 4.979368672468972e-05, + "loss": 0.7857, + "step": 41018 + }, + { + "epoch": 0.04, + "learning_rate": 4.979367665422969e-05, + "loss": 0.8213, + "step": 41019 + }, + { + "epoch": 0.04, + "learning_rate": 4.979366658352491e-05, + "loss": 1.3567, + "step": 41020 + }, + { + "epoch": 0.04, + "learning_rate": 4.979365651257537e-05, + "loss": 0.8561, + "step": 41021 + }, + { + "epoch": 0.04, + "learning_rate": 4.979364644138108e-05, + "loss": 1.0883, + "step": 41022 + }, + { + "epoch": 0.04, + "learning_rate": 4.979363636994204e-05, + "loss": 1.0243, + "step": 41023 + }, + { + "epoch": 0.04, + "learning_rate": 4.979362629825825e-05, + "loss": 1.2735, + "step": 41024 + }, + { + "epoch": 0.04, + "learning_rate": 4.979361622632971e-05, + "loss": 1.1357, + "step": 41025 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793606154156405e-05, + "loss": 0.9827, + "step": 41026 + }, + { + "epoch": 0.04, + "learning_rate": 4.979359608173836e-05, + "loss": 0.9403, + "step": 41027 + }, + { + "epoch": 0.04, + "learning_rate": 4.979358600907556e-05, + "loss": 0.9605, + "step": 41028 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793575936168e-05, + "loss": 0.945, + "step": 41029 + }, + { + "epoch": 0.04, + "learning_rate": 4.97935658630157e-05, + "loss": 0.8344, + "step": 41030 + }, + { + "epoch": 0.04, + "learning_rate": 4.979355578961864e-05, + "loss": 0.7077, + "step": 41031 + }, + { + "epoch": 0.04, + "learning_rate": 4.979354571597684e-05, + "loss": 1.1085, + "step": 41032 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793535642090276e-05, + "loss": 1.1478, + "step": 41033 + }, + { + "epoch": 0.04, + "learning_rate": 4.979352556795897e-05, + "loss": 0.4995, + "step": 41034 + }, + { + "epoch": 0.04, + "learning_rate": 4.97935154935829e-05, + "loss": 0.3819, + "step": 41035 + }, + { + "epoch": 0.04, + "learning_rate": 4.97935054189621e-05, + "loss": 0.7369, + "step": 41036 + }, + { + "epoch": 0.04, + "learning_rate": 4.979349534409653e-05, + "loss": 0.9669, + "step": 41037 + }, + { + "epoch": 0.04, + "learning_rate": 4.979348526898621e-05, + "loss": 1.6522, + "step": 41038 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793475193631144e-05, + "loss": 1.0046, + "step": 41039 + }, + { + "epoch": 0.04, + "learning_rate": 4.979346511803133e-05, + "loss": 1.3287, + "step": 41040 + }, + { + "epoch": 0.04, + "learning_rate": 4.979345504218677e-05, + "loss": 1.13, + "step": 41041 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793444966097445e-05, + "loss": 0.9385, + "step": 41042 + }, + { + "epoch": 0.04, + "learning_rate": 4.979343488976338e-05, + "loss": 0.9134, + "step": 41043 + }, + { + "epoch": 0.04, + "learning_rate": 4.979342481318456e-05, + "loss": 1.1597, + "step": 41044 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793414736360986e-05, + "loss": 2.0257, + "step": 41045 + }, + { + "epoch": 0.04, + "learning_rate": 4.979340465929267e-05, + "loss": 1.4026, + "step": 41046 + }, + { + "epoch": 0.04, + "learning_rate": 4.97933945819796e-05, + "loss": 0.9137, + "step": 41047 + }, + { + "epoch": 0.04, + "learning_rate": 4.979338450442178e-05, + "loss": 0.83, + "step": 41048 + }, + { + "epoch": 0.04, + "learning_rate": 4.979337442661921e-05, + "loss": 1.0749, + "step": 41049 + }, + { + "epoch": 0.04, + "learning_rate": 4.979336434857189e-05, + "loss": 1.1871, + "step": 41050 + }, + { + "epoch": 0.04, + "learning_rate": 4.979335427027982e-05, + "loss": 1.1551, + "step": 41051 + }, + { + "epoch": 0.04, + "learning_rate": 4.979334419174301e-05, + "loss": 1.0977, + "step": 41052 + }, + { + "epoch": 0.04, + "learning_rate": 4.979333411296144e-05, + "loss": 1.008, + "step": 41053 + }, + { + "epoch": 0.04, + "learning_rate": 4.979332403393513e-05, + "loss": 1.0999, + "step": 41054 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793313954664054e-05, + "loss": 1.3, + "step": 41055 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793303875148246e-05, + "loss": 1.0094, + "step": 41056 + }, + { + "epoch": 0.04, + "learning_rate": 4.979329379538768e-05, + "loss": 0.9381, + "step": 41057 + }, + { + "epoch": 0.04, + "learning_rate": 4.979328371538237e-05, + "loss": 1.1652, + "step": 41058 + }, + { + "epoch": 0.04, + "learning_rate": 4.979327363513231e-05, + "loss": 0.785, + "step": 41059 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793263554637494e-05, + "loss": 1.0145, + "step": 41060 + }, + { + "epoch": 0.04, + "learning_rate": 4.979325347389794e-05, + "loss": 0.7387, + "step": 41061 + }, + { + "epoch": 0.04, + "learning_rate": 4.979324339291363e-05, + "loss": 1.1376, + "step": 41062 + }, + { + "epoch": 0.04, + "learning_rate": 4.979323331168457e-05, + "loss": 0.9783, + "step": 41063 + }, + { + "epoch": 0.04, + "learning_rate": 4.979322323021077e-05, + "loss": 0.8317, + "step": 41064 + }, + { + "epoch": 0.04, + "learning_rate": 4.979321314849221e-05, + "loss": 1.31, + "step": 41065 + }, + { + "epoch": 0.04, + "learning_rate": 4.979320306652891e-05, + "loss": 0.9026, + "step": 41066 + }, + { + "epoch": 0.04, + "learning_rate": 4.979319298432087e-05, + "loss": 1.1282, + "step": 41067 + }, + { + "epoch": 0.04, + "learning_rate": 4.979318290186807e-05, + "loss": 0.8821, + "step": 41068 + }, + { + "epoch": 0.04, + "learning_rate": 4.979317281917052e-05, + "loss": 1.0077, + "step": 41069 + }, + { + "epoch": 0.04, + "learning_rate": 4.979316273622823e-05, + "loss": 0.9351, + "step": 41070 + }, + { + "epoch": 0.04, + "learning_rate": 4.979315265304119e-05, + "loss": 1.0323, + "step": 41071 + }, + { + "epoch": 0.04, + "learning_rate": 4.979314256960941e-05, + "loss": 0.853, + "step": 41072 + }, + { + "epoch": 0.04, + "learning_rate": 4.979313248593287e-05, + "loss": 1.3601, + "step": 41073 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793122402011584e-05, + "loss": 0.9759, + "step": 41074 + }, + { + "epoch": 0.04, + "learning_rate": 4.979311231784556e-05, + "loss": 0.8499, + "step": 41075 + }, + { + "epoch": 0.04, + "learning_rate": 4.979310223343478e-05, + "loss": 0.8892, + "step": 41076 + }, + { + "epoch": 0.04, + "learning_rate": 4.979309214877925e-05, + "loss": 1.0848, + "step": 41077 + }, + { + "epoch": 0.04, + "learning_rate": 4.979308206387898e-05, + "loss": 1.1487, + "step": 41078 + }, + { + "epoch": 0.04, + "learning_rate": 4.979307197873396e-05, + "loss": 0.6095, + "step": 41079 + }, + { + "epoch": 0.04, + "learning_rate": 4.97930618933442e-05, + "loss": 0.9847, + "step": 41080 + }, + { + "epoch": 0.04, + "learning_rate": 4.979305180770969e-05, + "loss": 1.1438, + "step": 41081 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793041721830434e-05, + "loss": 1.1241, + "step": 41082 + }, + { + "epoch": 0.04, + "learning_rate": 4.979303163570643e-05, + "loss": 1.1474, + "step": 41083 + }, + { + "epoch": 0.04, + "learning_rate": 4.979302154933768e-05, + "loss": 1.151, + "step": 41084 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793011462724185e-05, + "loss": 1.1381, + "step": 41085 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793001375865936e-05, + "loss": 1.0368, + "step": 41086 + }, + { + "epoch": 0.04, + "learning_rate": 4.979299128876295e-05, + "loss": 0.8393, + "step": 41087 + }, + { + "epoch": 0.04, + "learning_rate": 4.979298120141521e-05, + "loss": 1.0045, + "step": 41088 + }, + { + "epoch": 0.04, + "learning_rate": 4.979297111382273e-05, + "loss": 1.0442, + "step": 41089 + }, + { + "epoch": 0.04, + "learning_rate": 4.979296102598551e-05, + "loss": 1.2169, + "step": 41090 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792950937903535e-05, + "loss": 1.2859, + "step": 41091 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792940849576816e-05, + "loss": 0.9384, + "step": 41092 + }, + { + "epoch": 0.04, + "learning_rate": 4.979293076100535e-05, + "loss": 1.0186, + "step": 41093 + }, + { + "epoch": 0.04, + "learning_rate": 4.979292067218915e-05, + "loss": 0.9974, + "step": 41094 + }, + { + "epoch": 0.04, + "learning_rate": 4.979291058312819e-05, + "loss": 1.2048, + "step": 41095 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792900493822494e-05, + "loss": 0.8147, + "step": 41096 + }, + { + "epoch": 0.04, + "learning_rate": 4.979289040427205e-05, + "loss": 0.4064, + "step": 41097 + }, + { + "epoch": 0.04, + "learning_rate": 4.979288031447685e-05, + "loss": 1.1764, + "step": 41098 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792870224436924e-05, + "loss": 1.1032, + "step": 41099 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792860134152244e-05, + "loss": 1.031, + "step": 41100 + }, + { + "epoch": 0.04, + "learning_rate": 4.979285004362282e-05, + "loss": 1.0201, + "step": 41101 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792839952848655e-05, + "loss": 1.3128, + "step": 41102 + }, + { + "epoch": 0.04, + "learning_rate": 4.979282986182974e-05, + "loss": 1.095, + "step": 41103 + }, + { + "epoch": 0.04, + "learning_rate": 4.979281977056608e-05, + "loss": 1.0507, + "step": 41104 + }, + { + "epoch": 0.04, + "learning_rate": 4.979280967905768e-05, + "loss": 0.8423, + "step": 41105 + }, + { + "epoch": 0.04, + "learning_rate": 4.979279958730454e-05, + "loss": 0.9229, + "step": 41106 + }, + { + "epoch": 0.04, + "learning_rate": 4.979278949530665e-05, + "loss": 0.375, + "step": 41107 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792779403064014e-05, + "loss": 0.059, + "step": 41108 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792769310576635e-05, + "loss": 0.0623, + "step": 41109 + }, + { + "epoch": 0.04, + "learning_rate": 4.979275921784452e-05, + "loss": 0.0636, + "step": 41110 + }, + { + "epoch": 0.04, + "learning_rate": 4.979274912486765e-05, + "loss": 0.0459, + "step": 41111 + }, + { + "epoch": 0.04, + "learning_rate": 4.979273903164604e-05, + "loss": 0.0296, + "step": 41112 + }, + { + "epoch": 0.04, + "learning_rate": 4.979272893817969e-05, + "loss": 0.0352, + "step": 41113 + }, + { + "epoch": 0.04, + "learning_rate": 4.97927188444686e-05, + "loss": 0.0648, + "step": 41114 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792708750512754e-05, + "loss": 0.0686, + "step": 41115 + }, + { + "epoch": 0.04, + "learning_rate": 4.979269865631217e-05, + "loss": 0.0384, + "step": 41116 + }, + { + "epoch": 0.04, + "learning_rate": 4.979268856186685e-05, + "loss": 0.3288, + "step": 41117 + }, + { + "epoch": 0.04, + "learning_rate": 4.979267846717678e-05, + "loss": 0.9563, + "step": 41118 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792668372241965e-05, + "loss": 0.9358, + "step": 41119 + }, + { + "epoch": 0.04, + "learning_rate": 4.979265827706242e-05, + "loss": 0.6838, + "step": 41120 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792648181638124e-05, + "loss": 0.5481, + "step": 41121 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792638085969086e-05, + "loss": 0.9899, + "step": 41122 + }, + { + "epoch": 0.04, + "learning_rate": 4.979262799005531e-05, + "loss": 1.04, + "step": 41123 + }, + { + "epoch": 0.04, + "learning_rate": 4.979261789389678e-05, + "loss": 0.6783, + "step": 41124 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792607797493515e-05, + "loss": 1.1898, + "step": 41125 + }, + { + "epoch": 0.04, + "learning_rate": 4.97925977008455e-05, + "loss": 1.2219, + "step": 41126 + }, + { + "epoch": 0.04, + "learning_rate": 4.979258760395275e-05, + "loss": 0.9756, + "step": 41127 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792577506815264e-05, + "loss": 0.7528, + "step": 41128 + }, + { + "epoch": 0.04, + "learning_rate": 4.979256740943303e-05, + "loss": 0.8252, + "step": 41129 + }, + { + "epoch": 0.04, + "learning_rate": 4.979255731180605e-05, + "loss": 0.76, + "step": 41130 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792547213934336e-05, + "loss": 0.9482, + "step": 41131 + }, + { + "epoch": 0.04, + "learning_rate": 4.979253711581788e-05, + "loss": 0.8926, + "step": 41132 + }, + { + "epoch": 0.04, + "learning_rate": 4.979252701745668e-05, + "loss": 0.8482, + "step": 41133 + }, + { + "epoch": 0.04, + "learning_rate": 4.979251691885074e-05, + "loss": 0.5956, + "step": 41134 + }, + { + "epoch": 0.04, + "learning_rate": 4.979250682000005e-05, + "loss": 0.3637, + "step": 41135 + }, + { + "epoch": 0.04, + "learning_rate": 4.979249672090463e-05, + "loss": 0.696, + "step": 41136 + }, + { + "epoch": 0.04, + "learning_rate": 4.979248662156447e-05, + "loss": 0.9896, + "step": 41137 + }, + { + "epoch": 0.04, + "learning_rate": 4.979247652197956e-05, + "loss": 0.9104, + "step": 41138 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792466422149906e-05, + "loss": 1.3247, + "step": 41139 + }, + { + "epoch": 0.04, + "learning_rate": 4.979245632207552e-05, + "loss": 1.2672, + "step": 41140 + }, + { + "epoch": 0.04, + "learning_rate": 4.97924462217564e-05, + "loss": 1.2489, + "step": 41141 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792436121192525e-05, + "loss": 1.1041, + "step": 41142 + }, + { + "epoch": 0.04, + "learning_rate": 4.979242602038392e-05, + "loss": 1.1169, + "step": 41143 + }, + { + "epoch": 0.04, + "learning_rate": 4.979241591933057e-05, + "loss": 1.0832, + "step": 41144 + }, + { + "epoch": 0.04, + "learning_rate": 4.979240581803247e-05, + "loss": 0.7253, + "step": 41145 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792395716489647e-05, + "loss": 0.7662, + "step": 41146 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792385614702076e-05, + "loss": 0.8042, + "step": 41147 + }, + { + "epoch": 0.04, + "learning_rate": 4.979237551266977e-05, + "loss": 0.9164, + "step": 41148 + }, + { + "epoch": 0.04, + "learning_rate": 4.979236541039271e-05, + "loss": 1.0141, + "step": 41149 + }, + { + "epoch": 0.04, + "learning_rate": 4.979235530787093e-05, + "loss": 0.8994, + "step": 41150 + }, + { + "epoch": 0.04, + "learning_rate": 4.97923452051044e-05, + "loss": 0.8838, + "step": 41151 + }, + { + "epoch": 0.04, + "learning_rate": 4.979233510209312e-05, + "loss": 0.6715, + "step": 41152 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792324998837115e-05, + "loss": 1.0137, + "step": 41153 + }, + { + "epoch": 0.04, + "learning_rate": 4.979231489533637e-05, + "loss": 1.0871, + "step": 41154 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792304791590885e-05, + "loss": 1.121, + "step": 41155 + }, + { + "epoch": 0.04, + "learning_rate": 4.979229468760065e-05, + "loss": 0.9135, + "step": 41156 + }, + { + "epoch": 0.04, + "learning_rate": 4.979228458336569e-05, + "loss": 0.9271, + "step": 41157 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792274478885984e-05, + "loss": 1.734, + "step": 41158 + }, + { + "epoch": 0.04, + "learning_rate": 4.979226437416153e-05, + "loss": 1.748, + "step": 41159 + }, + { + "epoch": 0.04, + "learning_rate": 4.979225426919236e-05, + "loss": 1.4903, + "step": 41160 + }, + { + "epoch": 0.04, + "learning_rate": 4.979224416397844e-05, + "loss": 1.3612, + "step": 41161 + }, + { + "epoch": 0.04, + "learning_rate": 4.979223405851977e-05, + "loss": 1.0017, + "step": 41162 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792223952816375e-05, + "loss": 1.029, + "step": 41163 + }, + { + "epoch": 0.04, + "learning_rate": 4.979221384686824e-05, + "loss": 0.7353, + "step": 41164 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792203740675366e-05, + "loss": 1.2257, + "step": 41165 + }, + { + "epoch": 0.04, + "learning_rate": 4.979219363423775e-05, + "loss": 0.8683, + "step": 41166 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792183527555404e-05, + "loss": 1.1169, + "step": 41167 + }, + { + "epoch": 0.04, + "learning_rate": 4.979217342062831e-05, + "loss": 0.9933, + "step": 41168 + }, + { + "epoch": 0.04, + "learning_rate": 4.979216331345649e-05, + "loss": 1.2589, + "step": 41169 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792153206039916e-05, + "loss": 0.9823, + "step": 41170 + }, + { + "epoch": 0.04, + "learning_rate": 4.979214309837862e-05, + "loss": 1.0913, + "step": 41171 + }, + { + "epoch": 0.04, + "learning_rate": 4.979213299047257e-05, + "loss": 0.8954, + "step": 41172 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792122882321804e-05, + "loss": 1.1442, + "step": 41173 + }, + { + "epoch": 0.04, + "learning_rate": 4.979211277392628e-05, + "loss": 1.0027, + "step": 41174 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792102665286036e-05, + "loss": 0.7512, + "step": 41175 + }, + { + "epoch": 0.04, + "learning_rate": 4.979209255640104e-05, + "loss": 0.9932, + "step": 41176 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792082447271314e-05, + "loss": 0.8733, + "step": 41177 + }, + { + "epoch": 0.04, + "learning_rate": 4.979207233789685e-05, + "loss": 1.2919, + "step": 41178 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792062228277646e-05, + "loss": 0.8648, + "step": 41179 + }, + { + "epoch": 0.04, + "learning_rate": 4.979205211841371e-05, + "loss": 0.8247, + "step": 41180 + }, + { + "epoch": 0.04, + "learning_rate": 4.979204200830504e-05, + "loss": 0.8047, + "step": 41181 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792031897951627e-05, + "loss": 1.2789, + "step": 41182 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792021787353484e-05, + "loss": 1.1336, + "step": 41183 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792011676510596e-05, + "loss": 1.0478, + "step": 41184 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792001565422976e-05, + "loss": 1.1668, + "step": 41185 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791991454090625e-05, + "loss": 1.4286, + "step": 41186 + }, + { + "epoch": 0.04, + "learning_rate": 4.979198134251353e-05, + "loss": 0.9207, + "step": 41187 + }, + { + "epoch": 0.04, + "learning_rate": 4.97919712306917e-05, + "loss": 1.2979, + "step": 41188 + }, + { + "epoch": 0.04, + "learning_rate": 4.979196111862514e-05, + "loss": 1.2578, + "step": 41189 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791951006313844e-05, + "loss": 1.1883, + "step": 41190 + }, + { + "epoch": 0.04, + "learning_rate": 4.97919408937578e-05, + "loss": 1.0239, + "step": 41191 + }, + { + "epoch": 0.04, + "learning_rate": 4.979193078095703e-05, + "loss": 0.9507, + "step": 41192 + }, + { + "epoch": 0.04, + "learning_rate": 4.979192066791153e-05, + "loss": 1.0876, + "step": 41193 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791910554621284e-05, + "loss": 0.8698, + "step": 41194 + }, + { + "epoch": 0.04, + "learning_rate": 4.97919004410863e-05, + "loss": 1.0752, + "step": 41195 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791890327306595e-05, + "loss": 0.7877, + "step": 41196 + }, + { + "epoch": 0.04, + "learning_rate": 4.979188021328215e-05, + "loss": 0.9128, + "step": 41197 + }, + { + "epoch": 0.04, + "learning_rate": 4.979187009901296e-05, + "loss": 1.1592, + "step": 41198 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791859984499044e-05, + "loss": 1.1503, + "step": 41199 + }, + { + "epoch": 0.04, + "learning_rate": 4.979184986974039e-05, + "loss": 1.2841, + "step": 41200 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791839754737005e-05, + "loss": 1.1385, + "step": 41201 + }, + { + "epoch": 0.04, + "learning_rate": 4.979182963948889e-05, + "loss": 0.5703, + "step": 41202 + }, + { + "epoch": 0.04, + "learning_rate": 4.979181952399603e-05, + "loss": 0.713, + "step": 41203 + }, + { + "epoch": 0.04, + "learning_rate": 4.979180940825844e-05, + "loss": 0.6471, + "step": 41204 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791799292276115e-05, + "loss": 0.8482, + "step": 41205 + }, + { + "epoch": 0.04, + "learning_rate": 4.979178917604905e-05, + "loss": 1.0606, + "step": 41206 + }, + { + "epoch": 0.04, + "learning_rate": 4.979177905957726e-05, + "loss": 1.1813, + "step": 41207 + }, + { + "epoch": 0.04, + "learning_rate": 4.979176894286074e-05, + "loss": 1.4641, + "step": 41208 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791758825899474e-05, + "loss": 1.1416, + "step": 41209 + }, + { + "epoch": 0.04, + "learning_rate": 4.979174870869348e-05, + "loss": 1.1356, + "step": 41210 + }, + { + "epoch": 0.04, + "learning_rate": 4.979173859124276e-05, + "loss": 1.0445, + "step": 41211 + }, + { + "epoch": 0.04, + "learning_rate": 4.979172847354729e-05, + "loss": 1.3935, + "step": 41212 + }, + { + "epoch": 0.04, + "learning_rate": 4.97917183556071e-05, + "loss": 0.7645, + "step": 41213 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791708237422164e-05, + "loss": 0.4798, + "step": 41214 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791698118992504e-05, + "loss": 0.3519, + "step": 41215 + }, + { + "epoch": 0.04, + "learning_rate": 4.979168800031811e-05, + "loss": 0.2662, + "step": 41216 + }, + { + "epoch": 0.04, + "learning_rate": 4.979167788139898e-05, + "loss": 0.2123, + "step": 41217 + }, + { + "epoch": 0.04, + "learning_rate": 4.979166776223512e-05, + "loss": 0.2305, + "step": 41218 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791657642826526e-05, + "loss": 0.2222, + "step": 41219 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791647523173195e-05, + "loss": 0.1001, + "step": 41220 + }, + { + "epoch": 0.04, + "learning_rate": 4.979163740327514e-05, + "loss": 0.2893, + "step": 41221 + }, + { + "epoch": 0.04, + "learning_rate": 4.979162728313235e-05, + "loss": 0.3024, + "step": 41222 + }, + { + "epoch": 0.04, + "learning_rate": 4.979161716274482e-05, + "loss": 0.3084, + "step": 41223 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791607042112566e-05, + "loss": 0.2829, + "step": 41224 + }, + { + "epoch": 0.04, + "learning_rate": 4.979159692123557e-05, + "loss": 0.5217, + "step": 41225 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791586800113855e-05, + "loss": 1.0504, + "step": 41226 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791576678747396e-05, + "loss": 0.4616, + "step": 41227 + }, + { + "epoch": 0.04, + "learning_rate": 4.979156655713621e-05, + "loss": 0.8988, + "step": 41228 + }, + { + "epoch": 0.04, + "learning_rate": 4.97915564352803e-05, + "loss": 0.9871, + "step": 41229 + }, + { + "epoch": 0.04, + "learning_rate": 4.979154631317964e-05, + "loss": 1.0571, + "step": 41230 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791536190834264e-05, + "loss": 1.0006, + "step": 41231 + }, + { + "epoch": 0.04, + "learning_rate": 4.979152606824415e-05, + "loss": 1.0851, + "step": 41232 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791515945409306e-05, + "loss": 0.9599, + "step": 41233 + }, + { + "epoch": 0.04, + "learning_rate": 4.979150582232973e-05, + "loss": 0.9435, + "step": 41234 + }, + { + "epoch": 0.04, + "learning_rate": 4.979149569900542e-05, + "loss": 1.1359, + "step": 41235 + }, + { + "epoch": 0.04, + "learning_rate": 4.979148557543638e-05, + "loss": 1.2216, + "step": 41236 + }, + { + "epoch": 0.04, + "learning_rate": 4.979147545162262e-05, + "loss": 0.9071, + "step": 41237 + }, + { + "epoch": 0.04, + "learning_rate": 4.979146532756411e-05, + "loss": 0.9262, + "step": 41238 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791455203260886e-05, + "loss": 0.9061, + "step": 41239 + }, + { + "epoch": 0.04, + "learning_rate": 4.979144507871292e-05, + "loss": 0.5216, + "step": 41240 + }, + { + "epoch": 0.04, + "learning_rate": 4.979143495392022e-05, + "loss": 0.9255, + "step": 41241 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791424828882805e-05, + "loss": 0.9883, + "step": 41242 + }, + { + "epoch": 0.04, + "learning_rate": 4.979141470360065e-05, + "loss": 1.3282, + "step": 41243 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791404578073766e-05, + "loss": 0.945, + "step": 41244 + }, + { + "epoch": 0.04, + "learning_rate": 4.979139445230215e-05, + "loss": 0.9848, + "step": 41245 + }, + { + "epoch": 0.04, + "learning_rate": 4.979138432628581e-05, + "loss": 0.9453, + "step": 41246 + }, + { + "epoch": 0.04, + "learning_rate": 4.979137420002473e-05, + "loss": 0.2338, + "step": 41247 + }, + { + "epoch": 0.04, + "learning_rate": 4.979136407351893e-05, + "loss": 0.8352, + "step": 41248 + }, + { + "epoch": 0.04, + "learning_rate": 4.979135394676839e-05, + "loss": 1.0176, + "step": 41249 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791343819773126e-05, + "loss": 1.0191, + "step": 41250 + }, + { + "epoch": 0.04, + "learning_rate": 4.979133369253314e-05, + "loss": 0.9374, + "step": 41251 + }, + { + "epoch": 0.04, + "learning_rate": 4.979132356504841e-05, + "loss": 0.8733, + "step": 41252 + }, + { + "epoch": 0.04, + "learning_rate": 4.979131343731895e-05, + "loss": 0.8893, + "step": 41253 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791303309344774e-05, + "loss": 1.053, + "step": 41254 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791293181125866e-05, + "loss": 1.3469, + "step": 41255 + }, + { + "epoch": 0.04, + "learning_rate": 4.979128305266222e-05, + "loss": 1.029, + "step": 41256 + }, + { + "epoch": 0.04, + "learning_rate": 4.979127292395385e-05, + "loss": 1.0788, + "step": 41257 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791262795000745e-05, + "loss": 1.3079, + "step": 41258 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791252665802924e-05, + "loss": 1.0385, + "step": 41259 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791242536360365e-05, + "loss": 1.0194, + "step": 41260 + }, + { + "epoch": 0.04, + "learning_rate": 4.979123240667308e-05, + "loss": 0.9519, + "step": 41261 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791222276741065e-05, + "loss": 1.074, + "step": 41262 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791212146564325e-05, + "loss": 1.1525, + "step": 41263 + }, + { + "epoch": 0.04, + "learning_rate": 4.979120201614285e-05, + "loss": 1.2236, + "step": 41264 + }, + { + "epoch": 0.04, + "learning_rate": 4.979119188547665e-05, + "loss": 1.1108, + "step": 41265 + }, + { + "epoch": 0.04, + "learning_rate": 4.979118175456572e-05, + "loss": 0.883, + "step": 41266 + }, + { + "epoch": 0.04, + "learning_rate": 4.979117162341007e-05, + "loss": 0.45, + "step": 41267 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791161492009684e-05, + "loss": 0.2732, + "step": 41268 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791151360364575e-05, + "loss": 0.3392, + "step": 41269 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791141228474734e-05, + "loss": 0.2568, + "step": 41270 + }, + { + "epoch": 0.04, + "learning_rate": 4.979113109634017e-05, + "loss": 0.7965, + "step": 41271 + }, + { + "epoch": 0.04, + "learning_rate": 4.979112096396087e-05, + "loss": 1.2703, + "step": 41272 + }, + { + "epoch": 0.04, + "learning_rate": 4.979111083133685e-05, + "loss": 1.1062, + "step": 41273 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791100698468096e-05, + "loss": 0.9047, + "step": 41274 + }, + { + "epoch": 0.04, + "learning_rate": 4.979109056535462e-05, + "loss": 1.0241, + "step": 41275 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791080431996416e-05, + "loss": 0.3011, + "step": 41276 + }, + { + "epoch": 0.04, + "learning_rate": 4.979107029839348e-05, + "loss": 0.1454, + "step": 41277 + }, + { + "epoch": 0.04, + "learning_rate": 4.979106016454583e-05, + "loss": 1.0615, + "step": 41278 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791050030453445e-05, + "loss": 1.4512, + "step": 41279 + }, + { + "epoch": 0.04, + "learning_rate": 4.979103989611633e-05, + "loss": 1.1346, + "step": 41280 + }, + { + "epoch": 0.04, + "learning_rate": 4.979102976153449e-05, + "loss": 1.1818, + "step": 41281 + }, + { + "epoch": 0.04, + "learning_rate": 4.9791019626707924e-05, + "loss": 1.2401, + "step": 41282 + }, + { + "epoch": 0.04, + "learning_rate": 4.979100949163663e-05, + "loss": 0.9102, + "step": 41283 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790999356320613e-05, + "loss": 0.8178, + "step": 41284 + }, + { + "epoch": 0.04, + "learning_rate": 4.979098922075987e-05, + "loss": 1.1504, + "step": 41285 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790979084954403e-05, + "loss": 1.0782, + "step": 41286 + }, + { + "epoch": 0.04, + "learning_rate": 4.97909689489042e-05, + "loss": 0.8875, + "step": 41287 + }, + { + "epoch": 0.04, + "learning_rate": 4.979095881260928e-05, + "loss": 0.7536, + "step": 41288 + }, + { + "epoch": 0.04, + "learning_rate": 4.979094867606963e-05, + "loss": 0.9272, + "step": 41289 + }, + { + "epoch": 0.04, + "learning_rate": 4.979093853928526e-05, + "loss": 1.0363, + "step": 41290 + }, + { + "epoch": 0.04, + "learning_rate": 4.979092840225615e-05, + "loss": 1.049, + "step": 41291 + }, + { + "epoch": 0.04, + "learning_rate": 4.979091826498233e-05, + "loss": 0.8471, + "step": 41292 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790908127463774e-05, + "loss": 0.5622, + "step": 41293 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790897989700505e-05, + "loss": 0.3371, + "step": 41294 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790887851692505e-05, + "loss": 0.8168, + "step": 41295 + }, + { + "epoch": 0.04, + "learning_rate": 4.979087771343977e-05, + "loss": 0.995, + "step": 41296 + }, + { + "epoch": 0.04, + "learning_rate": 4.979086757494232e-05, + "loss": 1.3143, + "step": 41297 + }, + { + "epoch": 0.04, + "learning_rate": 4.979085743620014e-05, + "loss": 1.0343, + "step": 41298 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790847297213236e-05, + "loss": 0.8775, + "step": 41299 + }, + { + "epoch": 0.04, + "learning_rate": 4.979083715798162e-05, + "loss": 0.7598, + "step": 41300 + }, + { + "epoch": 0.04, + "learning_rate": 4.979082701850526e-05, + "loss": 0.9568, + "step": 41301 + }, + { + "epoch": 0.04, + "learning_rate": 4.979081687878419e-05, + "loss": 1.1404, + "step": 41302 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790806738818384e-05, + "loss": 1.0428, + "step": 41303 + }, + { + "epoch": 0.04, + "learning_rate": 4.979079659860786e-05, + "loss": 1.072, + "step": 41304 + }, + { + "epoch": 0.04, + "learning_rate": 4.979078645815261e-05, + "loss": 1.2259, + "step": 41305 + }, + { + "epoch": 0.04, + "learning_rate": 4.979077631745264e-05, + "loss": 0.9637, + "step": 41306 + }, + { + "epoch": 0.04, + "learning_rate": 4.979076617650794e-05, + "loss": 0.8131, + "step": 41307 + }, + { + "epoch": 0.04, + "learning_rate": 4.979075603531852e-05, + "loss": 1.0763, + "step": 41308 + }, + { + "epoch": 0.04, + "learning_rate": 4.979074589388437e-05, + "loss": 0.9283, + "step": 41309 + }, + { + "epoch": 0.04, + "learning_rate": 4.97907357522055e-05, + "loss": 0.4011, + "step": 41310 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790725610281905e-05, + "loss": 0.804, + "step": 41311 + }, + { + "epoch": 0.04, + "learning_rate": 4.979071546811359e-05, + "loss": 0.7534, + "step": 41312 + }, + { + "epoch": 0.04, + "learning_rate": 4.979070532570055e-05, + "loss": 1.1111, + "step": 41313 + }, + { + "epoch": 0.04, + "learning_rate": 4.979069518304279e-05, + "loss": 0.8077, + "step": 41314 + }, + { + "epoch": 0.04, + "learning_rate": 4.97906850401403e-05, + "loss": 1.2849, + "step": 41315 + }, + { + "epoch": 0.04, + "learning_rate": 4.979067489699309e-05, + "loss": 1.219, + "step": 41316 + }, + { + "epoch": 0.04, + "learning_rate": 4.979066475360116e-05, + "loss": 1.0586, + "step": 41317 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790654609964504e-05, + "loss": 0.8061, + "step": 41318 + }, + { + "epoch": 0.04, + "learning_rate": 4.979064446608312e-05, + "loss": 1.169, + "step": 41319 + }, + { + "epoch": 0.04, + "learning_rate": 4.979063432195702e-05, + "loss": 0.8767, + "step": 41320 + }, + { + "epoch": 0.04, + "learning_rate": 4.97906241775862e-05, + "loss": 1.0211, + "step": 41321 + }, + { + "epoch": 0.04, + "learning_rate": 4.979061403297065e-05, + "loss": 1.0901, + "step": 41322 + }, + { + "epoch": 0.04, + "learning_rate": 4.979060388811038e-05, + "loss": 1.0455, + "step": 41323 + }, + { + "epoch": 0.04, + "learning_rate": 4.979059374300539e-05, + "loss": 1.1865, + "step": 41324 + }, + { + "epoch": 0.04, + "learning_rate": 4.979058359765567e-05, + "loss": 0.9658, + "step": 41325 + }, + { + "epoch": 0.04, + "learning_rate": 4.979057345206124e-05, + "loss": 1.1257, + "step": 41326 + }, + { + "epoch": 0.04, + "learning_rate": 4.979056330622208e-05, + "loss": 1.0041, + "step": 41327 + }, + { + "epoch": 0.04, + "learning_rate": 4.97905531601382e-05, + "loss": 0.8082, + "step": 41328 + }, + { + "epoch": 0.04, + "learning_rate": 4.979054301380959e-05, + "loss": 0.9844, + "step": 41329 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790532867236267e-05, + "loss": 0.8831, + "step": 41330 + }, + { + "epoch": 0.04, + "learning_rate": 4.979052272041822e-05, + "loss": 1.1112, + "step": 41331 + }, + { + "epoch": 0.04, + "learning_rate": 4.979051257335545e-05, + "loss": 1.3952, + "step": 41332 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790502426047966e-05, + "loss": 1.6382, + "step": 41333 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790492278495755e-05, + "loss": 1.2365, + "step": 41334 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790482130698825e-05, + "loss": 0.986, + "step": 41335 + }, + { + "epoch": 0.04, + "learning_rate": 4.979047198265717e-05, + "loss": 1.052, + "step": 41336 + }, + { + "epoch": 0.04, + "learning_rate": 4.979046183437079e-05, + "loss": 0.7259, + "step": 41337 + }, + { + "epoch": 0.04, + "learning_rate": 4.97904516858397e-05, + "loss": 1.1034, + "step": 41338 + }, + { + "epoch": 0.04, + "learning_rate": 4.979044153706388e-05, + "loss": 1.3415, + "step": 41339 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790431388043345e-05, + "loss": 1.0248, + "step": 41340 + }, + { + "epoch": 0.04, + "learning_rate": 4.979042123877809e-05, + "loss": 0.6168, + "step": 41341 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790411089268105e-05, + "loss": 1.3731, + "step": 41342 + }, + { + "epoch": 0.04, + "learning_rate": 4.979040093951342e-05, + "loss": 0.6298, + "step": 41343 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790390789514e-05, + "loss": 1.0784, + "step": 41344 + }, + { + "epoch": 0.04, + "learning_rate": 4.979038063926985e-05, + "loss": 0.9681, + "step": 41345 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790370488780994e-05, + "loss": 0.8467, + "step": 41346 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790360338047414e-05, + "loss": 0.8826, + "step": 41347 + }, + { + "epoch": 0.04, + "learning_rate": 4.979035018706911e-05, + "loss": 2.2049, + "step": 41348 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790340035846095e-05, + "loss": 1.3379, + "step": 41349 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790329884378355e-05, + "loss": 0.8062, + "step": 41350 + }, + { + "epoch": 0.04, + "learning_rate": 4.97903197326659e-05, + "loss": 0.8789, + "step": 41351 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790309580708714e-05, + "loss": 0.9745, + "step": 41352 + }, + { + "epoch": 0.04, + "learning_rate": 4.979029942850682e-05, + "loss": 1.2103, + "step": 41353 + }, + { + "epoch": 0.04, + "learning_rate": 4.97902892760602e-05, + "loss": 1.1818, + "step": 41354 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790279123368864e-05, + "loss": 0.9056, + "step": 41355 + }, + { + "epoch": 0.04, + "learning_rate": 4.979026897043281e-05, + "loss": 1.1903, + "step": 41356 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790258817252036e-05, + "loss": 0.984, + "step": 41357 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790248663826546e-05, + "loss": 1.1765, + "step": 41358 + }, + { + "epoch": 0.04, + "learning_rate": 4.979023851015633e-05, + "loss": 1.305, + "step": 41359 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790228356241396e-05, + "loss": 0.7871, + "step": 41360 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790218202081745e-05, + "loss": 1.0186, + "step": 41361 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790208047677375e-05, + "loss": 0.9245, + "step": 41362 + }, + { + "epoch": 0.04, + "learning_rate": 4.979019789302829e-05, + "loss": 0.9866, + "step": 41363 + }, + { + "epoch": 0.04, + "learning_rate": 4.979018773813449e-05, + "loss": 1.0434, + "step": 41364 + }, + { + "epoch": 0.04, + "learning_rate": 4.979017758299596e-05, + "loss": 0.8409, + "step": 41365 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790167427612724e-05, + "loss": 1.0594, + "step": 41366 + }, + { + "epoch": 0.04, + "learning_rate": 4.979015727198476e-05, + "loss": 0.8824, + "step": 41367 + }, + { + "epoch": 0.04, + "learning_rate": 4.979014711611208e-05, + "loss": 0.793, + "step": 41368 + }, + { + "epoch": 0.04, + "learning_rate": 4.979013695999468e-05, + "loss": 0.9518, + "step": 41369 + }, + { + "epoch": 0.04, + "learning_rate": 4.979012680363257e-05, + "loss": 1.0431, + "step": 41370 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790116647025734e-05, + "loss": 1.1422, + "step": 41371 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790106490174185e-05, + "loss": 1.2858, + "step": 41372 + }, + { + "epoch": 0.04, + "learning_rate": 4.979009633307792e-05, + "loss": 1.1424, + "step": 41373 + }, + { + "epoch": 0.04, + "learning_rate": 4.979008617573694e-05, + "loss": 0.862, + "step": 41374 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790076018151236e-05, + "loss": 0.3523, + "step": 41375 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790065860320814e-05, + "loss": 0.4887, + "step": 41376 + }, + { + "epoch": 0.04, + "learning_rate": 4.979005570224568e-05, + "loss": 1.1707, + "step": 41377 + }, + { + "epoch": 0.04, + "learning_rate": 4.979004554392583e-05, + "loss": 0.9905, + "step": 41378 + }, + { + "epoch": 0.04, + "learning_rate": 4.9790035385361256e-05, + "loss": 1.3182, + "step": 41379 + }, + { + "epoch": 0.04, + "learning_rate": 4.979002522655197e-05, + "loss": 0.7959, + "step": 41380 + }, + { + "epoch": 0.04, + "learning_rate": 4.979001506749797e-05, + "loss": 1.1707, + "step": 41381 + }, + { + "epoch": 0.04, + "learning_rate": 4.979000490819925e-05, + "loss": 0.8291, + "step": 41382 + }, + { + "epoch": 0.04, + "learning_rate": 4.978999474865581e-05, + "loss": 0.7934, + "step": 41383 + }, + { + "epoch": 0.04, + "learning_rate": 4.978998458886766e-05, + "loss": 0.5011, + "step": 41384 + }, + { + "epoch": 0.04, + "learning_rate": 4.978997442883479e-05, + "loss": 1.1467, + "step": 41385 + }, + { + "epoch": 0.04, + "learning_rate": 4.978996426855721e-05, + "loss": 0.6888, + "step": 41386 + }, + { + "epoch": 0.04, + "learning_rate": 4.978995410803491e-05, + "loss": 1.1079, + "step": 41387 + }, + { + "epoch": 0.04, + "learning_rate": 4.978994394726789e-05, + "loss": 1.2962, + "step": 41388 + }, + { + "epoch": 0.04, + "learning_rate": 4.978993378625616e-05, + "loss": 0.8496, + "step": 41389 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789923624999705e-05, + "loss": 1.109, + "step": 41390 + }, + { + "epoch": 0.04, + "learning_rate": 4.978991346349854e-05, + "loss": 1.0008, + "step": 41391 + }, + { + "epoch": 0.04, + "learning_rate": 4.978990330175266e-05, + "loss": 0.9962, + "step": 41392 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789893139762066e-05, + "loss": 0.6585, + "step": 41393 + }, + { + "epoch": 0.04, + "learning_rate": 4.978988297752676e-05, + "loss": 0.8782, + "step": 41394 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789872815046726e-05, + "loss": 1.0086, + "step": 41395 + }, + { + "epoch": 0.04, + "learning_rate": 4.978986265232199e-05, + "loss": 1.1103, + "step": 41396 + }, + { + "epoch": 0.04, + "learning_rate": 4.978985248935253e-05, + "loss": 1.0354, + "step": 41397 + }, + { + "epoch": 0.04, + "learning_rate": 4.978984232613836e-05, + "loss": 0.8904, + "step": 41398 + }, + { + "epoch": 0.04, + "learning_rate": 4.978983216267947e-05, + "loss": 0.9766, + "step": 41399 + }, + { + "epoch": 0.04, + "learning_rate": 4.978982199897587e-05, + "loss": 1.1553, + "step": 41400 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789811835027555e-05, + "loss": 1.6969, + "step": 41401 + }, + { + "epoch": 0.04, + "learning_rate": 4.978980167083452e-05, + "loss": 1.7232, + "step": 41402 + }, + { + "epoch": 0.04, + "learning_rate": 4.978979150639678e-05, + "loss": 1.234, + "step": 41403 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789781341714325e-05, + "loss": 1.1794, + "step": 41404 + }, + { + "epoch": 0.04, + "learning_rate": 4.978977117678715e-05, + "loss": 1.0198, + "step": 41405 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789761011615263e-05, + "loss": 1.1099, + "step": 41406 + }, + { + "epoch": 0.04, + "learning_rate": 4.978975084619866e-05, + "loss": 1.0915, + "step": 41407 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789740680537344e-05, + "loss": 1.0289, + "step": 41408 + }, + { + "epoch": 0.04, + "learning_rate": 4.978973051463132e-05, + "loss": 1.1217, + "step": 41409 + }, + { + "epoch": 0.04, + "learning_rate": 4.978972034848057e-05, + "loss": 1.1149, + "step": 41410 + }, + { + "epoch": 0.04, + "learning_rate": 4.978971018208512e-05, + "loss": 1.0499, + "step": 41411 + }, + { + "epoch": 0.04, + "learning_rate": 4.978970001544494e-05, + "loss": 0.966, + "step": 41412 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789689848560065e-05, + "loss": 1.056, + "step": 41413 + }, + { + "epoch": 0.04, + "learning_rate": 4.978967968143047e-05, + "loss": 1.1158, + "step": 41414 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789669514056155e-05, + "loss": 0.8423, + "step": 41415 + }, + { + "epoch": 0.04, + "learning_rate": 4.978965934643713e-05, + "loss": 0.9034, + "step": 41416 + }, + { + "epoch": 0.04, + "learning_rate": 4.978964917857339e-05, + "loss": 1.4173, + "step": 41417 + }, + { + "epoch": 0.04, + "learning_rate": 4.978963901046494e-05, + "loss": 1.108, + "step": 41418 + }, + { + "epoch": 0.04, + "learning_rate": 4.978962884211178e-05, + "loss": 0.8271, + "step": 41419 + }, + { + "epoch": 0.04, + "learning_rate": 4.97896186735139e-05, + "loss": 0.9005, + "step": 41420 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789608504671314e-05, + "loss": 0.9312, + "step": 41421 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789598335584015e-05, + "loss": 0.8106, + "step": 41422 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789588166252e-05, + "loss": 1.0492, + "step": 41423 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789577996675275e-05, + "loss": 1.0757, + "step": 41424 + }, + { + "epoch": 0.04, + "learning_rate": 4.978956782685384e-05, + "loss": 0.8137, + "step": 41425 + }, + { + "epoch": 0.04, + "learning_rate": 4.978955765678769e-05, + "loss": 1.0106, + "step": 41426 + }, + { + "epoch": 0.04, + "learning_rate": 4.978954748647683e-05, + "loss": 1.4606, + "step": 41427 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789537315921256e-05, + "loss": 1.0561, + "step": 41428 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789527145120964e-05, + "loss": 0.9717, + "step": 41429 + }, + { + "epoch": 0.04, + "learning_rate": 4.978951697407597e-05, + "loss": 0.8606, + "step": 41430 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789506802786255e-05, + "loss": 1.2156, + "step": 41431 + }, + { + "epoch": 0.04, + "learning_rate": 4.978949663125184e-05, + "loss": 1.1101, + "step": 41432 + }, + { + "epoch": 0.04, + "learning_rate": 4.978948645947271e-05, + "loss": 1.222, + "step": 41433 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789476287448866e-05, + "loss": 0.7971, + "step": 41434 + }, + { + "epoch": 0.04, + "learning_rate": 4.978946611518031e-05, + "loss": 0.9198, + "step": 41435 + }, + { + "epoch": 0.04, + "learning_rate": 4.978945594266704e-05, + "loss": 0.8073, + "step": 41436 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789445769909064e-05, + "loss": 1.1486, + "step": 41437 + }, + { + "epoch": 0.04, + "learning_rate": 4.978943559690638e-05, + "loss": 1.0857, + "step": 41438 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789425423658975e-05, + "loss": 1.0554, + "step": 41439 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789415250166874e-05, + "loss": 1.0714, + "step": 41440 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789405076430055e-05, + "loss": 0.9248, + "step": 41441 + }, + { + "epoch": 0.04, + "learning_rate": 4.978939490244852e-05, + "loss": 0.89, + "step": 41442 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789384728222276e-05, + "loss": 1.0103, + "step": 41443 + }, + { + "epoch": 0.04, + "learning_rate": 4.978937455375132e-05, + "loss": 1.2113, + "step": 41444 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789364379035666e-05, + "loss": 1.0159, + "step": 41445 + }, + { + "epoch": 0.04, + "learning_rate": 4.978935420407529e-05, + "loss": 0.9241, + "step": 41446 + }, + { + "epoch": 0.04, + "learning_rate": 4.978934402887021e-05, + "loss": 1.101, + "step": 41447 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789333853420414e-05, + "loss": 1.0591, + "step": 41448 + }, + { + "epoch": 0.04, + "learning_rate": 4.978932367772591e-05, + "loss": 1.024, + "step": 41449 + }, + { + "epoch": 0.04, + "learning_rate": 4.97893135017867e-05, + "loss": 0.7874, + "step": 41450 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789303325602774e-05, + "loss": 0.8313, + "step": 41451 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789293149174146e-05, + "loss": 0.9089, + "step": 41452 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789282972500806e-05, + "loss": 0.9578, + "step": 41453 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789272795582754e-05, + "loss": 1.3403, + "step": 41454 + }, + { + "epoch": 0.04, + "learning_rate": 4.978926261842e-05, + "loss": 1.203, + "step": 41455 + }, + { + "epoch": 0.04, + "learning_rate": 4.978925244101253e-05, + "loss": 0.9004, + "step": 41456 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789242263360354e-05, + "loss": 0.9821, + "step": 41457 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789232085463465e-05, + "loss": 1.0595, + "step": 41458 + }, + { + "epoch": 0.04, + "learning_rate": 4.978922190732187e-05, + "loss": 1.1171, + "step": 41459 + }, + { + "epoch": 0.04, + "learning_rate": 4.978921172893557e-05, + "loss": 1.0264, + "step": 41460 + }, + { + "epoch": 0.04, + "learning_rate": 4.978920155030456e-05, + "loss": 1.1766, + "step": 41461 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789191371428836e-05, + "loss": 0.8881, + "step": 41462 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789181192308404e-05, + "loss": 1.2069, + "step": 41463 + }, + { + "epoch": 0.04, + "learning_rate": 4.978917101294326e-05, + "loss": 0.8621, + "step": 41464 + }, + { + "epoch": 0.04, + "learning_rate": 4.978916083333342e-05, + "loss": 0.9328, + "step": 41465 + }, + { + "epoch": 0.04, + "learning_rate": 4.978915065347887e-05, + "loss": 1.1159, + "step": 41466 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789140473379606e-05, + "loss": 0.8249, + "step": 41467 + }, + { + "epoch": 0.04, + "learning_rate": 4.978913029303564e-05, + "loss": 1.0736, + "step": 41468 + }, + { + "epoch": 0.04, + "learning_rate": 4.978912011244695e-05, + "loss": 1.0249, + "step": 41469 + }, + { + "epoch": 0.04, + "learning_rate": 4.978910993161356e-05, + "loss": 1.014, + "step": 41470 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789099750535476e-05, + "loss": 1.1359, + "step": 41471 + }, + { + "epoch": 0.04, + "learning_rate": 4.978908956921268e-05, + "loss": 1.1373, + "step": 41472 + }, + { + "epoch": 0.04, + "learning_rate": 4.978907938764517e-05, + "loss": 1.0936, + "step": 41473 + }, + { + "epoch": 0.04, + "learning_rate": 4.978906920583295e-05, + "loss": 1.1841, + "step": 41474 + }, + { + "epoch": 0.04, + "learning_rate": 4.978905902377603e-05, + "loss": 1.3778, + "step": 41475 + }, + { + "epoch": 0.04, + "learning_rate": 4.97890488414744e-05, + "loss": 0.9585, + "step": 41476 + }, + { + "epoch": 0.04, + "learning_rate": 4.9789038658928056e-05, + "loss": 1.0257, + "step": 41477 + }, + { + "epoch": 0.04, + "learning_rate": 4.978902847613701e-05, + "loss": 1.0103, + "step": 41478 + }, + { + "epoch": 0.04, + "learning_rate": 4.978901829310127e-05, + "loss": 1.0996, + "step": 41479 + }, + { + "epoch": 0.04, + "learning_rate": 4.97890081098208e-05, + "loss": 0.7431, + "step": 41480 + }, + { + "epoch": 0.04, + "learning_rate": 4.978899792629565e-05, + "loss": 1.0957, + "step": 41481 + }, + { + "epoch": 0.04, + "learning_rate": 4.978898774252577e-05, + "loss": 0.9144, + "step": 41482 + }, + { + "epoch": 0.04, + "learning_rate": 4.978897755851119e-05, + "loss": 1.0449, + "step": 41483 + }, + { + "epoch": 0.04, + "learning_rate": 4.97889673742519e-05, + "loss": 0.5993, + "step": 41484 + }, + { + "epoch": 0.04, + "learning_rate": 4.978895718974792e-05, + "loss": 0.3246, + "step": 41485 + }, + { + "epoch": 0.04, + "learning_rate": 4.978894700499922e-05, + "loss": 0.9746, + "step": 41486 + }, + { + "epoch": 0.04, + "learning_rate": 4.978893682000582e-05, + "loss": 0.9469, + "step": 41487 + }, + { + "epoch": 0.04, + "learning_rate": 4.978892663476771e-05, + "loss": 1.1226, + "step": 41488 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788916449284894e-05, + "loss": 1.1667, + "step": 41489 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788906263557375e-05, + "loss": 0.599, + "step": 41490 + }, + { + "epoch": 0.04, + "learning_rate": 4.978889607758515e-05, + "loss": 0.6699, + "step": 41491 + }, + { + "epoch": 0.04, + "learning_rate": 4.978888589136822e-05, + "loss": 1.0742, + "step": 41492 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788875704906584e-05, + "loss": 0.961, + "step": 41493 + }, + { + "epoch": 0.04, + "learning_rate": 4.978886551820025e-05, + "loss": 0.8797, + "step": 41494 + }, + { + "epoch": 0.04, + "learning_rate": 4.97888553312492e-05, + "loss": 0.8878, + "step": 41495 + }, + { + "epoch": 0.04, + "learning_rate": 4.978884514405344e-05, + "loss": 0.698, + "step": 41496 + }, + { + "epoch": 0.04, + "learning_rate": 4.978883495661298e-05, + "loss": 1.0637, + "step": 41497 + }, + { + "epoch": 0.04, + "learning_rate": 4.978882476892782e-05, + "loss": 0.9255, + "step": 41498 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788814580997956e-05, + "loss": 0.8825, + "step": 41499 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788804392823385e-05, + "loss": 1.2729, + "step": 41500 + }, + { + "epoch": 0.04, + "eval_loss": 1.0742453336715698, + "eval_runtime": 126.3752, + "eval_samples_per_second": 10.959, + "eval_steps_per_second": 5.484, + "step": 41500 + }, + { + "epoch": 0.04, + "learning_rate": 4.978879420440411e-05, + "loss": 1.0302, + "step": 41501 + }, + { + "epoch": 0.04, + "learning_rate": 4.978878401574013e-05, + "loss": 1.0832, + "step": 41502 + }, + { + "epoch": 0.04, + "learning_rate": 4.978877382683144e-05, + "loss": 1.1322, + "step": 41503 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788763637678053e-05, + "loss": 0.8893, + "step": 41504 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788753448279954e-05, + "loss": 1.3078, + "step": 41505 + }, + { + "epoch": 0.04, + "learning_rate": 4.978874325863716e-05, + "loss": 1.0762, + "step": 41506 + }, + { + "epoch": 0.04, + "learning_rate": 4.978873306874966e-05, + "loss": 1.2672, + "step": 41507 + }, + { + "epoch": 0.04, + "learning_rate": 4.978872287861746e-05, + "loss": 1.6134, + "step": 41508 + }, + { + "epoch": 0.04, + "learning_rate": 4.978871268824055e-05, + "loss": 0.7576, + "step": 41509 + }, + { + "epoch": 0.04, + "learning_rate": 4.978870249761893e-05, + "loss": 0.9819, + "step": 41510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788692306752614e-05, + "loss": 1.1491, + "step": 41511 + }, + { + "epoch": 0.04, + "learning_rate": 4.97886821156416e-05, + "loss": 1.341, + "step": 41512 + }, + { + "epoch": 0.04, + "learning_rate": 4.978867192428587e-05, + "loss": 1.2528, + "step": 41513 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788661732685444e-05, + "loss": 1.2909, + "step": 41514 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788651540840314e-05, + "loss": 1.2261, + "step": 41515 + }, + { + "epoch": 0.04, + "learning_rate": 4.978864134875047e-05, + "loss": 1.0531, + "step": 41516 + }, + { + "epoch": 0.04, + "learning_rate": 4.978863115641594e-05, + "loss": 0.9123, + "step": 41517 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788620963836705e-05, + "loss": 0.9992, + "step": 41518 + }, + { + "epoch": 0.04, + "learning_rate": 4.978861077101276e-05, + "loss": 0.9787, + "step": 41519 + }, + { + "epoch": 0.04, + "learning_rate": 4.978860057794411e-05, + "loss": 0.9754, + "step": 41520 + }, + { + "epoch": 0.04, + "learning_rate": 4.978859038463076e-05, + "loss": 0.9675, + "step": 41521 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788580191072715e-05, + "loss": 1.2194, + "step": 41522 + }, + { + "epoch": 0.04, + "learning_rate": 4.978856999726996e-05, + "loss": 1.2863, + "step": 41523 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788559803222513e-05, + "loss": 0.8826, + "step": 41524 + }, + { + "epoch": 0.04, + "learning_rate": 4.978854960893035e-05, + "loss": 0.8453, + "step": 41525 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788539414393495e-05, + "loss": 0.9033, + "step": 41526 + }, + { + "epoch": 0.04, + "learning_rate": 4.978852921961193e-05, + "loss": 0.903, + "step": 41527 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788519024585665e-05, + "loss": 0.9561, + "step": 41528 + }, + { + "epoch": 0.04, + "learning_rate": 4.97885088293147e-05, + "loss": 1.1876, + "step": 41529 + }, + { + "epoch": 0.04, + "learning_rate": 4.978849863379904e-05, + "loss": 0.9494, + "step": 41530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788488438038664e-05, + "loss": 1.3303, + "step": 41531 + }, + { + "epoch": 0.04, + "learning_rate": 4.97884782420336e-05, + "loss": 1.518, + "step": 41532 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788468045783823e-05, + "loss": 0.9134, + "step": 41533 + }, + { + "epoch": 0.04, + "learning_rate": 4.978845784928935e-05, + "loss": 0.9738, + "step": 41534 + }, + { + "epoch": 0.04, + "learning_rate": 4.978844765255018e-05, + "loss": 0.9411, + "step": 41535 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788437455566303e-05, + "loss": 0.9152, + "step": 41536 + }, + { + "epoch": 0.04, + "learning_rate": 4.978842725833773e-05, + "loss": 0.985, + "step": 41537 + }, + { + "epoch": 0.04, + "learning_rate": 4.978841706086445e-05, + "loss": 0.907, + "step": 41538 + }, + { + "epoch": 0.04, + "learning_rate": 4.978840686314648e-05, + "loss": 1.1376, + "step": 41539 + }, + { + "epoch": 0.04, + "learning_rate": 4.97883966651838e-05, + "loss": 0.7565, + "step": 41540 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788386466976414e-05, + "loss": 0.8465, + "step": 41541 + }, + { + "epoch": 0.04, + "learning_rate": 4.978837626852434e-05, + "loss": 0.8682, + "step": 41542 + }, + { + "epoch": 0.04, + "learning_rate": 4.978836606982755e-05, + "loss": 1.0209, + "step": 41543 + }, + { + "epoch": 0.04, + "learning_rate": 4.978835587088607e-05, + "loss": 1.045, + "step": 41544 + }, + { + "epoch": 0.04, + "learning_rate": 4.97883456716999e-05, + "loss": 0.9994, + "step": 41545 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788335472269015e-05, + "loss": 1.0741, + "step": 41546 + }, + { + "epoch": 0.04, + "learning_rate": 4.978832527259343e-05, + "loss": 0.9521, + "step": 41547 + }, + { + "epoch": 0.04, + "learning_rate": 4.978831507267315e-05, + "loss": 0.9276, + "step": 41548 + }, + { + "epoch": 0.04, + "learning_rate": 4.978830487250817e-05, + "loss": 0.9582, + "step": 41549 + }, + { + "epoch": 0.04, + "learning_rate": 4.978829467209849e-05, + "loss": 0.8807, + "step": 41550 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788284471444114e-05, + "loss": 0.8463, + "step": 41551 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788274270545035e-05, + "loss": 1.0248, + "step": 41552 + }, + { + "epoch": 0.04, + "learning_rate": 4.978826406940126e-05, + "loss": 0.6777, + "step": 41553 + }, + { + "epoch": 0.04, + "learning_rate": 4.978825386801278e-05, + "loss": 0.7962, + "step": 41554 + }, + { + "epoch": 0.04, + "learning_rate": 4.97882436663796e-05, + "loss": 0.7336, + "step": 41555 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788233464501724e-05, + "loss": 0.7592, + "step": 41556 + }, + { + "epoch": 0.04, + "learning_rate": 4.978822326237915e-05, + "loss": 1.2757, + "step": 41557 + }, + { + "epoch": 0.04, + "learning_rate": 4.978821306001188e-05, + "loss": 0.898, + "step": 41558 + }, + { + "epoch": 0.04, + "learning_rate": 4.97882028573999e-05, + "loss": 1.2395, + "step": 41559 + }, + { + "epoch": 0.04, + "learning_rate": 4.978819265454323e-05, + "loss": 1.1107, + "step": 41560 + }, + { + "epoch": 0.04, + "learning_rate": 4.978818245144186e-05, + "loss": 1.301, + "step": 41561 + }, + { + "epoch": 0.04, + "learning_rate": 4.97881722480958e-05, + "loss": 1.1411, + "step": 41562 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788162044505023e-05, + "loss": 1.0909, + "step": 41563 + }, + { + "epoch": 0.04, + "learning_rate": 4.978815184066956e-05, + "loss": 0.8742, + "step": 41564 + }, + { + "epoch": 0.04, + "learning_rate": 4.97881416365894e-05, + "loss": 1.2157, + "step": 41565 + }, + { + "epoch": 0.04, + "learning_rate": 4.978813143226454e-05, + "loss": 0.9579, + "step": 41566 + }, + { + "epoch": 0.04, + "learning_rate": 4.978812122769498e-05, + "loss": 0.862, + "step": 41567 + }, + { + "epoch": 0.04, + "learning_rate": 4.978811102288073e-05, + "loss": 0.9763, + "step": 41568 + }, + { + "epoch": 0.04, + "learning_rate": 4.978810081782177e-05, + "loss": 1.0116, + "step": 41569 + }, + { + "epoch": 0.04, + "learning_rate": 4.978809061251811e-05, + "loss": 0.9979, + "step": 41570 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788080406969763e-05, + "loss": 1.0784, + "step": 41571 + }, + { + "epoch": 0.04, + "learning_rate": 4.978807020117672e-05, + "loss": 0.9571, + "step": 41572 + }, + { + "epoch": 0.04, + "learning_rate": 4.978805999513897e-05, + "loss": 1.0027, + "step": 41573 + }, + { + "epoch": 0.04, + "learning_rate": 4.978804978885653e-05, + "loss": 1.0967, + "step": 41574 + }, + { + "epoch": 0.04, + "learning_rate": 4.978803958232939e-05, + "loss": 0.9216, + "step": 41575 + }, + { + "epoch": 0.04, + "learning_rate": 4.978802937555755e-05, + "loss": 0.9396, + "step": 41576 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788019168541014e-05, + "loss": 1.5417, + "step": 41577 + }, + { + "epoch": 0.04, + "learning_rate": 4.978800896127979e-05, + "loss": 1.7133, + "step": 41578 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787998753773854e-05, + "loss": 0.8149, + "step": 41579 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787988546023234e-05, + "loss": 3.3005, + "step": 41580 + }, + { + "epoch": 0.04, + "learning_rate": 4.978797833802791e-05, + "loss": 5.9322, + "step": 41581 + }, + { + "epoch": 0.04, + "learning_rate": 4.97879681297879e-05, + "loss": 5.8657, + "step": 41582 + }, + { + "epoch": 0.04, + "learning_rate": 4.978795792130319e-05, + "loss": 5.8149, + "step": 41583 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787947712573765e-05, + "loss": 5.8186, + "step": 41584 + }, + { + "epoch": 0.04, + "learning_rate": 4.978793750359967e-05, + "loss": 5.8094, + "step": 41585 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787927294380865e-05, + "loss": 5.9047, + "step": 41586 + }, + { + "epoch": 0.04, + "learning_rate": 4.978791708491737e-05, + "loss": 5.7411, + "step": 41587 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787906875209174e-05, + "loss": 3.0991, + "step": 41588 + }, + { + "epoch": 0.04, + "learning_rate": 4.978789666525628e-05, + "loss": 1.0054, + "step": 41589 + }, + { + "epoch": 0.04, + "learning_rate": 4.97878864550587e-05, + "loss": 0.8912, + "step": 41590 + }, + { + "epoch": 0.04, + "learning_rate": 4.978787624461642e-05, + "loss": 1.2098, + "step": 41591 + }, + { + "epoch": 0.04, + "learning_rate": 4.978786603392944e-05, + "loss": 0.9231, + "step": 41592 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787855822997766e-05, + "loss": 0.9341, + "step": 41593 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787845611821406e-05, + "loss": 1.0772, + "step": 41594 + }, + { + "epoch": 0.04, + "learning_rate": 4.978783540040034e-05, + "loss": 1.1013, + "step": 41595 + }, + { + "epoch": 0.04, + "learning_rate": 4.978782518873458e-05, + "loss": 1.5413, + "step": 41596 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787814976824134e-05, + "loss": 0.8394, + "step": 41597 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787804764668984e-05, + "loss": 1.0263, + "step": 41598 + }, + { + "epoch": 0.04, + "learning_rate": 4.978779455226914e-05, + "loss": 1.0331, + "step": 41599 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787784339624603e-05, + "loss": 1.0185, + "step": 41600 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787774126735374e-05, + "loss": 1.1961, + "step": 41601 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787763913601446e-05, + "loss": 1.1708, + "step": 41602 + }, + { + "epoch": 0.04, + "learning_rate": 4.978775370022283e-05, + "loss": 1.1274, + "step": 41603 + }, + { + "epoch": 0.04, + "learning_rate": 4.978774348659951e-05, + "loss": 1.0332, + "step": 41604 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787733272731505e-05, + "loss": 1.0903, + "step": 41605 + }, + { + "epoch": 0.04, + "learning_rate": 4.97877230586188e-05, + "loss": 0.6382, + "step": 41606 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787712844261405e-05, + "loss": 0.5962, + "step": 41607 + }, + { + "epoch": 0.04, + "learning_rate": 4.978770262965932e-05, + "loss": 0.5539, + "step": 41608 + }, + { + "epoch": 0.04, + "learning_rate": 4.978769241481253e-05, + "loss": 0.7947, + "step": 41609 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787682199721053e-05, + "loss": 1.1345, + "step": 41610 + }, + { + "epoch": 0.04, + "learning_rate": 4.978767198438488e-05, + "loss": 1.1788, + "step": 41611 + }, + { + "epoch": 0.04, + "learning_rate": 4.978766176880402e-05, + "loss": 1.0451, + "step": 41612 + }, + { + "epoch": 0.04, + "learning_rate": 4.978765155297846e-05, + "loss": 0.9822, + "step": 41613 + }, + { + "epoch": 0.04, + "learning_rate": 4.978764133690821e-05, + "loss": 0.8985, + "step": 41614 + }, + { + "epoch": 0.04, + "learning_rate": 4.978763112059326e-05, + "loss": 1.2806, + "step": 41615 + }, + { + "epoch": 0.04, + "learning_rate": 4.978762090403363e-05, + "loss": 0.9431, + "step": 41616 + }, + { + "epoch": 0.04, + "learning_rate": 4.97876106872293e-05, + "loss": 0.8795, + "step": 41617 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787600470180275e-05, + "loss": 0.9483, + "step": 41618 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787590252886555e-05, + "loss": 1.133, + "step": 41619 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787580035348145e-05, + "loss": 1.1109, + "step": 41620 + }, + { + "epoch": 0.04, + "learning_rate": 4.978756981756505e-05, + "loss": 1.044, + "step": 41621 + }, + { + "epoch": 0.04, + "learning_rate": 4.978755959953725e-05, + "loss": 1.0277, + "step": 41622 + }, + { + "epoch": 0.04, + "learning_rate": 4.978754938126477e-05, + "loss": 1.2206, + "step": 41623 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787539162747586e-05, + "loss": 0.7238, + "step": 41624 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787528943985715e-05, + "loss": 1.0849, + "step": 41625 + }, + { + "epoch": 0.04, + "learning_rate": 4.978751872497915e-05, + "loss": 0.9558, + "step": 41626 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787508505727905e-05, + "loss": 1.1675, + "step": 41627 + }, + { + "epoch": 0.04, + "learning_rate": 4.978749828623196e-05, + "loss": 1.5187, + "step": 41628 + }, + { + "epoch": 0.04, + "learning_rate": 4.978748806649132e-05, + "loss": 1.317, + "step": 41629 + }, + { + "epoch": 0.04, + "learning_rate": 4.978747784650599e-05, + "loss": 0.9134, + "step": 41630 + }, + { + "epoch": 0.04, + "learning_rate": 4.978746762627597e-05, + "loss": 1.1495, + "step": 41631 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787457405801254e-05, + "loss": 1.1674, + "step": 41632 + }, + { + "epoch": 0.04, + "learning_rate": 4.978744718508185e-05, + "loss": 1.1557, + "step": 41633 + }, + { + "epoch": 0.04, + "learning_rate": 4.978743696411776e-05, + "loss": 0.72, + "step": 41634 + }, + { + "epoch": 0.04, + "learning_rate": 4.978742674290897e-05, + "loss": 0.6327, + "step": 41635 + }, + { + "epoch": 0.04, + "learning_rate": 4.978741652145549e-05, + "loss": 0.9368, + "step": 41636 + }, + { + "epoch": 0.04, + "learning_rate": 4.978740629975732e-05, + "loss": 1.2403, + "step": 41637 + }, + { + "epoch": 0.04, + "learning_rate": 4.978739607781446e-05, + "loss": 0.8422, + "step": 41638 + }, + { + "epoch": 0.04, + "learning_rate": 4.978738585562692e-05, + "loss": 0.8623, + "step": 41639 + }, + { + "epoch": 0.04, + "learning_rate": 4.978737563319468e-05, + "loss": 0.8883, + "step": 41640 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787365410517745e-05, + "loss": 0.8078, + "step": 41641 + }, + { + "epoch": 0.04, + "learning_rate": 4.978735518759612e-05, + "loss": 0.9402, + "step": 41642 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787344964429814e-05, + "loss": 0.747, + "step": 41643 + }, + { + "epoch": 0.04, + "learning_rate": 4.978733474101881e-05, + "loss": 0.5809, + "step": 41644 + }, + { + "epoch": 0.04, + "learning_rate": 4.978732451736312e-05, + "loss": 1.0436, + "step": 41645 + }, + { + "epoch": 0.04, + "learning_rate": 4.978731429346274e-05, + "loss": 0.903, + "step": 41646 + }, + { + "epoch": 0.04, + "learning_rate": 4.978730406931766e-05, + "loss": 0.8859, + "step": 41647 + }, + { + "epoch": 0.04, + "learning_rate": 4.97872938449279e-05, + "loss": 1.198, + "step": 41648 + }, + { + "epoch": 0.04, + "learning_rate": 4.978728362029345e-05, + "loss": 0.9098, + "step": 41649 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787273395414315e-05, + "loss": 0.3261, + "step": 41650 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787263170290474e-05, + "loss": 0.6021, + "step": 41651 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787252944921955e-05, + "loss": 0.9895, + "step": 41652 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787242719308746e-05, + "loss": 1.0333, + "step": 41653 + }, + { + "epoch": 0.04, + "learning_rate": 4.978723249345085e-05, + "loss": 1.0224, + "step": 41654 + }, + { + "epoch": 0.04, + "learning_rate": 4.978722226734827e-05, + "loss": 1.1011, + "step": 41655 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787212041000985e-05, + "loss": 0.9502, + "step": 41656 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787201814409025e-05, + "loss": 1.0286, + "step": 41657 + }, + { + "epoch": 0.04, + "learning_rate": 4.978719158757237e-05, + "loss": 1.3429, + "step": 41658 + }, + { + "epoch": 0.04, + "learning_rate": 4.978718136049102e-05, + "loss": 0.798, + "step": 41659 + }, + { + "epoch": 0.04, + "learning_rate": 4.978717113316499e-05, + "loss": 0.7813, + "step": 41660 + }, + { + "epoch": 0.04, + "learning_rate": 4.978716090559427e-05, + "loss": 1.1063, + "step": 41661 + }, + { + "epoch": 0.04, + "learning_rate": 4.978715067777886e-05, + "loss": 0.8652, + "step": 41662 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787140449718766e-05, + "loss": 0.8233, + "step": 41663 + }, + { + "epoch": 0.04, + "learning_rate": 4.978713022141398e-05, + "loss": 0.6085, + "step": 41664 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787119992864504e-05, + "loss": 0.9054, + "step": 41665 + }, + { + "epoch": 0.04, + "learning_rate": 4.978710976407035e-05, + "loss": 0.7774, + "step": 41666 + }, + { + "epoch": 0.04, + "learning_rate": 4.97870995350315e-05, + "loss": 1.2068, + "step": 41667 + }, + { + "epoch": 0.04, + "learning_rate": 4.978708930574796e-05, + "loss": 1.0268, + "step": 41668 + }, + { + "epoch": 0.04, + "learning_rate": 4.978707907621974e-05, + "loss": 1.4449, + "step": 41669 + }, + { + "epoch": 0.04, + "learning_rate": 4.978706884644683e-05, + "loss": 1.0677, + "step": 41670 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787058616429225e-05, + "loss": 0.9488, + "step": 41671 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787048386166936e-05, + "loss": 1.0836, + "step": 41672 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787038155659956e-05, + "loss": 1.234, + "step": 41673 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787027924908306e-05, + "loss": 0.8942, + "step": 41674 + }, + { + "epoch": 0.04, + "learning_rate": 4.978701769391195e-05, + "loss": 0.9727, + "step": 41675 + }, + { + "epoch": 0.04, + "learning_rate": 4.978700746267091e-05, + "loss": 1.0277, + "step": 41676 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786997231185196e-05, + "loss": 1.3238, + "step": 41677 + }, + { + "epoch": 0.04, + "learning_rate": 4.978698699945479e-05, + "loss": 1.1946, + "step": 41678 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786976767479685e-05, + "loss": 1.3559, + "step": 41679 + }, + { + "epoch": 0.04, + "learning_rate": 4.97869665352599e-05, + "loss": 1.1756, + "step": 41680 + }, + { + "epoch": 0.04, + "learning_rate": 4.978695630279544e-05, + "loss": 0.6372, + "step": 41681 + }, + { + "epoch": 0.04, + "learning_rate": 4.978694607008628e-05, + "loss": 1.1993, + "step": 41682 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786935837132445e-05, + "loss": 0.9268, + "step": 41683 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786925603933906e-05, + "loss": 0.8175, + "step": 41684 + }, + { + "epoch": 0.04, + "learning_rate": 4.97869153704907e-05, + "loss": 0.8922, + "step": 41685 + }, + { + "epoch": 0.04, + "learning_rate": 4.97869051368028e-05, + "loss": 1.3419, + "step": 41686 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786894902870206e-05, + "loss": 1.4617, + "step": 41687 + }, + { + "epoch": 0.04, + "learning_rate": 4.978688466869294e-05, + "loss": 1.1141, + "step": 41688 + }, + { + "epoch": 0.04, + "learning_rate": 4.978687443427098e-05, + "loss": 0.841, + "step": 41689 + }, + { + "epoch": 0.04, + "learning_rate": 4.978686419960434e-05, + "loss": 0.9871, + "step": 41690 + }, + { + "epoch": 0.04, + "learning_rate": 4.978685396469301e-05, + "loss": 1.1518, + "step": 41691 + }, + { + "epoch": 0.04, + "learning_rate": 4.978684372953699e-05, + "loss": 1.0024, + "step": 41692 + }, + { + "epoch": 0.04, + "learning_rate": 4.97868334941363e-05, + "loss": 0.6559, + "step": 41693 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786823258490913e-05, + "loss": 1.1016, + "step": 41694 + }, + { + "epoch": 0.04, + "learning_rate": 4.978681302260084e-05, + "loss": 0.8793, + "step": 41695 + }, + { + "epoch": 0.04, + "learning_rate": 4.978680278646609e-05, + "loss": 1.4717, + "step": 41696 + }, + { + "epoch": 0.04, + "learning_rate": 4.978679255008665e-05, + "loss": 0.7806, + "step": 41697 + }, + { + "epoch": 0.04, + "learning_rate": 4.978678231346252e-05, + "loss": 1.0197, + "step": 41698 + }, + { + "epoch": 0.04, + "learning_rate": 4.978677207659371e-05, + "loss": 1.1762, + "step": 41699 + }, + { + "epoch": 0.04, + "learning_rate": 4.978676183948022e-05, + "loss": 0.7341, + "step": 41700 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786751602122046e-05, + "loss": 1.0016, + "step": 41701 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786741364519187e-05, + "loss": 1.1506, + "step": 41702 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786731126671636e-05, + "loss": 1.0221, + "step": 41703 + }, + { + "epoch": 0.04, + "learning_rate": 4.978672088857941e-05, + "loss": 1.1224, + "step": 41704 + }, + { + "epoch": 0.04, + "learning_rate": 4.978671065024249e-05, + "loss": 1.1899, + "step": 41705 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786700411660895e-05, + "loss": 1.076, + "step": 41706 + }, + { + "epoch": 0.04, + "learning_rate": 4.978669017283461e-05, + "loss": 0.9113, + "step": 41707 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786679933763644e-05, + "loss": 0.8184, + "step": 41708 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786669694447996e-05, + "loss": 0.948, + "step": 41709 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786659454887664e-05, + "loss": 1.1063, + "step": 41710 + }, + { + "epoch": 0.04, + "learning_rate": 4.978664921508264e-05, + "loss": 0.9291, + "step": 41711 + }, + { + "epoch": 0.04, + "learning_rate": 4.978663897503294e-05, + "loss": 1.0873, + "step": 41712 + }, + { + "epoch": 0.04, + "learning_rate": 4.978662873473856e-05, + "loss": 1.1122, + "step": 41713 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786618494199494e-05, + "loss": 1.1485, + "step": 41714 + }, + { + "epoch": 0.04, + "learning_rate": 4.978660825341575e-05, + "loss": 0.7843, + "step": 41715 + }, + { + "epoch": 0.04, + "learning_rate": 4.978659801238731e-05, + "loss": 0.5159, + "step": 41716 + }, + { + "epoch": 0.04, + "learning_rate": 4.97865877711142e-05, + "loss": 1.1093, + "step": 41717 + }, + { + "epoch": 0.04, + "learning_rate": 4.97865775295964e-05, + "loss": 0.8315, + "step": 41718 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786567287833917e-05, + "loss": 0.7692, + "step": 41719 + }, + { + "epoch": 0.04, + "learning_rate": 4.978655704582676e-05, + "loss": 0.9052, + "step": 41720 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786546803574906e-05, + "loss": 0.8731, + "step": 41721 + }, + { + "epoch": 0.04, + "learning_rate": 4.978653656107838e-05, + "loss": 0.9449, + "step": 41722 + }, + { + "epoch": 0.04, + "learning_rate": 4.978652631833717e-05, + "loss": 0.8601, + "step": 41723 + }, + { + "epoch": 0.04, + "learning_rate": 4.978651607535128e-05, + "loss": 0.7358, + "step": 41724 + }, + { + "epoch": 0.04, + "learning_rate": 4.97865058321207e-05, + "loss": 0.7117, + "step": 41725 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786495588645447e-05, + "loss": 1.029, + "step": 41726 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786485344925505e-05, + "loss": 0.7824, + "step": 41727 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786475100960886e-05, + "loss": 1.0328, + "step": 41728 + }, + { + "epoch": 0.04, + "learning_rate": 4.978646485675158e-05, + "loss": 0.9886, + "step": 41729 + }, + { + "epoch": 0.04, + "learning_rate": 4.97864546122976e-05, + "loss": 0.9992, + "step": 41730 + }, + { + "epoch": 0.04, + "learning_rate": 4.978644436759894e-05, + "loss": 0.849, + "step": 41731 + }, + { + "epoch": 0.04, + "learning_rate": 4.978643412265559e-05, + "loss": 1.0968, + "step": 41732 + }, + { + "epoch": 0.04, + "learning_rate": 4.978642387746756e-05, + "loss": 1.1532, + "step": 41733 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786413632034854e-05, + "loss": 0.857, + "step": 41734 + }, + { + "epoch": 0.04, + "learning_rate": 4.978640338635746e-05, + "loss": 1.1479, + "step": 41735 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786393140435395e-05, + "loss": 1.0138, + "step": 41736 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786382894268647e-05, + "loss": 1.2694, + "step": 41737 + }, + { + "epoch": 0.04, + "learning_rate": 4.978637264785721e-05, + "loss": 1.463, + "step": 41738 + }, + { + "epoch": 0.04, + "learning_rate": 4.97863624012011e-05, + "loss": 0.9457, + "step": 41739 + }, + { + "epoch": 0.04, + "learning_rate": 4.978635215430031e-05, + "loss": 1.1236, + "step": 41740 + }, + { + "epoch": 0.04, + "learning_rate": 4.978634190715483e-05, + "loss": 1.4244, + "step": 41741 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786331659764675e-05, + "loss": 1.1833, + "step": 41742 + }, + { + "epoch": 0.04, + "learning_rate": 4.978632141212984e-05, + "loss": 1.0467, + "step": 41743 + }, + { + "epoch": 0.04, + "learning_rate": 4.978631116425033e-05, + "loss": 1.3316, + "step": 41744 + }, + { + "epoch": 0.04, + "learning_rate": 4.978630091612614e-05, + "loss": 1.2116, + "step": 41745 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786290667757266e-05, + "loss": 0.755, + "step": 41746 + }, + { + "epoch": 0.04, + "learning_rate": 4.97862804191437e-05, + "loss": 0.995, + "step": 41747 + }, + { + "epoch": 0.04, + "learning_rate": 4.978627017028548e-05, + "loss": 1.0632, + "step": 41748 + }, + { + "epoch": 0.04, + "learning_rate": 4.978625992118256e-05, + "loss": 0.7551, + "step": 41749 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786249671834965e-05, + "loss": 0.9347, + "step": 41750 + }, + { + "epoch": 0.04, + "learning_rate": 4.97862394222427e-05, + "loss": 1.4451, + "step": 41751 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786229172405744e-05, + "loss": 1.3419, + "step": 41752 + }, + { + "epoch": 0.04, + "learning_rate": 4.978621892232411e-05, + "loss": 1.0109, + "step": 41753 + }, + { + "epoch": 0.04, + "learning_rate": 4.97862086719978e-05, + "loss": 1.0336, + "step": 41754 + }, + { + "epoch": 0.04, + "learning_rate": 4.978619842142681e-05, + "loss": 1.2453, + "step": 41755 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786188170611147e-05, + "loss": 1.0519, + "step": 41756 + }, + { + "epoch": 0.04, + "learning_rate": 4.97861779195508e-05, + "loss": 0.777, + "step": 41757 + }, + { + "epoch": 0.04, + "learning_rate": 4.978616766824578e-05, + "loss": 1.0783, + "step": 41758 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786157416696066e-05, + "loss": 0.9003, + "step": 41759 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786147164901685e-05, + "loss": 1.135, + "step": 41760 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786136912862626e-05, + "loss": 0.9451, + "step": 41761 + }, + { + "epoch": 0.04, + "learning_rate": 4.978612666057888e-05, + "loss": 1.0367, + "step": 41762 + }, + { + "epoch": 0.04, + "learning_rate": 4.978611640805047e-05, + "loss": 1.1601, + "step": 41763 + }, + { + "epoch": 0.04, + "learning_rate": 4.978610615527737e-05, + "loss": 1.0165, + "step": 41764 + }, + { + "epoch": 0.04, + "learning_rate": 4.97860959022596e-05, + "loss": 1.0432, + "step": 41765 + }, + { + "epoch": 0.04, + "learning_rate": 4.978608564899715e-05, + "loss": 0.7482, + "step": 41766 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786075395490016e-05, + "loss": 1.0208, + "step": 41767 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786065141738216e-05, + "loss": 1.1923, + "step": 41768 + }, + { + "epoch": 0.04, + "learning_rate": 4.978605488774173e-05, + "loss": 1.1727, + "step": 41769 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786044633500564e-05, + "loss": 0.69, + "step": 41770 + }, + { + "epoch": 0.04, + "learning_rate": 4.978603437901473e-05, + "loss": 1.0808, + "step": 41771 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786024124284216e-05, + "loss": 1.2684, + "step": 41772 + }, + { + "epoch": 0.04, + "learning_rate": 4.9786013869309016e-05, + "loss": 0.8209, + "step": 41773 + }, + { + "epoch": 0.04, + "learning_rate": 4.978600361408915e-05, + "loss": 1.1877, + "step": 41774 + }, + { + "epoch": 0.04, + "learning_rate": 4.97859933586246e-05, + "loss": 1.204, + "step": 41775 + }, + { + "epoch": 0.04, + "learning_rate": 4.978598310291537e-05, + "loss": 1.0205, + "step": 41776 + }, + { + "epoch": 0.04, + "learning_rate": 4.978597284696147e-05, + "loss": 0.5764, + "step": 41777 + }, + { + "epoch": 0.04, + "learning_rate": 4.978596259076289e-05, + "loss": 1.1456, + "step": 41778 + }, + { + "epoch": 0.04, + "learning_rate": 4.978595233431964e-05, + "loss": 0.8904, + "step": 41779 + }, + { + "epoch": 0.04, + "learning_rate": 4.978594207763171e-05, + "loss": 1.1218, + "step": 41780 + }, + { + "epoch": 0.04, + "learning_rate": 4.97859318206991e-05, + "loss": 0.7835, + "step": 41781 + }, + { + "epoch": 0.04, + "learning_rate": 4.978592156352182e-05, + "loss": 0.8894, + "step": 41782 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785911306099856e-05, + "loss": 0.989, + "step": 41783 + }, + { + "epoch": 0.04, + "learning_rate": 4.978590104843322e-05, + "loss": 1.1678, + "step": 41784 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785890790521904e-05, + "loss": 1.049, + "step": 41785 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785880532365926e-05, + "loss": 0.8575, + "step": 41786 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785870273965256e-05, + "loss": 1.1115, + "step": 41787 + }, + { + "epoch": 0.04, + "learning_rate": 4.978586001531992e-05, + "loss": 1.0346, + "step": 41788 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785849756429906e-05, + "loss": 0.8551, + "step": 41789 + }, + { + "epoch": 0.04, + "learning_rate": 4.978583949729521e-05, + "loss": 0.8041, + "step": 41790 + }, + { + "epoch": 0.04, + "learning_rate": 4.978582923791585e-05, + "loss": 0.9785, + "step": 41791 + }, + { + "epoch": 0.04, + "learning_rate": 4.978581897829181e-05, + "loss": 1.4908, + "step": 41792 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785808718423085e-05, + "loss": 1.263, + "step": 41793 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785798458309695e-05, + "loss": 1.2248, + "step": 41794 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785788197951634e-05, + "loss": 0.9106, + "step": 41795 + }, + { + "epoch": 0.04, + "learning_rate": 4.978577793734889e-05, + "loss": 0.7043, + "step": 41796 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785767676501474e-05, + "loss": 0.79, + "step": 41797 + }, + { + "epoch": 0.04, + "learning_rate": 4.978575741540938e-05, + "loss": 0.706, + "step": 41798 + }, + { + "epoch": 0.04, + "learning_rate": 4.978574715407262e-05, + "loss": 0.6587, + "step": 41799 + }, + { + "epoch": 0.04, + "learning_rate": 4.978573689249118e-05, + "loss": 0.7197, + "step": 41800 + }, + { + "epoch": 0.04, + "learning_rate": 4.978572663066507e-05, + "loss": 0.625, + "step": 41801 + }, + { + "epoch": 0.04, + "learning_rate": 4.978571636859428e-05, + "loss": 0.8843, + "step": 41802 + }, + { + "epoch": 0.04, + "learning_rate": 4.978570610627882e-05, + "loss": 1.038, + "step": 41803 + }, + { + "epoch": 0.04, + "learning_rate": 4.978569584371868e-05, + "loss": 1.4417, + "step": 41804 + }, + { + "epoch": 0.04, + "learning_rate": 4.978568558091386e-05, + "loss": 0.668, + "step": 41805 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785675317864385e-05, + "loss": 0.3567, + "step": 41806 + }, + { + "epoch": 0.04, + "learning_rate": 4.978566505457023e-05, + "loss": 1.2339, + "step": 41807 + }, + { + "epoch": 0.04, + "learning_rate": 4.97856547910314e-05, + "loss": 1.247, + "step": 41808 + }, + { + "epoch": 0.04, + "learning_rate": 4.978564452724789e-05, + "loss": 1.078, + "step": 41809 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785634263219716e-05, + "loss": 1.0129, + "step": 41810 + }, + { + "epoch": 0.04, + "learning_rate": 4.978562399894686e-05, + "loss": 0.9433, + "step": 41811 + }, + { + "epoch": 0.04, + "learning_rate": 4.978561373442934e-05, + "loss": 0.7192, + "step": 41812 + }, + { + "epoch": 0.04, + "learning_rate": 4.978560346966714e-05, + "loss": 0.851, + "step": 41813 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785593204660264e-05, + "loss": 0.5452, + "step": 41814 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785582939408725e-05, + "loss": 0.8635, + "step": 41815 + }, + { + "epoch": 0.04, + "learning_rate": 4.978557267391251e-05, + "loss": 0.9556, + "step": 41816 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785562408171614e-05, + "loss": 1.0575, + "step": 41817 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785552142186056e-05, + "loss": 1.0094, + "step": 41818 + }, + { + "epoch": 0.04, + "learning_rate": 4.978554187595582e-05, + "loss": 1.1081, + "step": 41819 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785531609480915e-05, + "loss": 0.7079, + "step": 41820 + }, + { + "epoch": 0.04, + "learning_rate": 4.978552134276133e-05, + "loss": 1.1802, + "step": 41821 + }, + { + "epoch": 0.04, + "learning_rate": 4.978551107579708e-05, + "loss": 0.9618, + "step": 41822 + }, + { + "epoch": 0.04, + "learning_rate": 4.978550080858816e-05, + "loss": 0.7795, + "step": 41823 + }, + { + "epoch": 0.04, + "learning_rate": 4.978549054113456e-05, + "loss": 0.943, + "step": 41824 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785480273436294e-05, + "loss": 1.0025, + "step": 41825 + }, + { + "epoch": 0.04, + "learning_rate": 4.978547000549335e-05, + "loss": 0.8763, + "step": 41826 + }, + { + "epoch": 0.04, + "learning_rate": 4.978545973730574e-05, + "loss": 0.6836, + "step": 41827 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785449468873455e-05, + "loss": 0.788, + "step": 41828 + }, + { + "epoch": 0.04, + "learning_rate": 4.978543920019651e-05, + "loss": 0.5872, + "step": 41829 + }, + { + "epoch": 0.04, + "learning_rate": 4.978542893127488e-05, + "loss": 0.5076, + "step": 41830 + }, + { + "epoch": 0.04, + "learning_rate": 4.978541866210859e-05, + "loss": 0.5415, + "step": 41831 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785408392697615e-05, + "loss": 0.7801, + "step": 41832 + }, + { + "epoch": 0.04, + "learning_rate": 4.978539812304197e-05, + "loss": 1.0428, + "step": 41833 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785387853141665e-05, + "loss": 1.3157, + "step": 41834 + }, + { + "epoch": 0.04, + "learning_rate": 4.978537758299669e-05, + "loss": 1.3762, + "step": 41835 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785367312607026e-05, + "loss": 0.9221, + "step": 41836 + }, + { + "epoch": 0.04, + "learning_rate": 4.97853570419727e-05, + "loss": 1.4136, + "step": 41837 + }, + { + "epoch": 0.04, + "learning_rate": 4.978534677109371e-05, + "loss": 0.8915, + "step": 41838 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785336499970045e-05, + "loss": 0.9973, + "step": 41839 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785326228601714e-05, + "loss": 0.685, + "step": 41840 + }, + { + "epoch": 0.04, + "learning_rate": 4.978531595698871e-05, + "loss": 2.5145, + "step": 41841 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785305685131035e-05, + "loss": 1.5555, + "step": 41842 + }, + { + "epoch": 0.04, + "learning_rate": 4.978529541302869e-05, + "loss": 1.5005, + "step": 41843 + }, + { + "epoch": 0.04, + "learning_rate": 4.978528514068167e-05, + "loss": 1.4096, + "step": 41844 + }, + { + "epoch": 0.04, + "learning_rate": 4.978527486808999e-05, + "loss": 1.0146, + "step": 41845 + }, + { + "epoch": 0.04, + "learning_rate": 4.978526459525363e-05, + "loss": 0.8681, + "step": 41846 + }, + { + "epoch": 0.04, + "learning_rate": 4.978525432217261e-05, + "loss": 1.1896, + "step": 41847 + }, + { + "epoch": 0.04, + "learning_rate": 4.978524404884691e-05, + "loss": 1.4484, + "step": 41848 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785233775276554e-05, + "loss": 0.9581, + "step": 41849 + }, + { + "epoch": 0.04, + "learning_rate": 4.978522350146152e-05, + "loss": 1.1346, + "step": 41850 + }, + { + "epoch": 0.04, + "learning_rate": 4.978521322740182e-05, + "loss": 0.838, + "step": 41851 + }, + { + "epoch": 0.04, + "learning_rate": 4.978520295309744e-05, + "loss": 1.1778, + "step": 41852 + }, + { + "epoch": 0.04, + "learning_rate": 4.978519267854841e-05, + "loss": 0.7333, + "step": 41853 + }, + { + "epoch": 0.04, + "learning_rate": 4.978518240375469e-05, + "loss": 1.0586, + "step": 41854 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785172128716315e-05, + "loss": 1.2492, + "step": 41855 + }, + { + "epoch": 0.04, + "learning_rate": 4.978516185343327e-05, + "loss": 1.0398, + "step": 41856 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785151577905553e-05, + "loss": 1.1311, + "step": 41857 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785141302133174e-05, + "loss": 1.0741, + "step": 41858 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785131026116116e-05, + "loss": 1.2302, + "step": 41859 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785120749854396e-05, + "loss": 0.7887, + "step": 41860 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785110473348004e-05, + "loss": 1.0376, + "step": 41861 + }, + { + "epoch": 0.04, + "learning_rate": 4.978510019659695e-05, + "loss": 0.9561, + "step": 41862 + }, + { + "epoch": 0.04, + "learning_rate": 4.978508991960122e-05, + "loss": 0.9527, + "step": 41863 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785079642360834e-05, + "loss": 1.0988, + "step": 41864 + }, + { + "epoch": 0.04, + "learning_rate": 4.978506936487577e-05, + "loss": 0.798, + "step": 41865 + }, + { + "epoch": 0.04, + "learning_rate": 4.978505908714604e-05, + "loss": 0.8391, + "step": 41866 + }, + { + "epoch": 0.04, + "learning_rate": 4.978504880917164e-05, + "loss": 1.2621, + "step": 41867 + }, + { + "epoch": 0.04, + "learning_rate": 4.978503853095258e-05, + "loss": 1.2989, + "step": 41868 + }, + { + "epoch": 0.04, + "learning_rate": 4.978502825248885e-05, + "loss": 0.9042, + "step": 41869 + }, + { + "epoch": 0.04, + "learning_rate": 4.978501797378045e-05, + "loss": 1.2476, + "step": 41870 + }, + { + "epoch": 0.04, + "learning_rate": 4.978500769482738e-05, + "loss": 1.6848, + "step": 41871 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784997415629645e-05, + "loss": 1.0286, + "step": 41872 + }, + { + "epoch": 0.04, + "learning_rate": 4.978498713618725e-05, + "loss": 0.9087, + "step": 41873 + }, + { + "epoch": 0.04, + "learning_rate": 4.978497685650017e-05, + "loss": 1.1347, + "step": 41874 + }, + { + "epoch": 0.04, + "learning_rate": 4.978496657656845e-05, + "loss": 1.3111, + "step": 41875 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784956296392046e-05, + "loss": 0.8416, + "step": 41876 + }, + { + "epoch": 0.04, + "learning_rate": 4.978494601597098e-05, + "loss": 1.0426, + "step": 41877 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784935735305237e-05, + "loss": 1.2041, + "step": 41878 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784925454394836e-05, + "loss": 1.1071, + "step": 41879 + }, + { + "epoch": 0.04, + "learning_rate": 4.978491517323978e-05, + "loss": 1.3589, + "step": 41880 + }, + { + "epoch": 0.04, + "learning_rate": 4.978490489184004e-05, + "loss": 1.1949, + "step": 41881 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784894610195646e-05, + "loss": 1.2351, + "step": 41882 + }, + { + "epoch": 0.04, + "learning_rate": 4.978488432830658e-05, + "loss": 1.1794, + "step": 41883 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784874046172844e-05, + "loss": 0.8076, + "step": 41884 + }, + { + "epoch": 0.04, + "learning_rate": 4.978486376379444e-05, + "loss": 1.2011, + "step": 41885 + }, + { + "epoch": 0.04, + "learning_rate": 4.978485348117139e-05, + "loss": 1.2103, + "step": 41886 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784843198303656e-05, + "loss": 0.7747, + "step": 41887 + }, + { + "epoch": 0.04, + "learning_rate": 4.978483291519126e-05, + "loss": 1.0283, + "step": 41888 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784822631834196e-05, + "loss": 1.0663, + "step": 41889 + }, + { + "epoch": 0.04, + "learning_rate": 4.978481234823248e-05, + "loss": 0.7727, + "step": 41890 + }, + { + "epoch": 0.04, + "learning_rate": 4.978480206438608e-05, + "loss": 0.8588, + "step": 41891 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784791780295025e-05, + "loss": 0.9858, + "step": 41892 + }, + { + "epoch": 0.04, + "learning_rate": 4.978478149595931e-05, + "loss": 0.7719, + "step": 41893 + }, + { + "epoch": 0.04, + "learning_rate": 4.978477121137892e-05, + "loss": 0.7576, + "step": 41894 + }, + { + "epoch": 0.04, + "learning_rate": 4.978476092655387e-05, + "loss": 0.7586, + "step": 41895 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784750641484154e-05, + "loss": 0.5968, + "step": 41896 + }, + { + "epoch": 0.04, + "learning_rate": 4.978474035616978e-05, + "loss": 1.722, + "step": 41897 + }, + { + "epoch": 0.04, + "learning_rate": 4.978473007061073e-05, + "loss": 1.7212, + "step": 41898 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784719784807024e-05, + "loss": 1.7314, + "step": 41899 + }, + { + "epoch": 0.04, + "learning_rate": 4.978470949875865e-05, + "loss": 1.5642, + "step": 41900 + }, + { + "epoch": 0.04, + "learning_rate": 4.978469921246562e-05, + "loss": 0.8548, + "step": 41901 + }, + { + "epoch": 0.04, + "learning_rate": 4.978468892592791e-05, + "loss": 1.0447, + "step": 41902 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784678639145554e-05, + "loss": 1.3985, + "step": 41903 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784668352118525e-05, + "loss": 1.2606, + "step": 41904 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784658064846826e-05, + "loss": 0.7309, + "step": 41905 + }, + { + "epoch": 0.04, + "learning_rate": 4.978464777733047e-05, + "loss": 1.0225, + "step": 41906 + }, + { + "epoch": 0.04, + "learning_rate": 4.978463748956945e-05, + "loss": 1.078, + "step": 41907 + }, + { + "epoch": 0.04, + "learning_rate": 4.978462720156377e-05, + "loss": 0.945, + "step": 41908 + }, + { + "epoch": 0.04, + "learning_rate": 4.978461691331342e-05, + "loss": 1.4417, + "step": 41909 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784606624818415e-05, + "loss": 0.8945, + "step": 41910 + }, + { + "epoch": 0.04, + "learning_rate": 4.978459633607874e-05, + "loss": 0.9329, + "step": 41911 + }, + { + "epoch": 0.04, + "learning_rate": 4.978458604709441e-05, + "loss": 0.9706, + "step": 41912 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784575757865405e-05, + "loss": 0.8955, + "step": 41913 + }, + { + "epoch": 0.04, + "learning_rate": 4.978456546839174e-05, + "loss": 1.1979, + "step": 41914 + }, + { + "epoch": 0.04, + "learning_rate": 4.978455517867342e-05, + "loss": 0.7929, + "step": 41915 + }, + { + "epoch": 0.04, + "learning_rate": 4.978454488871043e-05, + "loss": 1.1118, + "step": 41916 + }, + { + "epoch": 0.04, + "learning_rate": 4.978453459850277e-05, + "loss": 1.1264, + "step": 41917 + }, + { + "epoch": 0.04, + "learning_rate": 4.978452430805046e-05, + "loss": 1.0089, + "step": 41918 + }, + { + "epoch": 0.04, + "learning_rate": 4.978451401735349e-05, + "loss": 0.85, + "step": 41919 + }, + { + "epoch": 0.04, + "learning_rate": 4.978450372641185e-05, + "loss": 1.2344, + "step": 41920 + }, + { + "epoch": 0.04, + "learning_rate": 4.978449343522556e-05, + "loss": 1.2423, + "step": 41921 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784483143794594e-05, + "loss": 1.2241, + "step": 41922 + }, + { + "epoch": 0.04, + "learning_rate": 4.978447285211897e-05, + "loss": 1.1314, + "step": 41923 + }, + { + "epoch": 0.04, + "learning_rate": 4.978446256019868e-05, + "loss": 0.9375, + "step": 41924 + }, + { + "epoch": 0.04, + "learning_rate": 4.978445226803373e-05, + "loss": 1.0004, + "step": 41925 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784441975624125e-05, + "loss": 1.2785, + "step": 41926 + }, + { + "epoch": 0.04, + "learning_rate": 4.978443168296986e-05, + "loss": 0.8937, + "step": 41927 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784421390070925e-05, + "loss": 0.9424, + "step": 41928 + }, + { + "epoch": 0.04, + "learning_rate": 4.978441109692733e-05, + "loss": 1.1508, + "step": 41929 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784400803539077e-05, + "loss": 0.9061, + "step": 41930 + }, + { + "epoch": 0.04, + "learning_rate": 4.978439050990615e-05, + "loss": 0.7193, + "step": 41931 + }, + { + "epoch": 0.04, + "learning_rate": 4.978438021602858e-05, + "loss": 1.0986, + "step": 41932 + }, + { + "epoch": 0.04, + "learning_rate": 4.978436992190634e-05, + "loss": 0.9549, + "step": 41933 + }, + { + "epoch": 0.04, + "learning_rate": 4.978435962753945e-05, + "loss": 0.8348, + "step": 41934 + }, + { + "epoch": 0.04, + "learning_rate": 4.978434933292788e-05, + "loss": 0.9622, + "step": 41935 + }, + { + "epoch": 0.04, + "learning_rate": 4.978433903807166e-05, + "loss": 1.521, + "step": 41936 + }, + { + "epoch": 0.04, + "learning_rate": 4.978432874297079e-05, + "loss": 1.0772, + "step": 41937 + }, + { + "epoch": 0.04, + "learning_rate": 4.978431844762525e-05, + "loss": 1.0034, + "step": 41938 + }, + { + "epoch": 0.04, + "learning_rate": 4.978430815203504e-05, + "loss": 1.1255, + "step": 41939 + }, + { + "epoch": 0.04, + "learning_rate": 4.978429785620018e-05, + "loss": 1.0701, + "step": 41940 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784287560120657e-05, + "loss": 1.3452, + "step": 41941 + }, + { + "epoch": 0.04, + "learning_rate": 4.978427726379647e-05, + "loss": 1.0753, + "step": 41942 + }, + { + "epoch": 0.04, + "learning_rate": 4.978426696722763e-05, + "loss": 1.3343, + "step": 41943 + }, + { + "epoch": 0.04, + "learning_rate": 4.978425667041413e-05, + "loss": 1.0741, + "step": 41944 + }, + { + "epoch": 0.04, + "learning_rate": 4.978424637335597e-05, + "loss": 1.3182, + "step": 41945 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784236076053146e-05, + "loss": 1.2454, + "step": 41946 + }, + { + "epoch": 0.04, + "learning_rate": 4.978422577850567e-05, + "loss": 1.047, + "step": 41947 + }, + { + "epoch": 0.04, + "learning_rate": 4.978421548071352e-05, + "loss": 1.1208, + "step": 41948 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784205182676726e-05, + "loss": 1.0982, + "step": 41949 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784194884395266e-05, + "loss": 0.9319, + "step": 41950 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784184585869155e-05, + "loss": 1.1192, + "step": 41951 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784174287098374e-05, + "loss": 1.0591, + "step": 41952 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784163988082936e-05, + "loss": 0.5936, + "step": 41953 + }, + { + "epoch": 0.04, + "learning_rate": 4.978415368882284e-05, + "loss": 0.9876, + "step": 41954 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784143389318095e-05, + "loss": 0.881, + "step": 41955 + }, + { + "epoch": 0.04, + "learning_rate": 4.978413308956868e-05, + "loss": 1.05, + "step": 41956 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784122789574606e-05, + "loss": 1.188, + "step": 41957 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784112489335876e-05, + "loss": 1.1286, + "step": 41958 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784102188852496e-05, + "loss": 0.8866, + "step": 41959 + }, + { + "epoch": 0.04, + "learning_rate": 4.978409188812445e-05, + "loss": 1.1008, + "step": 41960 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784081587151744e-05, + "loss": 1.1196, + "step": 41961 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784071285934386e-05, + "loss": 0.9751, + "step": 41962 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784060984472365e-05, + "loss": 0.7789, + "step": 41963 + }, + { + "epoch": 0.04, + "learning_rate": 4.9784050682765686e-05, + "loss": 0.9974, + "step": 41964 + }, + { + "epoch": 0.04, + "learning_rate": 4.978404038081435e-05, + "loss": 0.8554, + "step": 41965 + }, + { + "epoch": 0.04, + "learning_rate": 4.978403007861836e-05, + "loss": 0.9328, + "step": 41966 + }, + { + "epoch": 0.04, + "learning_rate": 4.978401977617771e-05, + "loss": 1.0479, + "step": 41967 + }, + { + "epoch": 0.04, + "learning_rate": 4.97840094734924e-05, + "loss": 1.0798, + "step": 41968 + }, + { + "epoch": 0.04, + "learning_rate": 4.978399917056244e-05, + "loss": 0.8057, + "step": 41969 + }, + { + "epoch": 0.04, + "learning_rate": 4.978398886738782e-05, + "loss": 0.6598, + "step": 41970 + }, + { + "epoch": 0.04, + "learning_rate": 4.978397856396854e-05, + "loss": 0.8601, + "step": 41971 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783968260304604e-05, + "loss": 0.8795, + "step": 41972 + }, + { + "epoch": 0.04, + "learning_rate": 4.978395795639601e-05, + "loss": 1.4822, + "step": 41973 + }, + { + "epoch": 0.04, + "learning_rate": 4.978394765224276e-05, + "loss": 1.2262, + "step": 41974 + }, + { + "epoch": 0.04, + "learning_rate": 4.978393734784485e-05, + "loss": 1.0016, + "step": 41975 + }, + { + "epoch": 0.04, + "learning_rate": 4.978392704320229e-05, + "loss": 1.116, + "step": 41976 + }, + { + "epoch": 0.04, + "learning_rate": 4.978391673831507e-05, + "loss": 0.9434, + "step": 41977 + }, + { + "epoch": 0.04, + "learning_rate": 4.978390643318319e-05, + "loss": 0.8267, + "step": 41978 + }, + { + "epoch": 0.04, + "learning_rate": 4.978389612780666e-05, + "loss": 0.4269, + "step": 41979 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783885822185475e-05, + "loss": 0.6418, + "step": 41980 + }, + { + "epoch": 0.04, + "learning_rate": 4.978387551631963e-05, + "loss": 1.2706, + "step": 41981 + }, + { + "epoch": 0.04, + "learning_rate": 4.978386521020912e-05, + "loss": 1.0435, + "step": 41982 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783854903853974e-05, + "loss": 1.0921, + "step": 41983 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783844597254156e-05, + "loss": 1.1282, + "step": 41984 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783834290409694e-05, + "loss": 1.1572, + "step": 41985 + }, + { + "epoch": 0.04, + "learning_rate": 4.978382398332057e-05, + "loss": 0.9355, + "step": 41986 + }, + { + "epoch": 0.04, + "learning_rate": 4.978381367598679e-05, + "loss": 0.9485, + "step": 41987 + }, + { + "epoch": 0.04, + "learning_rate": 4.978380336840835e-05, + "loss": 0.9022, + "step": 41988 + }, + { + "epoch": 0.04, + "learning_rate": 4.978379306058527e-05, + "loss": 1.0746, + "step": 41989 + }, + { + "epoch": 0.04, + "learning_rate": 4.978378275251752e-05, + "loss": 0.9542, + "step": 41990 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783772444205124e-05, + "loss": 0.7699, + "step": 41991 + }, + { + "epoch": 0.04, + "learning_rate": 4.978376213564807e-05, + "loss": 1.0302, + "step": 41992 + }, + { + "epoch": 0.04, + "learning_rate": 4.978375182684636e-05, + "loss": 1.0871, + "step": 41993 + }, + { + "epoch": 0.04, + "learning_rate": 4.97837415178e-05, + "loss": 0.9932, + "step": 41994 + }, + { + "epoch": 0.04, + "learning_rate": 4.978373120850898e-05, + "loss": 0.9003, + "step": 41995 + }, + { + "epoch": 0.04, + "learning_rate": 4.97837208989733e-05, + "loss": 0.6993, + "step": 41996 + }, + { + "epoch": 0.04, + "learning_rate": 4.978371058919298e-05, + "loss": 0.8567, + "step": 41997 + }, + { + "epoch": 0.04, + "learning_rate": 4.978370027916799e-05, + "loss": 1.0501, + "step": 41998 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783689968898355e-05, + "loss": 1.2543, + "step": 41999 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783679658384065e-05, + "loss": 0.9401, + "step": 42000 + }, + { + "epoch": 0.04, + "eval_loss": 1.0778989791870117, + "eval_runtime": 124.7396, + "eval_samples_per_second": 11.103, + "eval_steps_per_second": 5.556, + "step": 42000 + }, + { + "epoch": 0.04, + "learning_rate": 4.978366934762512e-05, + "loss": 0.9253, + "step": 42001 + }, + { + "epoch": 0.04, + "learning_rate": 4.978365903662152e-05, + "loss": 0.9789, + "step": 42002 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783648725373266e-05, + "loss": 0.9264, + "step": 42003 + }, + { + "epoch": 0.04, + "learning_rate": 4.978363841388036e-05, + "loss": 0.954, + "step": 42004 + }, + { + "epoch": 0.04, + "learning_rate": 4.978362810214281e-05, + "loss": 1.1791, + "step": 42005 + }, + { + "epoch": 0.04, + "learning_rate": 4.978361779016059e-05, + "loss": 1.2553, + "step": 42006 + }, + { + "epoch": 0.04, + "learning_rate": 4.978360747793373e-05, + "loss": 0.8491, + "step": 42007 + }, + { + "epoch": 0.04, + "learning_rate": 4.97835971654622e-05, + "loss": 1.1352, + "step": 42008 + }, + { + "epoch": 0.04, + "learning_rate": 4.978358685274603e-05, + "loss": 1.0373, + "step": 42009 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783576539785207e-05, + "loss": 1.2252, + "step": 42010 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783566226579724e-05, + "loss": 1.2636, + "step": 42011 + }, + { + "epoch": 0.04, + "learning_rate": 4.978355591312959e-05, + "loss": 1.2686, + "step": 42012 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783545599434814e-05, + "loss": 1.0725, + "step": 42013 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783535285495366e-05, + "loss": 1.0061, + "step": 42014 + }, + { + "epoch": 0.04, + "learning_rate": 4.978352497131128e-05, + "loss": 1.0647, + "step": 42015 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783514656882535e-05, + "loss": 0.9839, + "step": 42016 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783504342209144e-05, + "loss": 0.8229, + "step": 42017 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783494027291096e-05, + "loss": 0.9433, + "step": 42018 + }, + { + "epoch": 0.04, + "learning_rate": 4.978348371212839e-05, + "loss": 0.991, + "step": 42019 + }, + { + "epoch": 0.04, + "learning_rate": 4.978347339672104e-05, + "loss": 0.9786, + "step": 42020 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783463081069036e-05, + "loss": 0.7502, + "step": 42021 + }, + { + "epoch": 0.04, + "learning_rate": 4.978345276517238e-05, + "loss": 0.8889, + "step": 42022 + }, + { + "epoch": 0.04, + "learning_rate": 4.978344244903107e-05, + "loss": 1.0371, + "step": 42023 + }, + { + "epoch": 0.04, + "learning_rate": 4.978343213264511e-05, + "loss": 1.2425, + "step": 42024 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783421816014505e-05, + "loss": 1.0491, + "step": 42025 + }, + { + "epoch": 0.04, + "learning_rate": 4.978341149913924e-05, + "loss": 1.1239, + "step": 42026 + }, + { + "epoch": 0.04, + "learning_rate": 4.978340118201933e-05, + "loss": 1.2932, + "step": 42027 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783390864654764e-05, + "loss": 1.0752, + "step": 42028 + }, + { + "epoch": 0.04, + "learning_rate": 4.978338054704554e-05, + "loss": 0.9682, + "step": 42029 + }, + { + "epoch": 0.04, + "learning_rate": 4.978337022919168e-05, + "loss": 0.6734, + "step": 42030 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783359911093164e-05, + "loss": 0.9322, + "step": 42031 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783349592749986e-05, + "loss": 0.8895, + "step": 42032 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783339274162164e-05, + "loss": 1.4731, + "step": 42033 + }, + { + "epoch": 0.04, + "learning_rate": 4.97833289553297e-05, + "loss": 1.0678, + "step": 42034 + }, + { + "epoch": 0.04, + "learning_rate": 4.978331863625258e-05, + "loss": 1.0926, + "step": 42035 + }, + { + "epoch": 0.04, + "learning_rate": 4.978330831693081e-05, + "loss": 0.8991, + "step": 42036 + }, + { + "epoch": 0.04, + "learning_rate": 4.978329799736439e-05, + "loss": 1.2007, + "step": 42037 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783287677553316e-05, + "loss": 1.1432, + "step": 42038 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783277357497586e-05, + "loss": 0.8756, + "step": 42039 + }, + { + "epoch": 0.04, + "learning_rate": 4.978326703719721e-05, + "loss": 1.0346, + "step": 42040 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783256716652196e-05, + "loss": 1.1661, + "step": 42041 + }, + { + "epoch": 0.04, + "learning_rate": 4.978324639586252e-05, + "loss": 0.8923, + "step": 42042 + }, + { + "epoch": 0.04, + "learning_rate": 4.97832360748282e-05, + "loss": 1.0928, + "step": 42043 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783225753549224e-05, + "loss": 1.483, + "step": 42044 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783215432025606e-05, + "loss": 0.9144, + "step": 42045 + }, + { + "epoch": 0.04, + "learning_rate": 4.978320511025734e-05, + "loss": 0.7201, + "step": 42046 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783194788244414e-05, + "loss": 1.0344, + "step": 42047 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783184465986845e-05, + "loss": 1.0302, + "step": 42048 + }, + { + "epoch": 0.04, + "learning_rate": 4.978317414348462e-05, + "loss": 0.6028, + "step": 42049 + }, + { + "epoch": 0.04, + "learning_rate": 4.978316382073776e-05, + "loss": 1.3511, + "step": 42050 + }, + { + "epoch": 0.04, + "learning_rate": 4.978315349774624e-05, + "loss": 1.318, + "step": 42051 + }, + { + "epoch": 0.04, + "learning_rate": 4.978314317451007e-05, + "loss": 0.8695, + "step": 42052 + }, + { + "epoch": 0.04, + "learning_rate": 4.978313285102926e-05, + "loss": 0.9593, + "step": 42053 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783122527303795e-05, + "loss": 0.8629, + "step": 42054 + }, + { + "epoch": 0.04, + "learning_rate": 4.978311220333368e-05, + "loss": 1.0972, + "step": 42055 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783101879118925e-05, + "loss": 1.1287, + "step": 42056 + }, + { + "epoch": 0.04, + "learning_rate": 4.978309155465952e-05, + "loss": 0.93, + "step": 42057 + }, + { + "epoch": 0.04, + "learning_rate": 4.978308122995546e-05, + "loss": 1.0474, + "step": 42058 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783070905006754e-05, + "loss": 0.8809, + "step": 42059 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783060579813404e-05, + "loss": 0.4979, + "step": 42060 + }, + { + "epoch": 0.04, + "learning_rate": 4.97830502543754e-05, + "loss": 1.2541, + "step": 42061 + }, + { + "epoch": 0.04, + "learning_rate": 4.978303992869275e-05, + "loss": 1.0872, + "step": 42062 + }, + { + "epoch": 0.04, + "learning_rate": 4.978302960276546e-05, + "loss": 1.1883, + "step": 42063 + }, + { + "epoch": 0.04, + "learning_rate": 4.978301927659351e-05, + "loss": 0.733, + "step": 42064 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783008950176925e-05, + "loss": 0.2642, + "step": 42065 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782998623515686e-05, + "loss": 0.5264, + "step": 42066 + }, + { + "epoch": 0.04, + "learning_rate": 4.97829882966098e-05, + "loss": 1.1882, + "step": 42067 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782977969459266e-05, + "loss": 0.9322, + "step": 42068 + }, + { + "epoch": 0.04, + "learning_rate": 4.978296764206408e-05, + "loss": 1.1375, + "step": 42069 + }, + { + "epoch": 0.04, + "learning_rate": 4.978295731442426e-05, + "loss": 1.2653, + "step": 42070 + }, + { + "epoch": 0.04, + "learning_rate": 4.978294698653978e-05, + "loss": 0.9704, + "step": 42071 + }, + { + "epoch": 0.04, + "learning_rate": 4.978293665841066e-05, + "loss": 0.911, + "step": 42072 + }, + { + "epoch": 0.04, + "learning_rate": 4.978292633003689e-05, + "loss": 0.6503, + "step": 42073 + }, + { + "epoch": 0.04, + "learning_rate": 4.978291600141848e-05, + "loss": 0.915, + "step": 42074 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782905672555417e-05, + "loss": 1.321, + "step": 42075 + }, + { + "epoch": 0.04, + "learning_rate": 4.978289534344771e-05, + "loss": 1.3665, + "step": 42076 + }, + { + "epoch": 0.04, + "learning_rate": 4.978288501409535e-05, + "loss": 1.1846, + "step": 42077 + }, + { + "epoch": 0.04, + "learning_rate": 4.978287468449835e-05, + "loss": 0.9358, + "step": 42078 + }, + { + "epoch": 0.04, + "learning_rate": 4.978286435465671e-05, + "loss": 1.1238, + "step": 42079 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782854024570415e-05, + "loss": 1.2075, + "step": 42080 + }, + { + "epoch": 0.04, + "learning_rate": 4.978284369423948e-05, + "loss": 0.8178, + "step": 42081 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782833363663896e-05, + "loss": 0.8652, + "step": 42082 + }, + { + "epoch": 0.04, + "learning_rate": 4.978282303284366e-05, + "loss": 0.767, + "step": 42083 + }, + { + "epoch": 0.04, + "learning_rate": 4.978281270177879e-05, + "loss": 0.863, + "step": 42084 + }, + { + "epoch": 0.04, + "learning_rate": 4.978280237046927e-05, + "loss": 0.8877, + "step": 42085 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782792038915105e-05, + "loss": 0.9541, + "step": 42086 + }, + { + "epoch": 0.04, + "learning_rate": 4.978278170711629e-05, + "loss": 0.9435, + "step": 42087 + }, + { + "epoch": 0.04, + "learning_rate": 4.978277137507283e-05, + "loss": 1.0275, + "step": 42088 + }, + { + "epoch": 0.04, + "learning_rate": 4.978276104278474e-05, + "loss": 1.0148, + "step": 42089 + }, + { + "epoch": 0.04, + "learning_rate": 4.978275071025199e-05, + "loss": 0.9176, + "step": 42090 + }, + { + "epoch": 0.04, + "learning_rate": 4.97827403774746e-05, + "loss": 1.0626, + "step": 42091 + }, + { + "epoch": 0.04, + "learning_rate": 4.978273004445256e-05, + "loss": 0.8799, + "step": 42092 + }, + { + "epoch": 0.04, + "learning_rate": 4.978271971118588e-05, + "loss": 1.2361, + "step": 42093 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782709377674554e-05, + "loss": 0.993, + "step": 42094 + }, + { + "epoch": 0.04, + "learning_rate": 4.978269904391859e-05, + "loss": 1.0887, + "step": 42095 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782688709917965e-05, + "loss": 1.0717, + "step": 42096 + }, + { + "epoch": 0.04, + "learning_rate": 4.978267837567271e-05, + "loss": 0.9313, + "step": 42097 + }, + { + "epoch": 0.04, + "learning_rate": 4.978266804118281e-05, + "loss": 1.0254, + "step": 42098 + }, + { + "epoch": 0.04, + "learning_rate": 4.978265770644826e-05, + "loss": 1.3636, + "step": 42099 + }, + { + "epoch": 0.04, + "learning_rate": 4.978264737146907e-05, + "loss": 0.8404, + "step": 42100 + }, + { + "epoch": 0.04, + "learning_rate": 4.978263703624524e-05, + "loss": 0.462, + "step": 42101 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782626700776755e-05, + "loss": 0.8949, + "step": 42102 + }, + { + "epoch": 0.04, + "learning_rate": 4.978261636506364e-05, + "loss": 0.7649, + "step": 42103 + }, + { + "epoch": 0.04, + "learning_rate": 4.978260602910587e-05, + "loss": 1.0298, + "step": 42104 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782595692903464e-05, + "loss": 0.8652, + "step": 42105 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782585356456413e-05, + "loss": 0.8835, + "step": 42106 + }, + { + "epoch": 0.04, + "learning_rate": 4.978257501976472e-05, + "loss": 0.9997, + "step": 42107 + }, + { + "epoch": 0.04, + "learning_rate": 4.978256468282838e-05, + "loss": 0.8116, + "step": 42108 + }, + { + "epoch": 0.04, + "learning_rate": 4.97825543456474e-05, + "loss": 0.9314, + "step": 42109 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782544008221776e-05, + "loss": 0.8195, + "step": 42110 + }, + { + "epoch": 0.04, + "learning_rate": 4.978253367055151e-05, + "loss": 1.2381, + "step": 42111 + }, + { + "epoch": 0.04, + "learning_rate": 4.97825233326366e-05, + "loss": 1.1876, + "step": 42112 + }, + { + "epoch": 0.04, + "learning_rate": 4.978251299447705e-05, + "loss": 1.8796, + "step": 42113 + }, + { + "epoch": 0.04, + "learning_rate": 4.978250265607285e-05, + "loss": 0.8022, + "step": 42114 + }, + { + "epoch": 0.04, + "learning_rate": 4.978249231742401e-05, + "loss": 1.3194, + "step": 42115 + }, + { + "epoch": 0.04, + "learning_rate": 4.978248197853053e-05, + "loss": 1.0654, + "step": 42116 + }, + { + "epoch": 0.04, + "learning_rate": 4.978247163939241e-05, + "loss": 0.9666, + "step": 42117 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782461300009644e-05, + "loss": 1.0518, + "step": 42118 + }, + { + "epoch": 0.04, + "learning_rate": 4.978245096038224e-05, + "loss": 0.8545, + "step": 42119 + }, + { + "epoch": 0.04, + "learning_rate": 4.978244062051019e-05, + "loss": 1.1665, + "step": 42120 + }, + { + "epoch": 0.04, + "learning_rate": 4.97824302803935e-05, + "loss": 0.9265, + "step": 42121 + }, + { + "epoch": 0.04, + "learning_rate": 4.978241994003216e-05, + "loss": 1.0472, + "step": 42122 + }, + { + "epoch": 0.04, + "learning_rate": 4.978240959942619e-05, + "loss": 1.0842, + "step": 42123 + }, + { + "epoch": 0.04, + "learning_rate": 4.978239925857559e-05, + "loss": 1.1504, + "step": 42124 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782388917480325e-05, + "loss": 1.0021, + "step": 42125 + }, + { + "epoch": 0.04, + "learning_rate": 4.978237857614043e-05, + "loss": 1.1625, + "step": 42126 + }, + { + "epoch": 0.04, + "learning_rate": 4.978236823455589e-05, + "loss": 1.1092, + "step": 42127 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782357892726704e-05, + "loss": 0.8442, + "step": 42128 + }, + { + "epoch": 0.04, + "learning_rate": 4.978234755065289e-05, + "loss": 1.0401, + "step": 42129 + }, + { + "epoch": 0.04, + "learning_rate": 4.978233720833443e-05, + "loss": 0.7426, + "step": 42130 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782326865771326e-05, + "loss": 1.0493, + "step": 42131 + }, + { + "epoch": 0.04, + "learning_rate": 4.978231652296358e-05, + "loss": 0.0941, + "step": 42132 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782306179911196e-05, + "loss": 0.1564, + "step": 42133 + }, + { + "epoch": 0.04, + "learning_rate": 4.978229583661418e-05, + "loss": 0.977, + "step": 42134 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782285493072514e-05, + "loss": 1.0631, + "step": 42135 + }, + { + "epoch": 0.04, + "learning_rate": 4.978227514928621e-05, + "loss": 2.7433, + "step": 42136 + }, + { + "epoch": 0.04, + "learning_rate": 4.978226480525526e-05, + "loss": 2.3106, + "step": 42137 + }, + { + "epoch": 0.04, + "learning_rate": 4.978225446097967e-05, + "loss": 1.1928, + "step": 42138 + }, + { + "epoch": 0.04, + "learning_rate": 4.978224411645945e-05, + "loss": 0.5763, + "step": 42139 + }, + { + "epoch": 0.04, + "learning_rate": 4.978223377169459e-05, + "loss": 0.8098, + "step": 42140 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782223426685084e-05, + "loss": 1.054, + "step": 42141 + }, + { + "epoch": 0.04, + "learning_rate": 4.978221308143094e-05, + "loss": 1.1233, + "step": 42142 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782202735932147e-05, + "loss": 1.3761, + "step": 42143 + }, + { + "epoch": 0.04, + "learning_rate": 4.978219239018873e-05, + "loss": 0.7571, + "step": 42144 + }, + { + "epoch": 0.04, + "learning_rate": 4.978218204420066e-05, + "loss": 0.6333, + "step": 42145 + }, + { + "epoch": 0.04, + "learning_rate": 4.978217169796796e-05, + "loss": 0.7598, + "step": 42146 + }, + { + "epoch": 0.04, + "learning_rate": 4.978216135149062e-05, + "loss": 0.7669, + "step": 42147 + }, + { + "epoch": 0.04, + "learning_rate": 4.978215100476864e-05, + "loss": 1.1495, + "step": 42148 + }, + { + "epoch": 0.04, + "learning_rate": 4.978214065780202e-05, + "loss": 1.0773, + "step": 42149 + }, + { + "epoch": 0.04, + "learning_rate": 4.978213031059076e-05, + "loss": 1.3469, + "step": 42150 + }, + { + "epoch": 0.04, + "learning_rate": 4.978211996313486e-05, + "loss": 0.734, + "step": 42151 + }, + { + "epoch": 0.04, + "learning_rate": 4.978210961543433e-05, + "loss": 0.5875, + "step": 42152 + }, + { + "epoch": 0.04, + "learning_rate": 4.978209926748916e-05, + "loss": 0.3551, + "step": 42153 + }, + { + "epoch": 0.04, + "learning_rate": 4.978208891929934e-05, + "loss": 1.0432, + "step": 42154 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782078570864894e-05, + "loss": 0.9234, + "step": 42155 + }, + { + "epoch": 0.04, + "learning_rate": 4.978206822218581e-05, + "loss": 1.4293, + "step": 42156 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782057873262076e-05, + "loss": 1.0715, + "step": 42157 + }, + { + "epoch": 0.04, + "learning_rate": 4.978204752409371e-05, + "loss": 1.1568, + "step": 42158 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782037174680704e-05, + "loss": 0.8282, + "step": 42159 + }, + { + "epoch": 0.04, + "learning_rate": 4.9782026825023064e-05, + "loss": 1.0472, + "step": 42160 + }, + { + "epoch": 0.04, + "learning_rate": 4.978201647512079e-05, + "loss": 0.6179, + "step": 42161 + }, + { + "epoch": 0.04, + "learning_rate": 4.978200612497387e-05, + "loss": 0.7875, + "step": 42162 + }, + { + "epoch": 0.04, + "learning_rate": 4.978199577458232e-05, + "loss": 1.1303, + "step": 42163 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781985423946134e-05, + "loss": 1.0118, + "step": 42164 + }, + { + "epoch": 0.04, + "learning_rate": 4.97819750730653e-05, + "loss": 0.9953, + "step": 42165 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781964721939835e-05, + "loss": 0.9063, + "step": 42166 + }, + { + "epoch": 0.04, + "learning_rate": 4.978195437056973e-05, + "loss": 1.0479, + "step": 42167 + }, + { + "epoch": 0.04, + "learning_rate": 4.978194401895499e-05, + "loss": 1.1918, + "step": 42168 + }, + { + "epoch": 0.04, + "learning_rate": 4.978193366709561e-05, + "loss": 1.129, + "step": 42169 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781923314991605e-05, + "loss": 0.9984, + "step": 42170 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781912962642954e-05, + "loss": 0.9795, + "step": 42171 + }, + { + "epoch": 0.04, + "learning_rate": 4.978190261004966e-05, + "loss": 1.0607, + "step": 42172 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781892257211736e-05, + "loss": 0.7895, + "step": 42173 + }, + { + "epoch": 0.04, + "learning_rate": 4.978188190412918e-05, + "loss": 1.0323, + "step": 42174 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781871550801984e-05, + "loss": 0.799, + "step": 42175 + }, + { + "epoch": 0.04, + "learning_rate": 4.978186119723015e-05, + "loss": 1.2327, + "step": 42176 + }, + { + "epoch": 0.04, + "learning_rate": 4.978185084341368e-05, + "loss": 0.9467, + "step": 42177 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781840489352585e-05, + "loss": 0.9898, + "step": 42178 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781830135046834e-05, + "loss": 1.4322, + "step": 42179 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781819780496466e-05, + "loss": 1.0848, + "step": 42180 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781809425701456e-05, + "loss": 0.8745, + "step": 42181 + }, + { + "epoch": 0.04, + "learning_rate": 4.978179907066181e-05, + "loss": 0.9968, + "step": 42182 + }, + { + "epoch": 0.04, + "learning_rate": 4.978178871537752e-05, + "loss": 1.096, + "step": 42183 + }, + { + "epoch": 0.04, + "learning_rate": 4.97817783598486e-05, + "loss": 0.9249, + "step": 42184 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781768004075046e-05, + "loss": 0.8761, + "step": 42185 + }, + { + "epoch": 0.04, + "learning_rate": 4.978175764805686e-05, + "loss": 0.9977, + "step": 42186 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781747291794034e-05, + "loss": 1.0472, + "step": 42187 + }, + { + "epoch": 0.04, + "learning_rate": 4.978173693528658e-05, + "loss": 1.1072, + "step": 42188 + }, + { + "epoch": 0.04, + "learning_rate": 4.978172657853448e-05, + "loss": 1.1038, + "step": 42189 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781716221537756e-05, + "loss": 1.2923, + "step": 42190 + }, + { + "epoch": 0.04, + "learning_rate": 4.978170586429639e-05, + "loss": 1.0051, + "step": 42191 + }, + { + "epoch": 0.04, + "learning_rate": 4.97816955068104e-05, + "loss": 1.2187, + "step": 42192 + }, + { + "epoch": 0.04, + "learning_rate": 4.978168514907977e-05, + "loss": 0.9754, + "step": 42193 + }, + { + "epoch": 0.04, + "learning_rate": 4.97816747911045e-05, + "loss": 0.9294, + "step": 42194 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781664432884603e-05, + "loss": 1.0848, + "step": 42195 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781654074420056e-05, + "loss": 1.0842, + "step": 42196 + }, + { + "epoch": 0.04, + "learning_rate": 4.978164371571089e-05, + "loss": 0.668, + "step": 42197 + }, + { + "epoch": 0.04, + "learning_rate": 4.978163335675709e-05, + "loss": 1.0275, + "step": 42198 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781622997558655e-05, + "loss": 0.941, + "step": 42199 + }, + { + "epoch": 0.04, + "learning_rate": 4.978161263811558e-05, + "loss": 1.2107, + "step": 42200 + }, + { + "epoch": 0.04, + "learning_rate": 4.978160227842788e-05, + "loss": 1.0033, + "step": 42201 + }, + { + "epoch": 0.04, + "learning_rate": 4.978159191849554e-05, + "loss": 0.8617, + "step": 42202 + }, + { + "epoch": 0.04, + "learning_rate": 4.978158155831857e-05, + "loss": 0.7635, + "step": 42203 + }, + { + "epoch": 0.04, + "learning_rate": 4.978157119789697e-05, + "loss": 1.1654, + "step": 42204 + }, + { + "epoch": 0.04, + "learning_rate": 4.978156083723073e-05, + "loss": 0.9477, + "step": 42205 + }, + { + "epoch": 0.04, + "learning_rate": 4.978155047631986e-05, + "loss": 1.056, + "step": 42206 + }, + { + "epoch": 0.04, + "learning_rate": 4.978154011516436e-05, + "loss": 1.0317, + "step": 42207 + }, + { + "epoch": 0.04, + "learning_rate": 4.978152975376422e-05, + "loss": 1.2161, + "step": 42208 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781519392119445e-05, + "loss": 0.7437, + "step": 42209 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781509030230046e-05, + "loss": 1.1409, + "step": 42210 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781498668096016e-05, + "loss": 0.8306, + "step": 42211 + }, + { + "epoch": 0.04, + "learning_rate": 4.978148830571735e-05, + "loss": 1.2527, + "step": 42212 + }, + { + "epoch": 0.04, + "learning_rate": 4.978147794309405e-05, + "loss": 1.0183, + "step": 42213 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781467580226114e-05, + "loss": 0.7242, + "step": 42214 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781457217113545e-05, + "loss": 0.9856, + "step": 42215 + }, + { + "epoch": 0.04, + "learning_rate": 4.978144685375635e-05, + "loss": 0.9607, + "step": 42216 + }, + { + "epoch": 0.04, + "learning_rate": 4.978143649015452e-05, + "loss": 0.8739, + "step": 42217 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781426126308064e-05, + "loss": 1.0372, + "step": 42218 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781415762216975e-05, + "loss": 0.8756, + "step": 42219 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781405397881256e-05, + "loss": 0.9345, + "step": 42220 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781395033300894e-05, + "loss": 0.4939, + "step": 42221 + }, + { + "epoch": 0.04, + "learning_rate": 4.978138466847591e-05, + "loss": 0.758, + "step": 42222 + }, + { + "epoch": 0.04, + "learning_rate": 4.978137430340629e-05, + "loss": 0.9833, + "step": 42223 + }, + { + "epoch": 0.04, + "learning_rate": 4.978136393809204e-05, + "loss": 1.0438, + "step": 42224 + }, + { + "epoch": 0.04, + "learning_rate": 4.978135357253316e-05, + "loss": 1.3558, + "step": 42225 + }, + { + "epoch": 0.04, + "learning_rate": 4.978134320672965e-05, + "loss": 1.118, + "step": 42226 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781332840681506e-05, + "loss": 1.0825, + "step": 42227 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781322474388734e-05, + "loss": 1.1163, + "step": 42228 + }, + { + "epoch": 0.04, + "learning_rate": 4.978131210785133e-05, + "loss": 1.1942, + "step": 42229 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781301741069295e-05, + "loss": 1.0598, + "step": 42230 + }, + { + "epoch": 0.04, + "learning_rate": 4.978129137404263e-05, + "loss": 1.0952, + "step": 42231 + }, + { + "epoch": 0.04, + "learning_rate": 4.978128100677133e-05, + "loss": 0.9357, + "step": 42232 + }, + { + "epoch": 0.04, + "learning_rate": 4.978127063925541e-05, + "loss": 1.0447, + "step": 42233 + }, + { + "epoch": 0.04, + "learning_rate": 4.978126027149485e-05, + "loss": 1.3553, + "step": 42234 + }, + { + "epoch": 0.04, + "learning_rate": 4.978124990348966e-05, + "loss": 0.9933, + "step": 42235 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781239535239845e-05, + "loss": 1.3331, + "step": 42236 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781229166745405e-05, + "loss": 1.2819, + "step": 42237 + }, + { + "epoch": 0.04, + "learning_rate": 4.978121879800632e-05, + "loss": 1.2215, + "step": 42238 + }, + { + "epoch": 0.04, + "learning_rate": 4.978120842902262e-05, + "loss": 1.1852, + "step": 42239 + }, + { + "epoch": 0.04, + "learning_rate": 4.978119805979428e-05, + "loss": 1.0535, + "step": 42240 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781187690321316e-05, + "loss": 1.2306, + "step": 42241 + }, + { + "epoch": 0.04, + "learning_rate": 4.978117732060372e-05, + "loss": 1.1671, + "step": 42242 + }, + { + "epoch": 0.04, + "learning_rate": 4.97811669506415e-05, + "loss": 1.173, + "step": 42243 + }, + { + "epoch": 0.04, + "learning_rate": 4.978115658043464e-05, + "loss": 0.6723, + "step": 42244 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781146209983157e-05, + "loss": 1.1246, + "step": 42245 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781135839287054e-05, + "loss": 1.1996, + "step": 42246 + }, + { + "epoch": 0.04, + "learning_rate": 4.978112546834631e-05, + "loss": 0.7506, + "step": 42247 + }, + { + "epoch": 0.04, + "learning_rate": 4.978111509716094e-05, + "loss": 0.9152, + "step": 42248 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781104725730946e-05, + "loss": 0.668, + "step": 42249 + }, + { + "epoch": 0.04, + "learning_rate": 4.978109435405631e-05, + "loss": 0.8862, + "step": 42250 + }, + { + "epoch": 0.04, + "learning_rate": 4.978108398213706e-05, + "loss": 1.307, + "step": 42251 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781073609973175e-05, + "loss": 0.8675, + "step": 42252 + }, + { + "epoch": 0.04, + "learning_rate": 4.978106323756466e-05, + "loss": 0.6259, + "step": 42253 + }, + { + "epoch": 0.04, + "learning_rate": 4.978105286491153e-05, + "loss": 0.2454, + "step": 42254 + }, + { + "epoch": 0.04, + "learning_rate": 4.978104249201375e-05, + "loss": 0.501, + "step": 42255 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781032118871354e-05, + "loss": 0.2228, + "step": 42256 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781021745484336e-05, + "loss": 0.1488, + "step": 42257 + }, + { + "epoch": 0.04, + "learning_rate": 4.978101137185268e-05, + "loss": 0.1646, + "step": 42258 + }, + { + "epoch": 0.04, + "learning_rate": 4.9781000997976404e-05, + "loss": 0.2109, + "step": 42259 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780990623855496e-05, + "loss": 1.3204, + "step": 42260 + }, + { + "epoch": 0.04, + "learning_rate": 4.978098024948996e-05, + "loss": 0.9027, + "step": 42261 + }, + { + "epoch": 0.04, + "learning_rate": 4.97809698748798e-05, + "loss": 0.494, + "step": 42262 + }, + { + "epoch": 0.04, + "learning_rate": 4.978095950002501e-05, + "loss": 0.934, + "step": 42263 + }, + { + "epoch": 0.04, + "learning_rate": 4.978094912492559e-05, + "loss": 1.1501, + "step": 42264 + }, + { + "epoch": 0.04, + "learning_rate": 4.978093874958154e-05, + "loss": 0.7095, + "step": 42265 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780928373992876e-05, + "loss": 0.9147, + "step": 42266 + }, + { + "epoch": 0.04, + "learning_rate": 4.978091799815958e-05, + "loss": 1.0432, + "step": 42267 + }, + { + "epoch": 0.04, + "learning_rate": 4.978090762208165e-05, + "loss": 1.1511, + "step": 42268 + }, + { + "epoch": 0.04, + "learning_rate": 4.97808972457591e-05, + "loss": 0.8488, + "step": 42269 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780886869191924e-05, + "loss": 1.1557, + "step": 42270 + }, + { + "epoch": 0.04, + "learning_rate": 4.978087649238012e-05, + "loss": 1.1253, + "step": 42271 + }, + { + "epoch": 0.04, + "learning_rate": 4.978086611532369e-05, + "loss": 1.3344, + "step": 42272 + }, + { + "epoch": 0.04, + "learning_rate": 4.978085573802263e-05, + "loss": 1.4308, + "step": 42273 + }, + { + "epoch": 0.04, + "learning_rate": 4.978084536047695e-05, + "loss": 1.1964, + "step": 42274 + }, + { + "epoch": 0.04, + "learning_rate": 4.978083498268664e-05, + "loss": 1.0993, + "step": 42275 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780824604651707e-05, + "loss": 1.1732, + "step": 42276 + }, + { + "epoch": 0.04, + "learning_rate": 4.978081422637214e-05, + "loss": 0.9552, + "step": 42277 + }, + { + "epoch": 0.04, + "learning_rate": 4.978080384784795e-05, + "loss": 0.9764, + "step": 42278 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780793469079145e-05, + "loss": 1.0134, + "step": 42279 + }, + { + "epoch": 0.04, + "learning_rate": 4.978078309006571e-05, + "loss": 0.7593, + "step": 42280 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780772710807647e-05, + "loss": 1.3335, + "step": 42281 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780762331304956e-05, + "loss": 1.2666, + "step": 42282 + }, + { + "epoch": 0.04, + "learning_rate": 4.978075195155764e-05, + "loss": 0.9411, + "step": 42283 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780741571565705e-05, + "loss": 1.2619, + "step": 42284 + }, + { + "epoch": 0.04, + "learning_rate": 4.978073119132914e-05, + "loss": 1.1071, + "step": 42285 + }, + { + "epoch": 0.04, + "learning_rate": 4.978072081084795e-05, + "loss": 0.9504, + "step": 42286 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780710430122135e-05, + "loss": 1.0245, + "step": 42287 + }, + { + "epoch": 0.04, + "learning_rate": 4.97807000491517e-05, + "loss": 0.6571, + "step": 42288 + }, + { + "epoch": 0.04, + "learning_rate": 4.978068966793663e-05, + "loss": 0.9207, + "step": 42289 + }, + { + "epoch": 0.04, + "learning_rate": 4.978067928647694e-05, + "loss": 0.8914, + "step": 42290 + }, + { + "epoch": 0.04, + "learning_rate": 4.978066890477263e-05, + "loss": 0.7438, + "step": 42291 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780658522823695e-05, + "loss": 0.7473, + "step": 42292 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780648140630136e-05, + "loss": 1.0302, + "step": 42293 + }, + { + "epoch": 0.04, + "learning_rate": 4.978063775819195e-05, + "loss": 1.2464, + "step": 42294 + }, + { + "epoch": 0.04, + "learning_rate": 4.978062737550914e-05, + "loss": 1.1388, + "step": 42295 + }, + { + "epoch": 0.04, + "learning_rate": 4.978061699258171e-05, + "loss": 0.8013, + "step": 42296 + }, + { + "epoch": 0.04, + "learning_rate": 4.978060660940965e-05, + "loss": 0.8585, + "step": 42297 + }, + { + "epoch": 0.04, + "learning_rate": 4.978059622599297e-05, + "loss": 1.012, + "step": 42298 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780585842331665e-05, + "loss": 1.2141, + "step": 42299 + }, + { + "epoch": 0.04, + "learning_rate": 4.978057545842574e-05, + "loss": 1.5215, + "step": 42300 + }, + { + "epoch": 0.04, + "learning_rate": 4.978056507427519e-05, + "loss": 1.2048, + "step": 42301 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780554689880014e-05, + "loss": 1.1865, + "step": 42302 + }, + { + "epoch": 0.04, + "learning_rate": 4.978054430524022e-05, + "loss": 1.3256, + "step": 42303 + }, + { + "epoch": 0.04, + "learning_rate": 4.97805339203558e-05, + "loss": 1.0687, + "step": 42304 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780523535226755e-05, + "loss": 1.1672, + "step": 42305 + }, + { + "epoch": 0.04, + "learning_rate": 4.978051314985309e-05, + "loss": 0.997, + "step": 42306 + }, + { + "epoch": 0.04, + "learning_rate": 4.97805027642348e-05, + "loss": 0.9931, + "step": 42307 + }, + { + "epoch": 0.04, + "learning_rate": 4.978049237837189e-05, + "loss": 1.0351, + "step": 42308 + }, + { + "epoch": 0.04, + "learning_rate": 4.978048199226435e-05, + "loss": 0.8258, + "step": 42309 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780471605912194e-05, + "loss": 0.8226, + "step": 42310 + }, + { + "epoch": 0.04, + "learning_rate": 4.978046121931542e-05, + "loss": 0.924, + "step": 42311 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780450832474015e-05, + "loss": 0.8768, + "step": 42312 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780440445387994e-05, + "loss": 0.7169, + "step": 42313 + }, + { + "epoch": 0.04, + "learning_rate": 4.978043005805735e-05, + "loss": 0.6664, + "step": 42314 + }, + { + "epoch": 0.04, + "learning_rate": 4.978041967048208e-05, + "loss": 0.7727, + "step": 42315 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780409282662186e-05, + "loss": 0.7693, + "step": 42316 + }, + { + "epoch": 0.04, + "learning_rate": 4.978039889459768e-05, + "loss": 0.8795, + "step": 42317 + }, + { + "epoch": 0.04, + "learning_rate": 4.978038850628854e-05, + "loss": 0.8911, + "step": 42318 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780378117734796e-05, + "loss": 0.8364, + "step": 42319 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780367728936414e-05, + "loss": 0.8524, + "step": 42320 + }, + { + "epoch": 0.04, + "learning_rate": 4.978035733989342e-05, + "loss": 0.8969, + "step": 42321 + }, + { + "epoch": 0.04, + "learning_rate": 4.97803469506058e-05, + "loss": 0.8141, + "step": 42322 + }, + { + "epoch": 0.04, + "learning_rate": 4.978033656107356e-05, + "loss": 0.8609, + "step": 42323 + }, + { + "epoch": 0.04, + "learning_rate": 4.97803261712967e-05, + "loss": 0.7413, + "step": 42324 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780315781275214e-05, + "loss": 1.0643, + "step": 42325 + }, + { + "epoch": 0.04, + "learning_rate": 4.978030539100912e-05, + "loss": 0.9615, + "step": 42326 + }, + { + "epoch": 0.04, + "learning_rate": 4.978029500049839e-05, + "loss": 0.5471, + "step": 42327 + }, + { + "epoch": 0.04, + "learning_rate": 4.978028460974304e-05, + "loss": 0.3724, + "step": 42328 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780274218743085e-05, + "loss": 0.4007, + "step": 42329 + }, + { + "epoch": 0.04, + "learning_rate": 4.97802638274985e-05, + "loss": 0.8229, + "step": 42330 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780253436009295e-05, + "loss": 0.962, + "step": 42331 + }, + { + "epoch": 0.04, + "learning_rate": 4.978024304427547e-05, + "loss": 0.9301, + "step": 42332 + }, + { + "epoch": 0.04, + "learning_rate": 4.978023265229702e-05, + "loss": 0.9215, + "step": 42333 + }, + { + "epoch": 0.04, + "learning_rate": 4.978022226007396e-05, + "loss": 0.9535, + "step": 42334 + }, + { + "epoch": 0.04, + "learning_rate": 4.978021186760627e-05, + "loss": 0.9525, + "step": 42335 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780201474893966e-05, + "loss": 0.9772, + "step": 42336 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780191081937045e-05, + "loss": 0.7778, + "step": 42337 + }, + { + "epoch": 0.04, + "learning_rate": 4.97801806887355e-05, + "loss": 0.7662, + "step": 42338 + }, + { + "epoch": 0.04, + "learning_rate": 4.978017029528933e-05, + "loss": 0.9371, + "step": 42339 + }, + { + "epoch": 0.04, + "learning_rate": 4.978015990159855e-05, + "loss": 0.9747, + "step": 42340 + }, + { + "epoch": 0.04, + "learning_rate": 4.978014950766315e-05, + "loss": 0.8381, + "step": 42341 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780139113483124e-05, + "loss": 0.907, + "step": 42342 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780128719058484e-05, + "loss": 0.7512, + "step": 42343 + }, + { + "epoch": 0.04, + "learning_rate": 4.978011832438922e-05, + "loss": 0.9477, + "step": 42344 + }, + { + "epoch": 0.04, + "learning_rate": 4.978010792947534e-05, + "loss": 0.8559, + "step": 42345 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780097534316846e-05, + "loss": 0.7288, + "step": 42346 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780087138913734e-05, + "loss": 0.8001, + "step": 42347 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780076743266e-05, + "loss": 0.7224, + "step": 42348 + }, + { + "epoch": 0.04, + "learning_rate": 4.978006634737364e-05, + "loss": 0.8517, + "step": 42349 + }, + { + "epoch": 0.04, + "learning_rate": 4.978005595123667e-05, + "loss": 0.7734, + "step": 42350 + }, + { + "epoch": 0.04, + "learning_rate": 4.978004555485508e-05, + "loss": 0.8655, + "step": 42351 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780035158228875e-05, + "loss": 0.7675, + "step": 42352 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780024761358045e-05, + "loss": 0.8169, + "step": 42353 + }, + { + "epoch": 0.04, + "learning_rate": 4.97800143642426e-05, + "loss": 0.7357, + "step": 42354 + }, + { + "epoch": 0.04, + "learning_rate": 4.9780003966882535e-05, + "loss": 0.6826, + "step": 42355 + }, + { + "epoch": 0.04, + "learning_rate": 4.977999356927786e-05, + "loss": 0.8026, + "step": 42356 + }, + { + "epoch": 0.04, + "learning_rate": 4.977998317142857e-05, + "loss": 0.8144, + "step": 42357 + }, + { + "epoch": 0.04, + "learning_rate": 4.977997277333465e-05, + "loss": 0.7461, + "step": 42358 + }, + { + "epoch": 0.04, + "learning_rate": 4.977996237499611e-05, + "loss": 0.7169, + "step": 42359 + }, + { + "epoch": 0.04, + "learning_rate": 4.977995197641296e-05, + "loss": 0.7902, + "step": 42360 + }, + { + "epoch": 0.04, + "learning_rate": 4.97799415775852e-05, + "loss": 0.6128, + "step": 42361 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779931178512816e-05, + "loss": 0.699, + "step": 42362 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779920779195816e-05, + "loss": 0.8855, + "step": 42363 + }, + { + "epoch": 0.04, + "learning_rate": 4.97799103796342e-05, + "loss": 0.7698, + "step": 42364 + }, + { + "epoch": 0.04, + "learning_rate": 4.977989997982796e-05, + "loss": 0.7403, + "step": 42365 + }, + { + "epoch": 0.04, + "learning_rate": 4.977988957977711e-05, + "loss": 0.6064, + "step": 42366 + }, + { + "epoch": 0.04, + "learning_rate": 4.977987917948164e-05, + "loss": 0.6977, + "step": 42367 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779868778941554e-05, + "loss": 0.6798, + "step": 42368 + }, + { + "epoch": 0.04, + "learning_rate": 4.977985837815685e-05, + "loss": 0.7528, + "step": 42369 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779847977127534e-05, + "loss": 0.7987, + "step": 42370 + }, + { + "epoch": 0.04, + "learning_rate": 4.97798375758536e-05, + "loss": 0.6291, + "step": 42371 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779827174335055e-05, + "loss": 0.6334, + "step": 42372 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779816772571885e-05, + "loss": 0.8246, + "step": 42373 + }, + { + "epoch": 0.04, + "learning_rate": 4.977980637056411e-05, + "loss": 0.7474, + "step": 42374 + }, + { + "epoch": 0.04, + "learning_rate": 4.97797959683117e-05, + "loss": 0.7047, + "step": 42375 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779785565814695e-05, + "loss": 0.6767, + "step": 42376 + }, + { + "epoch": 0.04, + "learning_rate": 4.977977516307306e-05, + "loss": 0.6469, + "step": 42377 + }, + { + "epoch": 0.04, + "learning_rate": 4.977976476008682e-05, + "loss": 0.7519, + "step": 42378 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779754356855965e-05, + "loss": 0.7376, + "step": 42379 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779743953380486e-05, + "loss": 0.8834, + "step": 42380 + }, + { + "epoch": 0.04, + "learning_rate": 4.97797335496604e-05, + "loss": 0.8735, + "step": 42381 + }, + { + "epoch": 0.04, + "learning_rate": 4.977972314569569e-05, + "loss": 0.6263, + "step": 42382 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779712741486374e-05, + "loss": 0.6094, + "step": 42383 + }, + { + "epoch": 0.04, + "learning_rate": 4.977970233703243e-05, + "loss": 0.73, + "step": 42384 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779691932333884e-05, + "loss": 0.8373, + "step": 42385 + }, + { + "epoch": 0.04, + "learning_rate": 4.977968152739072e-05, + "loss": 0.8244, + "step": 42386 + }, + { + "epoch": 0.04, + "learning_rate": 4.977967112220294e-05, + "loss": 0.7097, + "step": 42387 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779660716770547e-05, + "loss": 0.7408, + "step": 42388 + }, + { + "epoch": 0.04, + "learning_rate": 4.977965031109354e-05, + "loss": 0.812, + "step": 42389 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779639905171913e-05, + "loss": 0.7681, + "step": 42390 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779629499005676e-05, + "loss": 0.7709, + "step": 42391 + }, + { + "epoch": 0.04, + "learning_rate": 4.977961909259483e-05, + "loss": 0.7855, + "step": 42392 + }, + { + "epoch": 0.04, + "learning_rate": 4.977960868593936e-05, + "loss": 0.7664, + "step": 42393 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779598279039285e-05, + "loss": 0.7554, + "step": 42394 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779587871894596e-05, + "loss": 0.6837, + "step": 42395 + }, + { + "epoch": 0.04, + "learning_rate": 4.977957746450529e-05, + "loss": 0.7107, + "step": 42396 + }, + { + "epoch": 0.04, + "learning_rate": 4.977956705687137e-05, + "loss": 0.6984, + "step": 42397 + }, + { + "epoch": 0.04, + "learning_rate": 4.977955664899283e-05, + "loss": 0.6128, + "step": 42398 + }, + { + "epoch": 0.04, + "learning_rate": 4.977954624086969e-05, + "loss": 0.6036, + "step": 42399 + }, + { + "epoch": 0.04, + "learning_rate": 4.977953583250193e-05, + "loss": 0.7295, + "step": 42400 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779525423889556e-05, + "loss": 0.843, + "step": 42401 + }, + { + "epoch": 0.04, + "learning_rate": 4.977951501503257e-05, + "loss": 0.948, + "step": 42402 + }, + { + "epoch": 0.04, + "learning_rate": 4.977950460593097e-05, + "loss": 0.8626, + "step": 42403 + }, + { + "epoch": 0.04, + "learning_rate": 4.977949419658476e-05, + "loss": 0.8198, + "step": 42404 + }, + { + "epoch": 0.04, + "learning_rate": 4.977948378699394e-05, + "loss": 0.7271, + "step": 42405 + }, + { + "epoch": 0.04, + "learning_rate": 4.97794733771585e-05, + "loss": 0.8088, + "step": 42406 + }, + { + "epoch": 0.04, + "learning_rate": 4.977946296707845e-05, + "loss": 0.6888, + "step": 42407 + }, + { + "epoch": 0.04, + "learning_rate": 4.977945255675379e-05, + "loss": 0.7196, + "step": 42408 + }, + { + "epoch": 0.04, + "learning_rate": 4.977944214618452e-05, + "loss": 0.6864, + "step": 42409 + }, + { + "epoch": 0.04, + "learning_rate": 4.977943173537063e-05, + "loss": 0.6979, + "step": 42410 + }, + { + "epoch": 0.04, + "learning_rate": 4.977942132431213e-05, + "loss": 0.6475, + "step": 42411 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779410913009015e-05, + "loss": 0.7233, + "step": 42412 + }, + { + "epoch": 0.04, + "learning_rate": 4.977940050146129e-05, + "loss": 0.6978, + "step": 42413 + }, + { + "epoch": 0.04, + "learning_rate": 4.977939008966897e-05, + "loss": 0.6584, + "step": 42414 + }, + { + "epoch": 0.04, + "learning_rate": 4.977937967763202e-05, + "loss": 0.7303, + "step": 42415 + }, + { + "epoch": 0.04, + "learning_rate": 4.977936926535046e-05, + "loss": 0.7538, + "step": 42416 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779358852824286e-05, + "loss": 0.7224, + "step": 42417 + }, + { + "epoch": 0.04, + "learning_rate": 4.977934844005351e-05, + "loss": 0.6326, + "step": 42418 + }, + { + "epoch": 0.04, + "learning_rate": 4.977933802703812e-05, + "loss": 0.555, + "step": 42419 + }, + { + "epoch": 0.04, + "learning_rate": 4.977932761377811e-05, + "loss": 0.5902, + "step": 42420 + }, + { + "epoch": 0.04, + "learning_rate": 4.97793172002735e-05, + "loss": 0.6287, + "step": 42421 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779306786524284e-05, + "loss": 0.7819, + "step": 42422 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779296372530445e-05, + "loss": 0.3804, + "step": 42423 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779285958292e-05, + "loss": 0.1431, + "step": 42424 + }, + { + "epoch": 0.04, + "learning_rate": 4.977927554380894e-05, + "loss": 0.4922, + "step": 42425 + }, + { + "epoch": 0.04, + "learning_rate": 4.977926512908127e-05, + "loss": 1.2634, + "step": 42426 + }, + { + "epoch": 0.04, + "learning_rate": 4.977925471410899e-05, + "loss": 0.9916, + "step": 42427 + }, + { + "epoch": 0.04, + "learning_rate": 4.97792442988921e-05, + "loss": 0.9972, + "step": 42428 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779233883430607e-05, + "loss": 0.9016, + "step": 42429 + }, + { + "epoch": 0.04, + "learning_rate": 4.97792234677245e-05, + "loss": 0.9282, + "step": 42430 + }, + { + "epoch": 0.04, + "learning_rate": 4.977921305177378e-05, + "loss": 0.8253, + "step": 42431 + }, + { + "epoch": 0.04, + "learning_rate": 4.977920263557845e-05, + "loss": 0.7594, + "step": 42432 + }, + { + "epoch": 0.04, + "learning_rate": 4.977919221913851e-05, + "loss": 0.8389, + "step": 42433 + }, + { + "epoch": 0.04, + "learning_rate": 4.977918180245396e-05, + "loss": 0.8694, + "step": 42434 + }, + { + "epoch": 0.04, + "learning_rate": 4.97791713855248e-05, + "loss": 0.9358, + "step": 42435 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779160968351033e-05, + "loss": 0.8504, + "step": 42436 + }, + { + "epoch": 0.04, + "learning_rate": 4.977915055093266e-05, + "loss": 0.8017, + "step": 42437 + }, + { + "epoch": 0.04, + "learning_rate": 4.977914013326967e-05, + "loss": 0.8773, + "step": 42438 + }, + { + "epoch": 0.04, + "learning_rate": 4.977912971536208e-05, + "loss": 0.883, + "step": 42439 + }, + { + "epoch": 0.04, + "learning_rate": 4.977911929720987e-05, + "loss": 0.7706, + "step": 42440 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779108878813055e-05, + "loss": 0.7314, + "step": 42441 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779098460171635e-05, + "loss": 0.8234, + "step": 42442 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779088041285605e-05, + "loss": 0.8235, + "step": 42443 + }, + { + "epoch": 0.04, + "learning_rate": 4.9779077622154966e-05, + "loss": 0.7899, + "step": 42444 + }, + { + "epoch": 0.04, + "learning_rate": 4.977906720277972e-05, + "loss": 2.2957, + "step": 42445 + }, + { + "epoch": 0.04, + "learning_rate": 4.977905678315986e-05, + "loss": 0.8561, + "step": 42446 + }, + { + "epoch": 0.04, + "learning_rate": 4.977904636329539e-05, + "loss": 0.7509, + "step": 42447 + }, + { + "epoch": 0.04, + "learning_rate": 4.977903594318632e-05, + "loss": 0.7854, + "step": 42448 + }, + { + "epoch": 0.04, + "learning_rate": 4.977902552283263e-05, + "loss": 0.8101, + "step": 42449 + }, + { + "epoch": 0.04, + "learning_rate": 4.977901510223435e-05, + "loss": 0.7689, + "step": 42450 + }, + { + "epoch": 0.04, + "learning_rate": 4.977900468139145e-05, + "loss": 0.8565, + "step": 42451 + }, + { + "epoch": 0.04, + "learning_rate": 4.977899426030394e-05, + "loss": 0.6956, + "step": 42452 + }, + { + "epoch": 0.04, + "learning_rate": 4.977898383897183e-05, + "loss": 0.7275, + "step": 42453 + }, + { + "epoch": 0.04, + "learning_rate": 4.97789734173951e-05, + "loss": 0.7919, + "step": 42454 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778962995573776e-05, + "loss": 0.7937, + "step": 42455 + }, + { + "epoch": 0.04, + "learning_rate": 4.977895257350784e-05, + "loss": 0.9302, + "step": 42456 + }, + { + "epoch": 0.04, + "learning_rate": 4.977894215119729e-05, + "loss": 1.1141, + "step": 42457 + }, + { + "epoch": 0.04, + "learning_rate": 4.977893172864214e-05, + "loss": 1.1271, + "step": 42458 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778921305842384e-05, + "loss": 0.9929, + "step": 42459 + }, + { + "epoch": 0.04, + "learning_rate": 4.977891088279802e-05, + "loss": 1.4557, + "step": 42460 + }, + { + "epoch": 0.04, + "learning_rate": 4.977890045950905e-05, + "loss": 0.8808, + "step": 42461 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778890035975466e-05, + "loss": 1.073, + "step": 42462 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778879612197286e-05, + "loss": 1.1586, + "step": 42463 + }, + { + "epoch": 0.04, + "learning_rate": 4.977886918817449e-05, + "loss": 0.9507, + "step": 42464 + }, + { + "epoch": 0.04, + "learning_rate": 4.977885876390709e-05, + "loss": 0.5346, + "step": 42465 + }, + { + "epoch": 0.04, + "learning_rate": 4.977884833939508e-05, + "loss": 0.9453, + "step": 42466 + }, + { + "epoch": 0.04, + "learning_rate": 4.977883791463847e-05, + "loss": 0.9235, + "step": 42467 + }, + { + "epoch": 0.04, + "learning_rate": 4.977882748963725e-05, + "loss": 1.0941, + "step": 42468 + }, + { + "epoch": 0.04, + "learning_rate": 4.977881706439142e-05, + "loss": 0.9492, + "step": 42469 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778806638900996e-05, + "loss": 0.8758, + "step": 42470 + }, + { + "epoch": 0.04, + "learning_rate": 4.977879621316596e-05, + "loss": 0.4442, + "step": 42471 + }, + { + "epoch": 0.04, + "learning_rate": 4.977878578718631e-05, + "loss": 0.4587, + "step": 42472 + }, + { + "epoch": 0.04, + "learning_rate": 4.977877536096207e-05, + "loss": 0.363, + "step": 42473 + }, + { + "epoch": 0.04, + "learning_rate": 4.977876493449322e-05, + "loss": 0.3103, + "step": 42474 + }, + { + "epoch": 0.04, + "learning_rate": 4.977875450777976e-05, + "loss": 1.1234, + "step": 42475 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778744080821694e-05, + "loss": 1.0275, + "step": 42476 + }, + { + "epoch": 0.04, + "learning_rate": 4.977873365361902e-05, + "loss": 0.8123, + "step": 42477 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778723226171744e-05, + "loss": 0.6245, + "step": 42478 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778712798479865e-05, + "loss": 1.034, + "step": 42479 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778702370543376e-05, + "loss": 1.0642, + "step": 42480 + }, + { + "epoch": 0.04, + "learning_rate": 4.977869194236229e-05, + "loss": 1.2478, + "step": 42481 + }, + { + "epoch": 0.04, + "learning_rate": 4.977868151393659e-05, + "loss": 1.3122, + "step": 42482 + }, + { + "epoch": 0.04, + "learning_rate": 4.977867108526629e-05, + "loss": 0.7849, + "step": 42483 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778660656351386e-05, + "loss": 0.6009, + "step": 42484 + }, + { + "epoch": 0.04, + "learning_rate": 4.977865022719188e-05, + "loss": 0.7753, + "step": 42485 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778639797787764e-05, + "loss": 0.9536, + "step": 42486 + }, + { + "epoch": 0.04, + "learning_rate": 4.977862936813905e-05, + "loss": 0.9193, + "step": 42487 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778618938245725e-05, + "loss": 0.8657, + "step": 42488 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778608508107804e-05, + "loss": 1.2545, + "step": 42489 + }, + { + "epoch": 0.04, + "learning_rate": 4.977859807772527e-05, + "loss": 0.8049, + "step": 42490 + }, + { + "epoch": 0.04, + "learning_rate": 4.977858764709814e-05, + "loss": 0.9098, + "step": 42491 + }, + { + "epoch": 0.04, + "learning_rate": 4.97785772162264e-05, + "loss": 0.7425, + "step": 42492 + }, + { + "epoch": 0.04, + "learning_rate": 4.977856678511006e-05, + "loss": 0.7302, + "step": 42493 + }, + { + "epoch": 0.04, + "learning_rate": 4.977855635374912e-05, + "loss": 0.914, + "step": 42494 + }, + { + "epoch": 0.04, + "learning_rate": 4.977854592214357e-05, + "loss": 0.7134, + "step": 42495 + }, + { + "epoch": 0.04, + "learning_rate": 4.977853549029341e-05, + "loss": 0.6589, + "step": 42496 + }, + { + "epoch": 0.04, + "learning_rate": 4.977852505819866e-05, + "loss": 0.708, + "step": 42497 + }, + { + "epoch": 0.04, + "learning_rate": 4.97785146258593e-05, + "loss": 0.6512, + "step": 42498 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778504193275336e-05, + "loss": 0.5682, + "step": 42499 + }, + { + "epoch": 0.04, + "learning_rate": 4.977849376044678e-05, + "loss": 0.6992, + "step": 42500 + }, + { + "epoch": 0.04, + "eval_loss": 1.0901010036468506, + "eval_runtime": 128.8823, + "eval_samples_per_second": 10.746, + "eval_steps_per_second": 5.377, + "step": 42500 + }, + { + "epoch": 0.04, + "learning_rate": 4.977848332737361e-05, + "loss": 0.85, + "step": 42501 + }, + { + "epoch": 0.04, + "learning_rate": 4.977847289405584e-05, + "loss": 0.7981, + "step": 42502 + }, + { + "epoch": 0.04, + "learning_rate": 4.977846246049347e-05, + "loss": 0.7442, + "step": 42503 + }, + { + "epoch": 0.04, + "learning_rate": 4.97784520266865e-05, + "loss": 0.9667, + "step": 42504 + }, + { + "epoch": 0.04, + "learning_rate": 4.977844159263492e-05, + "loss": 0.5704, + "step": 42505 + }, + { + "epoch": 0.04, + "learning_rate": 4.977843115833873e-05, + "loss": 0.9215, + "step": 42506 + }, + { + "epoch": 0.04, + "learning_rate": 4.977842072379795e-05, + "loss": 0.9168, + "step": 42507 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778410289012566e-05, + "loss": 0.4812, + "step": 42508 + }, + { + "epoch": 0.04, + "learning_rate": 4.977839985398258e-05, + "loss": 0.6707, + "step": 42509 + }, + { + "epoch": 0.04, + "learning_rate": 4.977838941870799e-05, + "loss": 0.8011, + "step": 42510 + }, + { + "epoch": 0.04, + "learning_rate": 4.977837898318881e-05, + "loss": 0.8284, + "step": 42511 + }, + { + "epoch": 0.04, + "learning_rate": 4.977836854742502e-05, + "loss": 0.8703, + "step": 42512 + }, + { + "epoch": 0.04, + "learning_rate": 4.977835811141662e-05, + "loss": 0.7562, + "step": 42513 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778347675163626e-05, + "loss": 0.8173, + "step": 42514 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778337238666026e-05, + "loss": 0.6361, + "step": 42515 + }, + { + "epoch": 0.04, + "learning_rate": 4.977832680192384e-05, + "loss": 0.4738, + "step": 42516 + }, + { + "epoch": 0.04, + "learning_rate": 4.977831636493703e-05, + "loss": 0.5306, + "step": 42517 + }, + { + "epoch": 0.04, + "learning_rate": 4.977830592770564e-05, + "loss": 0.7589, + "step": 42518 + }, + { + "epoch": 0.04, + "learning_rate": 4.977829549022963e-05, + "loss": 0.4681, + "step": 42519 + }, + { + "epoch": 0.04, + "learning_rate": 4.977828505250903e-05, + "loss": 0.5109, + "step": 42520 + }, + { + "epoch": 0.04, + "learning_rate": 4.977827461454383e-05, + "loss": 0.7698, + "step": 42521 + }, + { + "epoch": 0.04, + "learning_rate": 4.977826417633403e-05, + "loss": 0.5127, + "step": 42522 + }, + { + "epoch": 0.04, + "learning_rate": 4.977825373787962e-05, + "loss": 0.7131, + "step": 42523 + }, + { + "epoch": 0.04, + "learning_rate": 4.977824329918062e-05, + "loss": 0.6008, + "step": 42524 + }, + { + "epoch": 0.04, + "learning_rate": 4.977823286023702e-05, + "loss": 0.8013, + "step": 42525 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778222421048807e-05, + "loss": 0.688, + "step": 42526 + }, + { + "epoch": 0.04, + "learning_rate": 4.977821198161601e-05, + "loss": 1.0171, + "step": 42527 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778201541938605e-05, + "loss": 0.7588, + "step": 42528 + }, + { + "epoch": 0.04, + "learning_rate": 4.977819110201659e-05, + "loss": 0.8, + "step": 42529 + }, + { + "epoch": 0.04, + "learning_rate": 4.977818066184999e-05, + "loss": 0.5238, + "step": 42530 + }, + { + "epoch": 0.04, + "learning_rate": 4.977817022143879e-05, + "loss": 0.6202, + "step": 42531 + }, + { + "epoch": 0.04, + "learning_rate": 4.977815978078298e-05, + "loss": 0.6274, + "step": 42532 + }, + { + "epoch": 0.04, + "learning_rate": 4.977814933988258e-05, + "loss": 0.4653, + "step": 42533 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778138898737574e-05, + "loss": 0.4164, + "step": 42534 + }, + { + "epoch": 0.04, + "learning_rate": 4.977812845734797e-05, + "loss": 0.6768, + "step": 42535 + }, + { + "epoch": 0.04, + "learning_rate": 4.977811801571377e-05, + "loss": 0.646, + "step": 42536 + }, + { + "epoch": 0.04, + "learning_rate": 4.977810757383497e-05, + "loss": 0.851, + "step": 42537 + }, + { + "epoch": 0.04, + "learning_rate": 4.977809713171157e-05, + "loss": 1.0547, + "step": 42538 + }, + { + "epoch": 0.04, + "learning_rate": 4.977808668934357e-05, + "loss": 0.7877, + "step": 42539 + }, + { + "epoch": 0.04, + "learning_rate": 4.977807624673098e-05, + "loss": 1.0812, + "step": 42540 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778065803873774e-05, + "loss": 0.8596, + "step": 42541 + }, + { + "epoch": 0.04, + "learning_rate": 4.977805536077198e-05, + "loss": 1.0549, + "step": 42542 + }, + { + "epoch": 0.04, + "learning_rate": 4.977804491742559e-05, + "loss": 1.0203, + "step": 42543 + }, + { + "epoch": 0.04, + "learning_rate": 4.97780344738346e-05, + "loss": 1.08, + "step": 42544 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778024029999e-05, + "loss": 0.9063, + "step": 42545 + }, + { + "epoch": 0.04, + "learning_rate": 4.977801358591882e-05, + "loss": 0.788, + "step": 42546 + }, + { + "epoch": 0.04, + "learning_rate": 4.977800314159403e-05, + "loss": 0.9717, + "step": 42547 + }, + { + "epoch": 0.04, + "learning_rate": 4.977799269702465e-05, + "loss": 0.8997, + "step": 42548 + }, + { + "epoch": 0.04, + "learning_rate": 4.977798225221066e-05, + "loss": 1.2194, + "step": 42549 + }, + { + "epoch": 0.04, + "learning_rate": 4.977797180715208e-05, + "loss": 1.0283, + "step": 42550 + }, + { + "epoch": 0.04, + "learning_rate": 4.97779613618489e-05, + "loss": 0.7805, + "step": 42551 + }, + { + "epoch": 0.04, + "learning_rate": 4.977795091630113e-05, + "loss": 0.8334, + "step": 42552 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777940470508756e-05, + "loss": 0.8301, + "step": 42553 + }, + { + "epoch": 0.04, + "learning_rate": 4.977793002447178e-05, + "loss": 0.2184, + "step": 42554 + }, + { + "epoch": 0.04, + "learning_rate": 4.977791957819022e-05, + "loss": 0.0954, + "step": 42555 + }, + { + "epoch": 0.04, + "learning_rate": 4.977790913166405e-05, + "loss": 0.0764, + "step": 42556 + }, + { + "epoch": 0.04, + "learning_rate": 4.977789868489329e-05, + "loss": 0.0516, + "step": 42557 + }, + { + "epoch": 0.04, + "learning_rate": 4.977788823787793e-05, + "loss": 0.0618, + "step": 42558 + }, + { + "epoch": 0.04, + "learning_rate": 4.977787779061798e-05, + "loss": 0.0571, + "step": 42559 + }, + { + "epoch": 0.04, + "learning_rate": 4.977786734311343e-05, + "loss": 0.0419, + "step": 42560 + }, + { + "epoch": 0.04, + "learning_rate": 4.977785689536427e-05, + "loss": 0.0698, + "step": 42561 + }, + { + "epoch": 0.04, + "learning_rate": 4.977784644737052e-05, + "loss": 0.0556, + "step": 42562 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777835999132186e-05, + "loss": 0.0509, + "step": 42563 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777825550649246e-05, + "loss": 0.0294, + "step": 42564 + }, + { + "epoch": 0.04, + "learning_rate": 4.977781510192171e-05, + "loss": 0.0356, + "step": 42565 + }, + { + "epoch": 0.04, + "learning_rate": 4.977780465294958e-05, + "loss": 0.0457, + "step": 42566 + }, + { + "epoch": 0.04, + "learning_rate": 4.977779420373285e-05, + "loss": 0.0378, + "step": 42567 + }, + { + "epoch": 0.04, + "learning_rate": 4.977778375427153e-05, + "loss": 0.0624, + "step": 42568 + }, + { + "epoch": 0.04, + "learning_rate": 4.977777330456561e-05, + "loss": 0.0375, + "step": 42569 + }, + { + "epoch": 0.04, + "learning_rate": 4.97777628546151e-05, + "loss": 0.0477, + "step": 42570 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777752404419985e-05, + "loss": 0.9575, + "step": 42571 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777741953980284e-05, + "loss": 1.1572, + "step": 42572 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777731503295974e-05, + "loss": 0.728, + "step": 42573 + }, + { + "epoch": 0.04, + "learning_rate": 4.977772105236708e-05, + "loss": 0.8337, + "step": 42574 + }, + { + "epoch": 0.04, + "learning_rate": 4.977771060119359e-05, + "loss": 1.0812, + "step": 42575 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777700149775506e-05, + "loss": 1.0992, + "step": 42576 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777689698112825e-05, + "loss": 1.1267, + "step": 42577 + }, + { + "epoch": 0.04, + "learning_rate": 4.977767924620554e-05, + "loss": 1.3608, + "step": 42578 + }, + { + "epoch": 0.04, + "learning_rate": 4.977766879405368e-05, + "loss": 1.2259, + "step": 42579 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777658341657205e-05, + "loss": 1.271, + "step": 42580 + }, + { + "epoch": 0.04, + "learning_rate": 4.977764788901615e-05, + "loss": 0.931, + "step": 42581 + }, + { + "epoch": 0.04, + "learning_rate": 4.977763743613049e-05, + "loss": 0.74, + "step": 42582 + }, + { + "epoch": 0.04, + "learning_rate": 4.977762698300024e-05, + "loss": 1.0048, + "step": 42583 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777616529625395e-05, + "loss": 0.9009, + "step": 42584 + }, + { + "epoch": 0.04, + "learning_rate": 4.977760607600595e-05, + "loss": 1.0357, + "step": 42585 + }, + { + "epoch": 0.04, + "learning_rate": 4.977759562214193e-05, + "loss": 1.0153, + "step": 42586 + }, + { + "epoch": 0.04, + "learning_rate": 4.97775851680333e-05, + "loss": 1.0923, + "step": 42587 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777574713680076e-05, + "loss": 0.9422, + "step": 42588 + }, + { + "epoch": 0.04, + "learning_rate": 4.977756425908226e-05, + "loss": 1.1885, + "step": 42589 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777553804239854e-05, + "loss": 0.9343, + "step": 42590 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777543349152856e-05, + "loss": 0.9718, + "step": 42591 + }, + { + "epoch": 0.04, + "learning_rate": 4.977753289382126e-05, + "loss": 0.8049, + "step": 42592 + }, + { + "epoch": 0.04, + "learning_rate": 4.977752243824507e-05, + "loss": 0.9091, + "step": 42593 + }, + { + "epoch": 0.04, + "learning_rate": 4.977751198242429e-05, + "loss": 1.0093, + "step": 42594 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777501526358916e-05, + "loss": 1.7444, + "step": 42595 + }, + { + "epoch": 0.04, + "learning_rate": 4.977749107004895e-05, + "loss": 1.8156, + "step": 42596 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777480613494384e-05, + "loss": 1.3342, + "step": 42597 + }, + { + "epoch": 0.04, + "learning_rate": 4.977747015669524e-05, + "loss": 1.0839, + "step": 42598 + }, + { + "epoch": 0.04, + "learning_rate": 4.977745969965149e-05, + "loss": 1.0497, + "step": 42599 + }, + { + "epoch": 0.04, + "learning_rate": 4.977744924236315e-05, + "loss": 1.041, + "step": 42600 + }, + { + "epoch": 0.04, + "learning_rate": 4.977743878483022e-05, + "loss": 1.0533, + "step": 42601 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777428327052694e-05, + "loss": 0.899, + "step": 42602 + }, + { + "epoch": 0.04, + "learning_rate": 4.977741786903058e-05, + "loss": 1.3532, + "step": 42603 + }, + { + "epoch": 0.04, + "learning_rate": 4.977740741076387e-05, + "loss": 0.8345, + "step": 42604 + }, + { + "epoch": 0.04, + "learning_rate": 4.977739695225257e-05, + "loss": 0.6517, + "step": 42605 + }, + { + "epoch": 0.04, + "learning_rate": 4.977738649349668e-05, + "loss": 0.5822, + "step": 42606 + }, + { + "epoch": 0.04, + "learning_rate": 4.977737603449619e-05, + "loss": 0.3397, + "step": 42607 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777365575251116e-05, + "loss": 0.3588, + "step": 42608 + }, + { + "epoch": 0.04, + "learning_rate": 4.977735511576145e-05, + "loss": 0.3221, + "step": 42609 + }, + { + "epoch": 0.04, + "learning_rate": 4.977734465602719e-05, + "loss": 0.2572, + "step": 42610 + }, + { + "epoch": 0.04, + "learning_rate": 4.977733419604834e-05, + "loss": 0.1952, + "step": 42611 + }, + { + "epoch": 0.04, + "learning_rate": 4.97773237358249e-05, + "loss": 0.7435, + "step": 42612 + }, + { + "epoch": 0.04, + "learning_rate": 4.977731327535686e-05, + "loss": 0.4685, + "step": 42613 + }, + { + "epoch": 0.04, + "learning_rate": 4.977730281464424e-05, + "loss": 0.542, + "step": 42614 + }, + { + "epoch": 0.04, + "learning_rate": 4.977729235368702e-05, + "loss": 1.0661, + "step": 42615 + }, + { + "epoch": 0.04, + "learning_rate": 4.977728189248522e-05, + "loss": 0.3314, + "step": 42616 + }, + { + "epoch": 0.04, + "learning_rate": 4.977727143103882e-05, + "loss": 0.3251, + "step": 42617 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777260969347824e-05, + "loss": 0.2539, + "step": 42618 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777250507412246e-05, + "loss": 0.3224, + "step": 42619 + }, + { + "epoch": 0.04, + "learning_rate": 4.977724004523208e-05, + "loss": 0.7887, + "step": 42620 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777229582807316e-05, + "loss": 1.1107, + "step": 42621 + }, + { + "epoch": 0.04, + "learning_rate": 4.977721912013796e-05, + "loss": 0.538, + "step": 42622 + }, + { + "epoch": 0.04, + "learning_rate": 4.977720865722403e-05, + "loss": 0.702, + "step": 42623 + }, + { + "epoch": 0.04, + "learning_rate": 4.97771981940655e-05, + "loss": 0.9848, + "step": 42624 + }, + { + "epoch": 0.04, + "learning_rate": 4.977718773066237e-05, + "loss": 0.7186, + "step": 42625 + }, + { + "epoch": 0.04, + "learning_rate": 4.977717726701466e-05, + "loss": 1.0263, + "step": 42626 + }, + { + "epoch": 0.04, + "learning_rate": 4.977716680312236e-05, + "loss": 1.2188, + "step": 42627 + }, + { + "epoch": 0.04, + "learning_rate": 4.977715633898547e-05, + "loss": 1.2499, + "step": 42628 + }, + { + "epoch": 0.04, + "learning_rate": 4.977714587460399e-05, + "loss": 1.0366, + "step": 42629 + }, + { + "epoch": 0.04, + "learning_rate": 4.977713540997792e-05, + "loss": 1.0327, + "step": 42630 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777124945107254e-05, + "loss": 0.9254, + "step": 42631 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777114479992016e-05, + "loss": 1.291, + "step": 42632 + }, + { + "epoch": 0.04, + "learning_rate": 4.977710401463217e-05, + "loss": 1.0566, + "step": 42633 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777093549027745e-05, + "loss": 1.4656, + "step": 42634 + }, + { + "epoch": 0.04, + "learning_rate": 4.977708308317873e-05, + "loss": 1.2906, + "step": 42635 + }, + { + "epoch": 0.04, + "learning_rate": 4.977707261708512e-05, + "loss": 1.0808, + "step": 42636 + }, + { + "epoch": 0.04, + "learning_rate": 4.977706215074693e-05, + "loss": 1.2016, + "step": 42637 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777051684164145e-05, + "loss": 0.9665, + "step": 42638 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777041217336776e-05, + "loss": 1.1026, + "step": 42639 + }, + { + "epoch": 0.04, + "learning_rate": 4.977703075026482e-05, + "loss": 1.1049, + "step": 42640 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777020282948276e-05, + "loss": 0.3294, + "step": 42641 + }, + { + "epoch": 0.04, + "learning_rate": 4.977700981538713e-05, + "loss": 0.6578, + "step": 42642 + }, + { + "epoch": 0.04, + "learning_rate": 4.977699934758141e-05, + "loss": 0.9955, + "step": 42643 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776988879531096e-05, + "loss": 1.0043, + "step": 42644 + }, + { + "epoch": 0.04, + "learning_rate": 4.97769784112362e-05, + "loss": 1.2659, + "step": 42645 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776967942696704e-05, + "loss": 1.2185, + "step": 42646 + }, + { + "epoch": 0.04, + "learning_rate": 4.977695747391263e-05, + "loss": 0.9046, + "step": 42647 + }, + { + "epoch": 0.04, + "learning_rate": 4.977694700488397e-05, + "loss": 0.7705, + "step": 42648 + }, + { + "epoch": 0.04, + "learning_rate": 4.977693653561071e-05, + "loss": 0.8521, + "step": 42649 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776926066092875e-05, + "loss": 0.5084, + "step": 42650 + }, + { + "epoch": 0.04, + "learning_rate": 4.977691559633045e-05, + "loss": 0.3196, + "step": 42651 + }, + { + "epoch": 0.04, + "learning_rate": 4.977690512632344e-05, + "loss": 0.4377, + "step": 42652 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776894656071834e-05, + "loss": 1.1788, + "step": 42653 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776884185575646e-05, + "loss": 0.8781, + "step": 42654 + }, + { + "epoch": 0.04, + "learning_rate": 4.977687371483487e-05, + "loss": 1.1133, + "step": 42655 + }, + { + "epoch": 0.04, + "learning_rate": 4.977686324384951e-05, + "loss": 1.3389, + "step": 42656 + }, + { + "epoch": 0.04, + "learning_rate": 4.977685277261956e-05, + "loss": 1.1488, + "step": 42657 + }, + { + "epoch": 0.04, + "learning_rate": 4.977684230114502e-05, + "loss": 1.0159, + "step": 42658 + }, + { + "epoch": 0.04, + "learning_rate": 4.97768318294259e-05, + "loss": 0.9959, + "step": 42659 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776821357462186e-05, + "loss": 1.0478, + "step": 42660 + }, + { + "epoch": 0.04, + "learning_rate": 4.97768108852539e-05, + "loss": 1.3094, + "step": 42661 + }, + { + "epoch": 0.04, + "learning_rate": 4.977680041280102e-05, + "loss": 1.169, + "step": 42662 + }, + { + "epoch": 0.04, + "learning_rate": 4.977678994010355e-05, + "loss": 0.9552, + "step": 42663 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776779467161496e-05, + "loss": 1.1053, + "step": 42664 + }, + { + "epoch": 0.04, + "learning_rate": 4.977676899397486e-05, + "loss": 1.2955, + "step": 42665 + }, + { + "epoch": 0.04, + "learning_rate": 4.977675852054363e-05, + "loss": 1.0715, + "step": 42666 + }, + { + "epoch": 0.04, + "learning_rate": 4.977674804686783e-05, + "loss": 0.7172, + "step": 42667 + }, + { + "epoch": 0.04, + "learning_rate": 4.977673757294743e-05, + "loss": 0.8921, + "step": 42668 + }, + { + "epoch": 0.04, + "learning_rate": 4.977672709878245e-05, + "loss": 0.9737, + "step": 42669 + }, + { + "epoch": 0.04, + "learning_rate": 4.977671662437288e-05, + "loss": 0.6909, + "step": 42670 + }, + { + "epoch": 0.04, + "learning_rate": 4.977670614971873e-05, + "loss": 0.5317, + "step": 42671 + }, + { + "epoch": 0.04, + "learning_rate": 4.977669567481999e-05, + "loss": 0.5758, + "step": 42672 + }, + { + "epoch": 0.04, + "learning_rate": 4.977668519967667e-05, + "loss": 1.012, + "step": 42673 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776674724288755e-05, + "loss": 0.9415, + "step": 42674 + }, + { + "epoch": 0.04, + "learning_rate": 4.977666424865627e-05, + "loss": 0.9863, + "step": 42675 + }, + { + "epoch": 0.04, + "learning_rate": 4.977665377277919e-05, + "loss": 1.1086, + "step": 42676 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776643296657526e-05, + "loss": 1.1089, + "step": 42677 + }, + { + "epoch": 0.04, + "learning_rate": 4.977663282029128e-05, + "loss": 1.2055, + "step": 42678 + }, + { + "epoch": 0.04, + "learning_rate": 4.977662234368045e-05, + "loss": 1.2459, + "step": 42679 + }, + { + "epoch": 0.04, + "learning_rate": 4.977661186682503e-05, + "loss": 1.723, + "step": 42680 + }, + { + "epoch": 0.04, + "learning_rate": 4.977660138972503e-05, + "loss": 1.0531, + "step": 42681 + }, + { + "epoch": 0.04, + "learning_rate": 4.977659091238045e-05, + "loss": 1.1736, + "step": 42682 + }, + { + "epoch": 0.04, + "learning_rate": 4.977658043479128e-05, + "loss": 1.073, + "step": 42683 + }, + { + "epoch": 0.04, + "learning_rate": 4.977656995695753e-05, + "loss": 0.8418, + "step": 42684 + }, + { + "epoch": 0.04, + "learning_rate": 4.977655947887919e-05, + "loss": 0.7044, + "step": 42685 + }, + { + "epoch": 0.04, + "learning_rate": 4.977654900055627e-05, + "loss": 0.7863, + "step": 42686 + }, + { + "epoch": 0.04, + "learning_rate": 4.977653852198877e-05, + "loss": 0.8825, + "step": 42687 + }, + { + "epoch": 0.04, + "learning_rate": 4.977652804317668e-05, + "loss": 1.4496, + "step": 42688 + }, + { + "epoch": 0.04, + "learning_rate": 4.977651756412002e-05, + "loss": 1.8104, + "step": 42689 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776507084818755e-05, + "loss": 1.2927, + "step": 42690 + }, + { + "epoch": 0.04, + "learning_rate": 4.977649660527293e-05, + "loss": 0.6164, + "step": 42691 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776486125482505e-05, + "loss": 0.7977, + "step": 42692 + }, + { + "epoch": 0.04, + "learning_rate": 4.97764756454475e-05, + "loss": 0.7111, + "step": 42693 + }, + { + "epoch": 0.04, + "learning_rate": 4.977646516516792e-05, + "loss": 0.6245, + "step": 42694 + }, + { + "epoch": 0.04, + "learning_rate": 4.977645468464375e-05, + "loss": 0.7006, + "step": 42695 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776444203874996e-05, + "loss": 0.7724, + "step": 42696 + }, + { + "epoch": 0.04, + "learning_rate": 4.977643372286166e-05, + "loss": 0.9555, + "step": 42697 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776423241603744e-05, + "loss": 1.2573, + "step": 42698 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776412760101244e-05, + "loss": 1.2192, + "step": 42699 + }, + { + "epoch": 0.04, + "learning_rate": 4.977640227835416e-05, + "loss": 0.4652, + "step": 42700 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776391796362497e-05, + "loss": 0.2002, + "step": 42701 + }, + { + "epoch": 0.04, + "learning_rate": 4.977638131412625e-05, + "loss": 0.2032, + "step": 42702 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776370831645426e-05, + "loss": 0.8903, + "step": 42703 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776360348920014e-05, + "loss": 1.0884, + "step": 42704 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776349865950026e-05, + "loss": 0.8858, + "step": 42705 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776339382735456e-05, + "loss": 0.9258, + "step": 42706 + }, + { + "epoch": 0.04, + "learning_rate": 4.97763288992763e-05, + "loss": 0.2681, + "step": 42707 + }, + { + "epoch": 0.04, + "learning_rate": 4.977631841557257e-05, + "loss": 0.2755, + "step": 42708 + }, + { + "epoch": 0.04, + "learning_rate": 4.977630793162424e-05, + "loss": 0.974, + "step": 42709 + }, + { + "epoch": 0.04, + "learning_rate": 4.977629744743135e-05, + "loss": 1.1515, + "step": 42710 + }, + { + "epoch": 0.04, + "learning_rate": 4.977628696299387e-05, + "loss": 1.2309, + "step": 42711 + }, + { + "epoch": 0.04, + "learning_rate": 4.977627647831181e-05, + "loss": 1.2935, + "step": 42712 + }, + { + "epoch": 0.04, + "learning_rate": 4.977626599338516e-05, + "loss": 1.172, + "step": 42713 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776255508213944e-05, + "loss": 0.9681, + "step": 42714 + }, + { + "epoch": 0.04, + "learning_rate": 4.977624502279814e-05, + "loss": 0.5731, + "step": 42715 + }, + { + "epoch": 0.04, + "learning_rate": 4.977623453713776e-05, + "loss": 1.0202, + "step": 42716 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776224051232786e-05, + "loss": 1.131, + "step": 42717 + }, + { + "epoch": 0.04, + "learning_rate": 4.977621356508325e-05, + "loss": 1.0168, + "step": 42718 + }, + { + "epoch": 0.04, + "learning_rate": 4.977620307868912e-05, + "loss": 0.8479, + "step": 42719 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776192592050413e-05, + "loss": 0.9857, + "step": 42720 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776182105167127e-05, + "loss": 0.8723, + "step": 42721 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776171618039264e-05, + "loss": 0.9408, + "step": 42722 + }, + { + "epoch": 0.04, + "learning_rate": 4.977616113066682e-05, + "loss": 0.7809, + "step": 42723 + }, + { + "epoch": 0.04, + "learning_rate": 4.977615064304979e-05, + "loss": 0.7154, + "step": 42724 + }, + { + "epoch": 0.04, + "learning_rate": 4.977614015518819e-05, + "loss": 0.7697, + "step": 42725 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776129667082e-05, + "loss": 0.6247, + "step": 42726 + }, + { + "epoch": 0.04, + "learning_rate": 4.977611917873124e-05, + "loss": 0.9467, + "step": 42727 + }, + { + "epoch": 0.04, + "learning_rate": 4.977610869013589e-05, + "loss": 1.0218, + "step": 42728 + }, + { + "epoch": 0.04, + "learning_rate": 4.977609820129597e-05, + "loss": 1.0929, + "step": 42729 + }, + { + "epoch": 0.04, + "learning_rate": 4.977608771221147e-05, + "loss": 1.1389, + "step": 42730 + }, + { + "epoch": 0.04, + "learning_rate": 4.977607722288239e-05, + "loss": 1.1759, + "step": 42731 + }, + { + "epoch": 0.04, + "learning_rate": 4.9776066733308725e-05, + "loss": 0.8979, + "step": 42732 + }, + { + "epoch": 0.04, + "learning_rate": 4.977605624349049e-05, + "loss": 1.2797, + "step": 42733 + }, + { + "epoch": 0.04, + "learning_rate": 4.977604575342767e-05, + "loss": 0.5025, + "step": 42734 + }, + { + "epoch": 0.04, + "learning_rate": 4.977603526312027e-05, + "loss": 0.1793, + "step": 42735 + }, + { + "epoch": 0.04, + "learning_rate": 4.97760247725683e-05, + "loss": 0.956, + "step": 42736 + }, + { + "epoch": 0.04, + "learning_rate": 4.977601428177174e-05, + "loss": 0.9807, + "step": 42737 + }, + { + "epoch": 0.04, + "learning_rate": 4.977600379073061e-05, + "loss": 1.134, + "step": 42738 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775993299444904e-05, + "loss": 1.229, + "step": 42739 + }, + { + "epoch": 0.04, + "learning_rate": 4.977598280791461e-05, + "loss": 0.8884, + "step": 42740 + }, + { + "epoch": 0.04, + "learning_rate": 4.977597231613974e-05, + "loss": 0.491, + "step": 42741 + }, + { + "epoch": 0.04, + "learning_rate": 4.97759618241203e-05, + "loss": 1.0774, + "step": 42742 + }, + { + "epoch": 0.04, + "learning_rate": 4.977595133185627e-05, + "loss": 1.2082, + "step": 42743 + }, + { + "epoch": 0.04, + "learning_rate": 4.977594083934768e-05, + "loss": 0.7981, + "step": 42744 + }, + { + "epoch": 0.04, + "learning_rate": 4.97759303465945e-05, + "loss": 1.2515, + "step": 42745 + }, + { + "epoch": 0.04, + "learning_rate": 4.977591985359674e-05, + "loss": 1.282, + "step": 42746 + }, + { + "epoch": 0.04, + "learning_rate": 4.977590936035441e-05, + "loss": 0.2743, + "step": 42747 + }, + { + "epoch": 0.04, + "learning_rate": 4.97758988668675e-05, + "loss": 0.9013, + "step": 42748 + }, + { + "epoch": 0.04, + "learning_rate": 4.977588837313602e-05, + "loss": 0.9887, + "step": 42749 + }, + { + "epoch": 0.04, + "learning_rate": 4.977587787915995e-05, + "loss": 0.8917, + "step": 42750 + }, + { + "epoch": 0.04, + "learning_rate": 4.977586738493931e-05, + "loss": 0.914, + "step": 42751 + }, + { + "epoch": 0.04, + "learning_rate": 4.977585689047409e-05, + "loss": 1.0852, + "step": 42752 + }, + { + "epoch": 0.04, + "learning_rate": 4.977584639576429e-05, + "loss": 1.1459, + "step": 42753 + }, + { + "epoch": 0.04, + "learning_rate": 4.977583590080992e-05, + "loss": 1.3332, + "step": 42754 + }, + { + "epoch": 0.04, + "learning_rate": 4.977582540561098e-05, + "loss": 1.051, + "step": 42755 + }, + { + "epoch": 0.04, + "learning_rate": 4.977581491016745e-05, + "loss": 0.9653, + "step": 42756 + }, + { + "epoch": 0.04, + "learning_rate": 4.977580441447935e-05, + "loss": 0.9907, + "step": 42757 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775793918546665e-05, + "loss": 1.2277, + "step": 42758 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775783422369414e-05, + "loss": 1.326, + "step": 42759 + }, + { + "epoch": 0.04, + "learning_rate": 4.977577292594758e-05, + "loss": 1.2112, + "step": 42760 + }, + { + "epoch": 0.04, + "learning_rate": 4.977576242928118e-05, + "loss": 1.1766, + "step": 42761 + }, + { + "epoch": 0.04, + "learning_rate": 4.977575193237019e-05, + "loss": 0.9502, + "step": 42762 + }, + { + "epoch": 0.04, + "learning_rate": 4.977574143521464e-05, + "loss": 1.5337, + "step": 42763 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775730937814504e-05, + "loss": 0.7898, + "step": 42764 + }, + { + "epoch": 0.04, + "learning_rate": 4.977572044016979e-05, + "loss": 1.0904, + "step": 42765 + }, + { + "epoch": 0.04, + "learning_rate": 4.977570994228051e-05, + "loss": 1.1057, + "step": 42766 + }, + { + "epoch": 0.04, + "learning_rate": 4.977569944414665e-05, + "loss": 1.1902, + "step": 42767 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775688945768216e-05, + "loss": 1.0893, + "step": 42768 + }, + { + "epoch": 0.04, + "learning_rate": 4.97756784471452e-05, + "loss": 0.5359, + "step": 42769 + }, + { + "epoch": 0.04, + "learning_rate": 4.977566794827762e-05, + "loss": 0.7177, + "step": 42770 + }, + { + "epoch": 0.04, + "learning_rate": 4.977565744916546e-05, + "loss": 0.8732, + "step": 42771 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775646949808724e-05, + "loss": 0.893, + "step": 42772 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775636450207414e-05, + "loss": 1.049, + "step": 42773 + }, + { + "epoch": 0.04, + "learning_rate": 4.977562595036153e-05, + "loss": 0.9523, + "step": 42774 + }, + { + "epoch": 0.04, + "learning_rate": 4.977561545027107e-05, + "loss": 0.9452, + "step": 42775 + }, + { + "epoch": 0.04, + "learning_rate": 4.977560494993604e-05, + "loss": 1.2934, + "step": 42776 + }, + { + "epoch": 0.04, + "learning_rate": 4.977559444935643e-05, + "loss": 1.0027, + "step": 42777 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775583948532246e-05, + "loss": 1.1559, + "step": 42778 + }, + { + "epoch": 0.04, + "learning_rate": 4.977557344746349e-05, + "loss": 0.9811, + "step": 42779 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775562946150155e-05, + "loss": 0.5485, + "step": 42780 + }, + { + "epoch": 0.04, + "learning_rate": 4.977555244459225e-05, + "loss": 0.7607, + "step": 42781 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775541942789775e-05, + "loss": 0.7587, + "step": 42782 + }, + { + "epoch": 0.04, + "learning_rate": 4.977553144074273e-05, + "loss": 0.4716, + "step": 42783 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775520938451105e-05, + "loss": 0.6627, + "step": 42784 + }, + { + "epoch": 0.04, + "learning_rate": 4.97755104359149e-05, + "loss": 0.8478, + "step": 42785 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775499933134126e-05, + "loss": 0.6296, + "step": 42786 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775489430108777e-05, + "loss": 0.9362, + "step": 42787 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775478926838865e-05, + "loss": 0.8183, + "step": 42788 + }, + { + "epoch": 0.04, + "learning_rate": 4.977546842332437e-05, + "loss": 1.1356, + "step": 42789 + }, + { + "epoch": 0.04, + "learning_rate": 4.977545791956531e-05, + "loss": 1.1323, + "step": 42790 + }, + { + "epoch": 0.04, + "learning_rate": 4.977544741556167e-05, + "loss": 1.1587, + "step": 42791 + }, + { + "epoch": 0.04, + "learning_rate": 4.977543691131346e-05, + "loss": 1.0544, + "step": 42792 + }, + { + "epoch": 0.04, + "learning_rate": 4.977542640682068e-05, + "loss": 0.9496, + "step": 42793 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775415902083325e-05, + "loss": 0.9477, + "step": 42794 + }, + { + "epoch": 0.04, + "learning_rate": 4.977540539710139e-05, + "loss": 0.9245, + "step": 42795 + }, + { + "epoch": 0.04, + "learning_rate": 4.977539489187489e-05, + "loss": 1.0332, + "step": 42796 + }, + { + "epoch": 0.04, + "learning_rate": 4.977538438640382e-05, + "loss": 1.2262, + "step": 42797 + }, + { + "epoch": 0.04, + "learning_rate": 4.977537388068817e-05, + "loss": 1.3267, + "step": 42798 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775363374727955e-05, + "loss": 0.9899, + "step": 42799 + }, + { + "epoch": 0.04, + "learning_rate": 4.977535286852317e-05, + "loss": 1.4075, + "step": 42800 + }, + { + "epoch": 0.04, + "learning_rate": 4.977534236207381e-05, + "loss": 1.0612, + "step": 42801 + }, + { + "epoch": 0.04, + "learning_rate": 4.977533185537988e-05, + "loss": 0.8175, + "step": 42802 + }, + { + "epoch": 0.04, + "learning_rate": 4.977532134844137e-05, + "loss": 0.8422, + "step": 42803 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775310841258295e-05, + "loss": 1.2201, + "step": 42804 + }, + { + "epoch": 0.04, + "learning_rate": 4.977530033383065e-05, + "loss": 1.0676, + "step": 42805 + }, + { + "epoch": 0.04, + "learning_rate": 4.977528982615843e-05, + "loss": 0.9154, + "step": 42806 + }, + { + "epoch": 0.04, + "learning_rate": 4.977527931824164e-05, + "loss": 1.5186, + "step": 42807 + }, + { + "epoch": 0.04, + "learning_rate": 4.977526881008028e-05, + "loss": 0.9355, + "step": 42808 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775258301674344e-05, + "loss": 1.0017, + "step": 42809 + }, + { + "epoch": 0.04, + "learning_rate": 4.977524779302384e-05, + "loss": 1.1253, + "step": 42810 + }, + { + "epoch": 0.04, + "learning_rate": 4.977523728412877e-05, + "loss": 0.8246, + "step": 42811 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775226774989124e-05, + "loss": 1.1565, + "step": 42812 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775216265604915e-05, + "loss": 0.8986, + "step": 42813 + }, + { + "epoch": 0.04, + "learning_rate": 4.977520575597613e-05, + "loss": 0.7151, + "step": 42814 + }, + { + "epoch": 0.04, + "learning_rate": 4.977519524610277e-05, + "loss": 1.1747, + "step": 42815 + }, + { + "epoch": 0.04, + "learning_rate": 4.977518473598485e-05, + "loss": 1.2003, + "step": 42816 + }, + { + "epoch": 0.04, + "learning_rate": 4.977517422562234e-05, + "loss": 1.0464, + "step": 42817 + }, + { + "epoch": 0.04, + "learning_rate": 4.977516371501528e-05, + "loss": 0.6176, + "step": 42818 + }, + { + "epoch": 0.04, + "learning_rate": 4.977515320416365e-05, + "loss": 0.7661, + "step": 42819 + }, + { + "epoch": 0.04, + "learning_rate": 4.977514269306744e-05, + "loss": 0.8642, + "step": 42820 + }, + { + "epoch": 0.04, + "learning_rate": 4.977513218172666e-05, + "loss": 0.852, + "step": 42821 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775121670141325e-05, + "loss": 1.0256, + "step": 42822 + }, + { + "epoch": 0.04, + "learning_rate": 4.977511115831141e-05, + "loss": 0.8578, + "step": 42823 + }, + { + "epoch": 0.04, + "learning_rate": 4.977510064623691e-05, + "loss": 1.1915, + "step": 42824 + }, + { + "epoch": 0.04, + "learning_rate": 4.977509013391787e-05, + "loss": 1.3047, + "step": 42825 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775079621354245e-05, + "loss": 0.9746, + "step": 42826 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775069108546045e-05, + "loss": 1.3528, + "step": 42827 + }, + { + "epoch": 0.04, + "learning_rate": 4.977505859549328e-05, + "loss": 1.0742, + "step": 42828 + }, + { + "epoch": 0.04, + "learning_rate": 4.977504808219596e-05, + "loss": 1.0521, + "step": 42829 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775037568654056e-05, + "loss": 0.8526, + "step": 42830 + }, + { + "epoch": 0.04, + "learning_rate": 4.977502705486759e-05, + "loss": 0.8938, + "step": 42831 + }, + { + "epoch": 0.04, + "learning_rate": 4.9775016540836555e-05, + "loss": 0.9696, + "step": 42832 + }, + { + "epoch": 0.04, + "learning_rate": 4.977500602656095e-05, + "loss": 0.9771, + "step": 42833 + }, + { + "epoch": 0.04, + "learning_rate": 4.977499551204078e-05, + "loss": 1.0117, + "step": 42834 + }, + { + "epoch": 0.04, + "learning_rate": 4.977498499727603e-05, + "loss": 1.0129, + "step": 42835 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774974482266725e-05, + "loss": 0.9775, + "step": 42836 + }, + { + "epoch": 0.04, + "learning_rate": 4.977496396701284e-05, + "loss": 0.9269, + "step": 42837 + }, + { + "epoch": 0.04, + "learning_rate": 4.97749534515144e-05, + "loss": 0.9701, + "step": 42838 + }, + { + "epoch": 0.04, + "learning_rate": 4.977494293577138e-05, + "loss": 1.0457, + "step": 42839 + }, + { + "epoch": 0.04, + "learning_rate": 4.977493241978381e-05, + "loss": 1.103, + "step": 42840 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774921903551655e-05, + "loss": 0.6487, + "step": 42841 + }, + { + "epoch": 0.04, + "learning_rate": 4.977491138707494e-05, + "loss": 1.1235, + "step": 42842 + }, + { + "epoch": 0.04, + "learning_rate": 4.977490087035365e-05, + "loss": 0.775, + "step": 42843 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774890353387807e-05, + "loss": 0.9125, + "step": 42844 + }, + { + "epoch": 0.04, + "learning_rate": 4.977487983617738e-05, + "loss": 0.9928, + "step": 42845 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774869318722396e-05, + "loss": 0.9132, + "step": 42846 + }, + { + "epoch": 0.04, + "learning_rate": 4.977485880102284e-05, + "loss": 1.275, + "step": 42847 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774848283078724e-05, + "loss": 1.0192, + "step": 42848 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774837764890034e-05, + "loss": 0.9372, + "step": 42849 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774827246456776e-05, + "loss": 1.3051, + "step": 42850 + }, + { + "epoch": 0.04, + "learning_rate": 4.977481672777896e-05, + "loss": 0.8345, + "step": 42851 + }, + { + "epoch": 0.04, + "learning_rate": 4.977480620885657e-05, + "loss": 0.8332, + "step": 42852 + }, + { + "epoch": 0.04, + "learning_rate": 4.977479568968962e-05, + "loss": 1.0966, + "step": 42853 + }, + { + "epoch": 0.04, + "learning_rate": 4.97747851702781e-05, + "loss": 0.9843, + "step": 42854 + }, + { + "epoch": 0.04, + "learning_rate": 4.977477465062201e-05, + "loss": 0.7905, + "step": 42855 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774764130721366e-05, + "loss": 0.722, + "step": 42856 + }, + { + "epoch": 0.04, + "learning_rate": 4.977475361057614e-05, + "loss": 0.8859, + "step": 42857 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774743090186356e-05, + "loss": 1.2723, + "step": 42858 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774732569552005e-05, + "loss": 1.3981, + "step": 42859 + }, + { + "epoch": 0.04, + "learning_rate": 4.977472204867309e-05, + "loss": 1.0083, + "step": 42860 + }, + { + "epoch": 0.04, + "learning_rate": 4.977471152754961e-05, + "loss": 1.1207, + "step": 42861 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774701006181564e-05, + "loss": 1.0363, + "step": 42862 + }, + { + "epoch": 0.04, + "learning_rate": 4.977469048456895e-05, + "loss": 1.0857, + "step": 42863 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774679962711775e-05, + "loss": 0.9556, + "step": 42864 + }, + { + "epoch": 0.04, + "learning_rate": 4.977466944061003e-05, + "loss": 1.705, + "step": 42865 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774658918263724e-05, + "loss": 1.2292, + "step": 42866 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774648395672855e-05, + "loss": 0.9622, + "step": 42867 + }, + { + "epoch": 0.04, + "learning_rate": 4.977463787283741e-05, + "loss": 1.1031, + "step": 42868 + }, + { + "epoch": 0.04, + "learning_rate": 4.977462734975741e-05, + "loss": 1.0836, + "step": 42869 + }, + { + "epoch": 0.04, + "learning_rate": 4.977461682643284e-05, + "loss": 1.0089, + "step": 42870 + }, + { + "epoch": 0.04, + "learning_rate": 4.977460630286371e-05, + "loss": 0.9525, + "step": 42871 + }, + { + "epoch": 0.04, + "learning_rate": 4.977459577905002e-05, + "loss": 0.9914, + "step": 42872 + }, + { + "epoch": 0.04, + "learning_rate": 4.977458525499175e-05, + "loss": 0.9403, + "step": 42873 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774574730688925e-05, + "loss": 0.8716, + "step": 42874 + }, + { + "epoch": 0.04, + "learning_rate": 4.977456420614154e-05, + "loss": 1.0783, + "step": 42875 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774553681349583e-05, + "loss": 0.9486, + "step": 42876 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774543156313066e-05, + "loss": 0.8307, + "step": 42877 + }, + { + "epoch": 0.04, + "learning_rate": 4.977453263103199e-05, + "loss": 1.2308, + "step": 42878 + }, + { + "epoch": 0.04, + "learning_rate": 4.977452210550634e-05, + "loss": 0.9446, + "step": 42879 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774511579736135e-05, + "loss": 1.2575, + "step": 42880 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774501053721356e-05, + "loss": 1.0518, + "step": 42881 + }, + { + "epoch": 0.04, + "learning_rate": 4.977449052746203e-05, + "loss": 0.8322, + "step": 42882 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774480000958124e-05, + "loss": 0.8818, + "step": 42883 + }, + { + "epoch": 0.04, + "learning_rate": 4.977446947420966e-05, + "loss": 1.1011, + "step": 42884 + }, + { + "epoch": 0.04, + "learning_rate": 4.977445894721664e-05, + "loss": 1.1485, + "step": 42885 + }, + { + "epoch": 0.04, + "learning_rate": 4.977444841997905e-05, + "loss": 0.8346, + "step": 42886 + }, + { + "epoch": 0.04, + "learning_rate": 4.97744378924969e-05, + "loss": 1.1321, + "step": 42887 + }, + { + "epoch": 0.04, + "learning_rate": 4.977442736477018e-05, + "loss": 0.7627, + "step": 42888 + }, + { + "epoch": 0.04, + "learning_rate": 4.977441683679891e-05, + "loss": 1.1072, + "step": 42889 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774406308583066e-05, + "loss": 1.0324, + "step": 42890 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774395780122666e-05, + "loss": 0.7804, + "step": 42891 + }, + { + "epoch": 0.04, + "learning_rate": 4.97743852514177e-05, + "loss": 0.9499, + "step": 42892 + }, + { + "epoch": 0.04, + "learning_rate": 4.977437472246818e-05, + "loss": 1.109, + "step": 42893 + }, + { + "epoch": 0.04, + "learning_rate": 4.977436419327409e-05, + "loss": 1.1391, + "step": 42894 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774353663835435e-05, + "loss": 1.0359, + "step": 42895 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774343134152224e-05, + "loss": 1.0014, + "step": 42896 + }, + { + "epoch": 0.04, + "learning_rate": 4.977433260422445e-05, + "loss": 0.5154, + "step": 42897 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774322074052115e-05, + "loss": 0.4159, + "step": 42898 + }, + { + "epoch": 0.04, + "learning_rate": 4.977431154363522e-05, + "loss": 1.1144, + "step": 42899 + }, + { + "epoch": 0.04, + "learning_rate": 4.977430101297376e-05, + "loss": 0.7957, + "step": 42900 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774290482067736e-05, + "loss": 0.9, + "step": 42901 + }, + { + "epoch": 0.04, + "learning_rate": 4.977427995091715e-05, + "loss": 1.0368, + "step": 42902 + }, + { + "epoch": 0.04, + "learning_rate": 4.977426941952201e-05, + "loss": 1.041, + "step": 42903 + }, + { + "epoch": 0.04, + "learning_rate": 4.977425888788231e-05, + "loss": 1.2897, + "step": 42904 + }, + { + "epoch": 0.04, + "learning_rate": 4.977424835599804e-05, + "loss": 1.275, + "step": 42905 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774237823869215e-05, + "loss": 0.8969, + "step": 42906 + }, + { + "epoch": 0.04, + "learning_rate": 4.977422729149582e-05, + "loss": 0.9335, + "step": 42907 + }, + { + "epoch": 0.04, + "learning_rate": 4.977421675887788e-05, + "loss": 0.9518, + "step": 42908 + }, + { + "epoch": 0.04, + "learning_rate": 4.977420622601537e-05, + "loss": 1.1922, + "step": 42909 + }, + { + "epoch": 0.04, + "learning_rate": 4.97741956929083e-05, + "loss": 1.1098, + "step": 42910 + }, + { + "epoch": 0.04, + "learning_rate": 4.977418515955667e-05, + "loss": 1.1723, + "step": 42911 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774174625960476e-05, + "loss": 1.1708, + "step": 42912 + }, + { + "epoch": 0.04, + "learning_rate": 4.977416409211972e-05, + "loss": 1.1952, + "step": 42913 + }, + { + "epoch": 0.04, + "learning_rate": 4.977415355803442e-05, + "loss": 0.9896, + "step": 42914 + }, + { + "epoch": 0.04, + "learning_rate": 4.977414302370454e-05, + "loss": 1.0308, + "step": 42915 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774132489130115e-05, + "loss": 1.2175, + "step": 42916 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774121954311126e-05, + "loss": 1.2778, + "step": 42917 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774111419247575e-05, + "loss": 1.0854, + "step": 42918 + }, + { + "epoch": 0.04, + "learning_rate": 4.977410088393947e-05, + "loss": 1.0974, + "step": 42919 + }, + { + "epoch": 0.04, + "learning_rate": 4.97740903483868e-05, + "loss": 1.2764, + "step": 42920 + }, + { + "epoch": 0.04, + "learning_rate": 4.977407981258957e-05, + "loss": 0.6539, + "step": 42921 + }, + { + "epoch": 0.04, + "learning_rate": 4.977406927654778e-05, + "loss": 0.8578, + "step": 42922 + }, + { + "epoch": 0.04, + "learning_rate": 4.977405874026143e-05, + "loss": 0.5896, + "step": 42923 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774048203730526e-05, + "loss": 0.8544, + "step": 42924 + }, + { + "epoch": 0.04, + "learning_rate": 4.977403766695507e-05, + "loss": 1.0989, + "step": 42925 + }, + { + "epoch": 0.04, + "learning_rate": 4.977402712993504e-05, + "loss": 1.0821, + "step": 42926 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774016592670456e-05, + "loss": 0.8324, + "step": 42927 + }, + { + "epoch": 0.04, + "learning_rate": 4.977400605516132e-05, + "loss": 1.1967, + "step": 42928 + }, + { + "epoch": 0.04, + "learning_rate": 4.977399551740762e-05, + "loss": 0.9882, + "step": 42929 + }, + { + "epoch": 0.04, + "learning_rate": 4.977398497940936e-05, + "loss": 0.9939, + "step": 42930 + }, + { + "epoch": 0.04, + "learning_rate": 4.977397444116654e-05, + "loss": 0.7449, + "step": 42931 + }, + { + "epoch": 0.04, + "learning_rate": 4.977396390267916e-05, + "loss": 0.7581, + "step": 42932 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773953363947235e-05, + "loss": 0.831, + "step": 42933 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773942824970744e-05, + "loss": 1.2357, + "step": 42934 + }, + { + "epoch": 0.04, + "learning_rate": 4.977393228574969e-05, + "loss": 1.0329, + "step": 42935 + }, + { + "epoch": 0.04, + "learning_rate": 4.977392174628409e-05, + "loss": 0.9139, + "step": 42936 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773911206573926e-05, + "loss": 1.1432, + "step": 42937 + }, + { + "epoch": 0.04, + "learning_rate": 4.97739006666192e-05, + "loss": 0.8416, + "step": 42938 + }, + { + "epoch": 0.04, + "learning_rate": 4.977389012641992e-05, + "loss": 0.343, + "step": 42939 + }, + { + "epoch": 0.04, + "learning_rate": 4.977387958597608e-05, + "loss": 1.4421, + "step": 42940 + }, + { + "epoch": 0.04, + "learning_rate": 4.977386904528769e-05, + "loss": 0.8713, + "step": 42941 + }, + { + "epoch": 0.04, + "learning_rate": 4.977385850435474e-05, + "loss": 1.049, + "step": 42942 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773847963177234e-05, + "loss": 1.0541, + "step": 42943 + }, + { + "epoch": 0.04, + "learning_rate": 4.977383742175516e-05, + "loss": 0.8917, + "step": 42944 + }, + { + "epoch": 0.04, + "learning_rate": 4.977382688008855e-05, + "loss": 1.1077, + "step": 42945 + }, + { + "epoch": 0.04, + "learning_rate": 4.977381633817737e-05, + "loss": 1.3695, + "step": 42946 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773805796021636e-05, + "loss": 1.4334, + "step": 42947 + }, + { + "epoch": 0.04, + "learning_rate": 4.977379525362134e-05, + "loss": 2.6273, + "step": 42948 + }, + { + "epoch": 0.04, + "learning_rate": 4.977378471097649e-05, + "loss": 1.9137, + "step": 42949 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773774168087085e-05, + "loss": 1.524, + "step": 42950 + }, + { + "epoch": 0.04, + "learning_rate": 4.977376362495312e-05, + "loss": 1.1171, + "step": 42951 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773753081574605e-05, + "loss": 1.1758, + "step": 42952 + }, + { + "epoch": 0.04, + "learning_rate": 4.977374253795153e-05, + "loss": 1.4397, + "step": 42953 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773731994083904e-05, + "loss": 1.1422, + "step": 42954 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773721449971716e-05, + "loss": 1.0409, + "step": 42955 + }, + { + "epoch": 0.04, + "learning_rate": 4.977371090561498e-05, + "loss": 1.4511, + "step": 42956 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773700361013676e-05, + "loss": 1.1277, + "step": 42957 + }, + { + "epoch": 0.04, + "learning_rate": 4.977368981616782e-05, + "loss": 1.0458, + "step": 42958 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773679271077414e-05, + "loss": 1.156, + "step": 42959 + }, + { + "epoch": 0.04, + "learning_rate": 4.977366872574246e-05, + "loss": 0.7718, + "step": 42960 + }, + { + "epoch": 0.04, + "learning_rate": 4.977365818016294e-05, + "loss": 0.7421, + "step": 42961 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773647634338864e-05, + "loss": 1.0194, + "step": 42962 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773637088270234e-05, + "loss": 0.9989, + "step": 42963 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773626541957055e-05, + "loss": 1.2296, + "step": 42964 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773615995399315e-05, + "loss": 1.1649, + "step": 42965 + }, + { + "epoch": 0.04, + "learning_rate": 4.977360544859702e-05, + "loss": 1.3091, + "step": 42966 + }, + { + "epoch": 0.04, + "learning_rate": 4.977359490155017e-05, + "loss": 1.1339, + "step": 42967 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773584354258774e-05, + "loss": 0.9427, + "step": 42968 + }, + { + "epoch": 0.04, + "learning_rate": 4.977357380672282e-05, + "loss": 0.7372, + "step": 42969 + }, + { + "epoch": 0.04, + "learning_rate": 4.97735632589423e-05, + "loss": 1.1729, + "step": 42970 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773552710917235e-05, + "loss": 0.9114, + "step": 42971 + }, + { + "epoch": 0.04, + "learning_rate": 4.977354216264762e-05, + "loss": 0.9101, + "step": 42972 + }, + { + "epoch": 0.04, + "learning_rate": 4.977353161413345e-05, + "loss": 1.0287, + "step": 42973 + }, + { + "epoch": 0.04, + "learning_rate": 4.977352106537472e-05, + "loss": 0.5721, + "step": 42974 + }, + { + "epoch": 0.04, + "learning_rate": 4.977351051637144e-05, + "loss": 0.3726, + "step": 42975 + }, + { + "epoch": 0.04, + "learning_rate": 4.97734999671236e-05, + "loss": 1.0583, + "step": 42976 + }, + { + "epoch": 0.04, + "learning_rate": 4.977348941763122e-05, + "loss": 0.5973, + "step": 42977 + }, + { + "epoch": 0.04, + "learning_rate": 4.977347886789427e-05, + "loss": 0.8969, + "step": 42978 + }, + { + "epoch": 0.04, + "learning_rate": 4.977346831791278e-05, + "loss": 1.3467, + "step": 42979 + }, + { + "epoch": 0.04, + "learning_rate": 4.977345776768674e-05, + "loss": 0.9516, + "step": 42980 + }, + { + "epoch": 0.04, + "learning_rate": 4.977344721721613e-05, + "loss": 1.0553, + "step": 42981 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773436666500985e-05, + "loss": 1.1517, + "step": 42982 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773426115541275e-05, + "loss": 1.0769, + "step": 42983 + }, + { + "epoch": 0.04, + "learning_rate": 4.977341556433701e-05, + "loss": 1.2595, + "step": 42984 + }, + { + "epoch": 0.04, + "learning_rate": 4.97734050128882e-05, + "loss": 0.8564, + "step": 42985 + }, + { + "epoch": 0.04, + "learning_rate": 4.977339446119484e-05, + "loss": 1.5923, + "step": 42986 + }, + { + "epoch": 0.04, + "learning_rate": 4.977338390925692e-05, + "loss": 1.1656, + "step": 42987 + }, + { + "epoch": 0.04, + "learning_rate": 4.977337335707445e-05, + "loss": 1.0167, + "step": 42988 + }, + { + "epoch": 0.04, + "learning_rate": 4.977336280464743e-05, + "loss": 0.9987, + "step": 42989 + }, + { + "epoch": 0.04, + "learning_rate": 4.977335225197586e-05, + "loss": 0.8149, + "step": 42990 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773341699059726e-05, + "loss": 1.1555, + "step": 42991 + }, + { + "epoch": 0.04, + "learning_rate": 4.977333114589905e-05, + "loss": 1.2343, + "step": 42992 + }, + { + "epoch": 0.04, + "learning_rate": 4.977332059249382e-05, + "loss": 1.0233, + "step": 42993 + }, + { + "epoch": 0.04, + "learning_rate": 4.977331003884404e-05, + "loss": 0.7209, + "step": 42994 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773299484949704e-05, + "loss": 1.1753, + "step": 42995 + }, + { + "epoch": 0.04, + "learning_rate": 4.977328893081082e-05, + "loss": 0.4713, + "step": 42996 + }, + { + "epoch": 0.04, + "learning_rate": 4.977327837642738e-05, + "loss": 0.8025, + "step": 42997 + }, + { + "epoch": 0.04, + "learning_rate": 4.97732678217994e-05, + "loss": 1.0196, + "step": 42998 + }, + { + "epoch": 0.04, + "learning_rate": 4.977325726692686e-05, + "loss": 1.0846, + "step": 42999 + }, + { + "epoch": 0.04, + "learning_rate": 4.977324671180976e-05, + "loss": 1.1446, + "step": 43000 + }, + { + "epoch": 0.04, + "eval_loss": 1.081432819366455, + "eval_runtime": 127.6127, + "eval_samples_per_second": 10.853, + "eval_steps_per_second": 5.43, + "step": 43000 + }, + { + "epoch": 0.04, + "learning_rate": 4.977323615644812e-05, + "loss": 1.0632, + "step": 43001 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773225600841935e-05, + "loss": 0.4377, + "step": 43002 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773215044991186e-05, + "loss": 0.6026, + "step": 43003 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773204488895895e-05, + "loss": 1.0853, + "step": 43004 + }, + { + "epoch": 0.04, + "learning_rate": 4.977319393255605e-05, + "loss": 1.2184, + "step": 43005 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773183375971654e-05, + "loss": 1.7411, + "step": 43006 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773172819142703e-05, + "loss": 1.2331, + "step": 43007 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773162262069205e-05, + "loss": 1.2726, + "step": 43008 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773151704751164e-05, + "loss": 1.3347, + "step": 43009 + }, + { + "epoch": 0.04, + "learning_rate": 4.977314114718857e-05, + "loss": 1.194, + "step": 43010 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773130589381423e-05, + "loss": 0.7383, + "step": 43011 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773120031329724e-05, + "loss": 1.0086, + "step": 43012 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773109473033475e-05, + "loss": 1.1612, + "step": 43013 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773098914492685e-05, + "loss": 1.1513, + "step": 43014 + }, + { + "epoch": 0.04, + "learning_rate": 4.977308835570734e-05, + "loss": 1.1989, + "step": 43015 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773077796677445e-05, + "loss": 1.1165, + "step": 43016 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773067237402995e-05, + "loss": 1.224, + "step": 43017 + }, + { + "epoch": 0.04, + "learning_rate": 4.9773056677884e-05, + "loss": 1.2164, + "step": 43018 + }, + { + "epoch": 0.04, + "learning_rate": 4.977304611812046e-05, + "loss": 1.2975, + "step": 43019 + }, + { + "epoch": 0.04, + "learning_rate": 4.977303555811237e-05, + "loss": 1.2666, + "step": 43020 + }, + { + "epoch": 0.04, + "learning_rate": 4.977302499785973e-05, + "loss": 1.1102, + "step": 43021 + }, + { + "epoch": 0.04, + "learning_rate": 4.977301443736254e-05, + "loss": 1.0956, + "step": 43022 + }, + { + "epoch": 0.04, + "learning_rate": 4.97730038766208e-05, + "loss": 0.8755, + "step": 43023 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772993315634516e-05, + "loss": 0.6887, + "step": 43024 + }, + { + "epoch": 0.04, + "learning_rate": 4.977298275440368e-05, + "loss": 1.2853, + "step": 43025 + }, + { + "epoch": 0.04, + "learning_rate": 4.977297219292829e-05, + "loss": 0.9895, + "step": 43026 + }, + { + "epoch": 0.04, + "learning_rate": 4.977296163120836e-05, + "loss": 1.1133, + "step": 43027 + }, + { + "epoch": 0.04, + "learning_rate": 4.977295106924388e-05, + "loss": 1.0061, + "step": 43028 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772940507034846e-05, + "loss": 1.1824, + "step": 43029 + }, + { + "epoch": 0.04, + "learning_rate": 4.977292994458128e-05, + "loss": 1.1184, + "step": 43030 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772919381883155e-05, + "loss": 0.907, + "step": 43031 + }, + { + "epoch": 0.04, + "learning_rate": 4.977290881894048e-05, + "loss": 0.9148, + "step": 43032 + }, + { + "epoch": 0.04, + "learning_rate": 4.977289825575325e-05, + "loss": 0.9381, + "step": 43033 + }, + { + "epoch": 0.04, + "learning_rate": 4.977288769232149e-05, + "loss": 0.3105, + "step": 43034 + }, + { + "epoch": 0.04, + "learning_rate": 4.977287712864517e-05, + "loss": 0.3206, + "step": 43035 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772866564724306e-05, + "loss": 0.3952, + "step": 43036 + }, + { + "epoch": 0.04, + "learning_rate": 4.97728560005589e-05, + "loss": 0.4374, + "step": 43037 + }, + { + "epoch": 0.04, + "learning_rate": 4.977284543614894e-05, + "loss": 0.7105, + "step": 43038 + }, + { + "epoch": 0.04, + "learning_rate": 4.977283487149444e-05, + "loss": 0.8225, + "step": 43039 + }, + { + "epoch": 0.04, + "learning_rate": 4.977282430659539e-05, + "loss": 0.3765, + "step": 43040 + }, + { + "epoch": 0.04, + "learning_rate": 4.977281374145179e-05, + "loss": 1.224, + "step": 43041 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772803176063645e-05, + "loss": 1.3456, + "step": 43042 + }, + { + "epoch": 0.04, + "learning_rate": 4.977279261043095e-05, + "loss": 1.075, + "step": 43043 + }, + { + "epoch": 0.04, + "learning_rate": 4.977278204455371e-05, + "loss": 1.2487, + "step": 43044 + }, + { + "epoch": 0.04, + "learning_rate": 4.977277147843193e-05, + "loss": 1.1091, + "step": 43045 + }, + { + "epoch": 0.04, + "learning_rate": 4.97727609120656e-05, + "loss": 0.7878, + "step": 43046 + }, + { + "epoch": 0.04, + "learning_rate": 4.977275034545472e-05, + "loss": 2.1026, + "step": 43047 + }, + { + "epoch": 0.04, + "learning_rate": 4.97727397785993e-05, + "loss": 1.2229, + "step": 43048 + }, + { + "epoch": 0.04, + "learning_rate": 4.977272921149933e-05, + "loss": 1.3747, + "step": 43049 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772718644154815e-05, + "loss": 1.1594, + "step": 43050 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772708076565755e-05, + "loss": 1.1678, + "step": 43051 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772697508732146e-05, + "loss": 0.9367, + "step": 43052 + }, + { + "epoch": 0.04, + "learning_rate": 4.977268694065399e-05, + "loss": 1.0917, + "step": 43053 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772676372331296e-05, + "loss": 1.0954, + "step": 43054 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772665803764054e-05, + "loss": 0.9628, + "step": 43055 + }, + { + "epoch": 0.04, + "learning_rate": 4.977265523495226e-05, + "loss": 1.1162, + "step": 43056 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772644665895926e-05, + "loss": 0.959, + "step": 43057 + }, + { + "epoch": 0.04, + "learning_rate": 4.977263409659505e-05, + "loss": 0.4806, + "step": 43058 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772623527049624e-05, + "loss": 0.9448, + "step": 43059 + }, + { + "epoch": 0.04, + "learning_rate": 4.977261295725965e-05, + "loss": 0.9924, + "step": 43060 + }, + { + "epoch": 0.04, + "learning_rate": 4.977260238722514e-05, + "loss": 1.0128, + "step": 43061 + }, + { + "epoch": 0.04, + "learning_rate": 4.977259181694608e-05, + "loss": 0.981, + "step": 43062 + }, + { + "epoch": 0.04, + "learning_rate": 4.977258124642248e-05, + "loss": 0.8146, + "step": 43063 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772570675654326e-05, + "loss": 1.1378, + "step": 43064 + }, + { + "epoch": 0.04, + "learning_rate": 4.977256010464164e-05, + "loss": 0.979, + "step": 43065 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772549533384405e-05, + "loss": 1.0193, + "step": 43066 + }, + { + "epoch": 0.04, + "learning_rate": 4.977253896188262e-05, + "loss": 0.6144, + "step": 43067 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772528390136296e-05, + "loss": 0.564, + "step": 43068 + }, + { + "epoch": 0.04, + "learning_rate": 4.977251781814543e-05, + "loss": 0.7904, + "step": 43069 + }, + { + "epoch": 0.04, + "learning_rate": 4.977250724591001e-05, + "loss": 1.1383, + "step": 43070 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772496673430056e-05, + "loss": 1.0035, + "step": 43071 + }, + { + "epoch": 0.04, + "learning_rate": 4.977248610070556e-05, + "loss": 1.0959, + "step": 43072 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772475527736515e-05, + "loss": 0.8807, + "step": 43073 + }, + { + "epoch": 0.04, + "learning_rate": 4.977246495452293e-05, + "loss": 0.1096, + "step": 43074 + }, + { + "epoch": 0.04, + "learning_rate": 4.97724543810648e-05, + "loss": 0.0629, + "step": 43075 + }, + { + "epoch": 0.04, + "learning_rate": 4.977244380736212e-05, + "loss": 0.0546, + "step": 43076 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772433233414906e-05, + "loss": 0.0427, + "step": 43077 + }, + { + "epoch": 0.04, + "learning_rate": 4.977242265922315e-05, + "loss": 0.0341, + "step": 43078 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772412084786844e-05, + "loss": 0.0418, + "step": 43079 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772401510106e-05, + "loss": 0.0371, + "step": 43080 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772390935180614e-05, + "loss": 0.0388, + "step": 43081 + }, + { + "epoch": 0.04, + "learning_rate": 4.977238036001068e-05, + "loss": 0.0954, + "step": 43082 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772369784596204e-05, + "loss": 0.0718, + "step": 43083 + }, + { + "epoch": 0.04, + "learning_rate": 4.977235920893719e-05, + "loss": 0.087, + "step": 43084 + }, + { + "epoch": 0.04, + "learning_rate": 4.977234863303364e-05, + "loss": 0.0727, + "step": 43085 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772338056885535e-05, + "loss": 0.6035, + "step": 43086 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772327480492895e-05, + "loss": 1.011, + "step": 43087 + }, + { + "epoch": 0.04, + "learning_rate": 4.977231690385571e-05, + "loss": 0.7542, + "step": 43088 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772306326973983e-05, + "loss": 0.9139, + "step": 43089 + }, + { + "epoch": 0.04, + "learning_rate": 4.977229574984772e-05, + "loss": 1.0007, + "step": 43090 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772285172476904e-05, + "loss": 0.8838, + "step": 43091 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772274594861556e-05, + "loss": 0.9334, + "step": 43092 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772264017001665e-05, + "loss": 1.0065, + "step": 43093 + }, + { + "epoch": 0.04, + "learning_rate": 4.977225343889723e-05, + "loss": 0.9454, + "step": 43094 + }, + { + "epoch": 0.04, + "learning_rate": 4.977224286054826e-05, + "loss": 1.0232, + "step": 43095 + }, + { + "epoch": 0.04, + "learning_rate": 4.977223228195474e-05, + "loss": 1.0004, + "step": 43096 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772221703116684e-05, + "loss": 0.9088, + "step": 43097 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772211124034085e-05, + "loss": 1.4474, + "step": 43098 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772200544706943e-05, + "loss": 1.2021, + "step": 43099 + }, + { + "epoch": 0.04, + "learning_rate": 4.977218996513526e-05, + "loss": 0.8628, + "step": 43100 + }, + { + "epoch": 0.04, + "learning_rate": 4.977217938531905e-05, + "loss": 1.3833, + "step": 43101 + }, + { + "epoch": 0.04, + "learning_rate": 4.977216880525828e-05, + "loss": 1.0591, + "step": 43102 + }, + { + "epoch": 0.04, + "learning_rate": 4.977215822495298e-05, + "loss": 0.8425, + "step": 43103 + }, + { + "epoch": 0.04, + "learning_rate": 4.977214764440314e-05, + "loss": 1.4179, + "step": 43104 + }, + { + "epoch": 0.04, + "learning_rate": 4.977213706360876e-05, + "loss": 0.5969, + "step": 43105 + }, + { + "epoch": 0.04, + "learning_rate": 4.977212648256984e-05, + "loss": 0.8367, + "step": 43106 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772115901286375e-05, + "loss": 1.0126, + "step": 43107 + }, + { + "epoch": 0.04, + "learning_rate": 4.977210531975838e-05, + "loss": 1.0772, + "step": 43108 + }, + { + "epoch": 0.04, + "learning_rate": 4.977209473798583e-05, + "loss": 0.8785, + "step": 43109 + }, + { + "epoch": 0.04, + "learning_rate": 4.977208415596875e-05, + "loss": 0.6918, + "step": 43110 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772073573707135e-05, + "loss": 0.8133, + "step": 43111 + }, + { + "epoch": 0.04, + "learning_rate": 4.977206299120098e-05, + "loss": 1.0763, + "step": 43112 + }, + { + "epoch": 0.04, + "learning_rate": 4.977205240845027e-05, + "loss": 0.9928, + "step": 43113 + }, + { + "epoch": 0.04, + "learning_rate": 4.977204182545503e-05, + "loss": 0.8391, + "step": 43114 + }, + { + "epoch": 0.04, + "learning_rate": 4.977203124221525e-05, + "loss": 1.1488, + "step": 43115 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772020658730935e-05, + "loss": 1.1006, + "step": 43116 + }, + { + "epoch": 0.04, + "learning_rate": 4.977201007500208e-05, + "loss": 0.8371, + "step": 43117 + }, + { + "epoch": 0.04, + "learning_rate": 4.977199949102869e-05, + "loss": 0.9447, + "step": 43118 + }, + { + "epoch": 0.04, + "learning_rate": 4.977198890681075e-05, + "loss": 1.1159, + "step": 43119 + }, + { + "epoch": 0.04, + "learning_rate": 4.977197832234828e-05, + "loss": 1.0058, + "step": 43120 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771967737641265e-05, + "loss": 0.9408, + "step": 43121 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771957152689716e-05, + "loss": 0.8724, + "step": 43122 + }, + { + "epoch": 0.04, + "learning_rate": 4.977194656749363e-05, + "loss": 1.0278, + "step": 43123 + }, + { + "epoch": 0.04, + "learning_rate": 4.977193598205301e-05, + "loss": 0.8365, + "step": 43124 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771925396367847e-05, + "loss": 1.0471, + "step": 43125 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771914810438144e-05, + "loss": 1.0148, + "step": 43126 + }, + { + "epoch": 0.04, + "learning_rate": 4.97719042242639e-05, + "loss": 1.3552, + "step": 43127 + }, + { + "epoch": 0.04, + "learning_rate": 4.977189363784512e-05, + "loss": 1.0987, + "step": 43128 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771883051181814e-05, + "loss": 1.0702, + "step": 43129 + }, + { + "epoch": 0.04, + "learning_rate": 4.977187246427396e-05, + "loss": 0.9085, + "step": 43130 + }, + { + "epoch": 0.04, + "learning_rate": 4.977186187712156e-05, + "loss": 0.6807, + "step": 43131 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771851289724634e-05, + "loss": 0.7956, + "step": 43132 + }, + { + "epoch": 0.04, + "learning_rate": 4.977184070208317e-05, + "loss": 0.838, + "step": 43133 + }, + { + "epoch": 0.04, + "learning_rate": 4.977183011419717e-05, + "loss": 0.9174, + "step": 43134 + }, + { + "epoch": 0.04, + "learning_rate": 4.977181952606663e-05, + "loss": 1.0231, + "step": 43135 + }, + { + "epoch": 0.04, + "learning_rate": 4.977180893769155e-05, + "loss": 0.9268, + "step": 43136 + }, + { + "epoch": 0.04, + "learning_rate": 4.977179834907194e-05, + "loss": 0.8808, + "step": 43137 + }, + { + "epoch": 0.04, + "learning_rate": 4.977178776020779e-05, + "loss": 0.9102, + "step": 43138 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771777171099104e-05, + "loss": 1.0167, + "step": 43139 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771766581745884e-05, + "loss": 1.1593, + "step": 43140 + }, + { + "epoch": 0.04, + "learning_rate": 4.977175599214812e-05, + "loss": 1.0469, + "step": 43141 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771745402305826e-05, + "loss": 1.2328, + "step": 43142 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771734812218995e-05, + "loss": 1.2675, + "step": 43143 + }, + { + "epoch": 0.04, + "learning_rate": 4.977172422188763e-05, + "loss": 1.2475, + "step": 43144 + }, + { + "epoch": 0.04, + "learning_rate": 4.977171363131172e-05, + "loss": 0.9862, + "step": 43145 + }, + { + "epoch": 0.04, + "learning_rate": 4.977170304049128e-05, + "loss": 0.8124, + "step": 43146 + }, + { + "epoch": 0.04, + "learning_rate": 4.977169244942631e-05, + "loss": 1.1512, + "step": 43147 + }, + { + "epoch": 0.04, + "learning_rate": 4.977168185811679e-05, + "loss": 1.0953, + "step": 43148 + }, + { + "epoch": 0.04, + "learning_rate": 4.977167126656275e-05, + "loss": 1.094, + "step": 43149 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771660674764166e-05, + "loss": 1.4725, + "step": 43150 + }, + { + "epoch": 0.04, + "learning_rate": 4.977165008272104e-05, + "loss": 1.3645, + "step": 43151 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771639490433394e-05, + "loss": 1.1471, + "step": 43152 + }, + { + "epoch": 0.04, + "learning_rate": 4.97716288979012e-05, + "loss": 1.0884, + "step": 43153 + }, + { + "epoch": 0.04, + "learning_rate": 4.977161830512448e-05, + "loss": 1.1766, + "step": 43154 + }, + { + "epoch": 0.04, + "learning_rate": 4.977160771210322e-05, + "loss": 1.1144, + "step": 43155 + }, + { + "epoch": 0.04, + "learning_rate": 4.977159711883743e-05, + "loss": 0.8863, + "step": 43156 + }, + { + "epoch": 0.04, + "learning_rate": 4.97715865253271e-05, + "loss": 0.814, + "step": 43157 + }, + { + "epoch": 0.04, + "learning_rate": 4.977157593157224e-05, + "loss": 0.9863, + "step": 43158 + }, + { + "epoch": 0.04, + "learning_rate": 4.977156533757284e-05, + "loss": 1.6175, + "step": 43159 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771554743328905e-05, + "loss": 1.2704, + "step": 43160 + }, + { + "epoch": 0.04, + "learning_rate": 4.977154414884044e-05, + "loss": 1.2589, + "step": 43161 + }, + { + "epoch": 0.04, + "learning_rate": 4.977153355410744e-05, + "loss": 1.2298, + "step": 43162 + }, + { + "epoch": 0.04, + "learning_rate": 4.977152295912991e-05, + "loss": 1.3787, + "step": 43163 + }, + { + "epoch": 0.04, + "learning_rate": 4.977151236390783e-05, + "loss": 1.3231, + "step": 43164 + }, + { + "epoch": 0.04, + "learning_rate": 4.977150176844124e-05, + "loss": 1.4596, + "step": 43165 + }, + { + "epoch": 0.04, + "learning_rate": 4.97714911727301e-05, + "loss": 1.1637, + "step": 43166 + }, + { + "epoch": 0.04, + "learning_rate": 4.977148057677443e-05, + "loss": 1.2652, + "step": 43167 + }, + { + "epoch": 0.04, + "learning_rate": 4.977146998057423e-05, + "loss": 1.2693, + "step": 43168 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771459384129486e-05, + "loss": 0.806, + "step": 43169 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771448787440216e-05, + "loss": 1.3163, + "step": 43170 + }, + { + "epoch": 0.04, + "learning_rate": 4.977143819050642e-05, + "loss": 1.1406, + "step": 43171 + }, + { + "epoch": 0.04, + "learning_rate": 4.977142759332808e-05, + "loss": 1.074, + "step": 43172 + }, + { + "epoch": 0.04, + "learning_rate": 4.977141699590521e-05, + "loss": 0.8529, + "step": 43173 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771406398237804e-05, + "loss": 1.0116, + "step": 43174 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771395800325865e-05, + "loss": 0.9894, + "step": 43175 + }, + { + "epoch": 0.04, + "learning_rate": 4.97713852021694e-05, + "loss": 1.4412, + "step": 43176 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771374603768396e-05, + "loss": 1.3144, + "step": 43177 + }, + { + "epoch": 0.04, + "learning_rate": 4.977136400512286e-05, + "loss": 1.7968, + "step": 43178 + }, + { + "epoch": 0.04, + "learning_rate": 4.977135340623279e-05, + "loss": 0.9951, + "step": 43179 + }, + { + "epoch": 0.04, + "learning_rate": 4.97713428070982e-05, + "loss": 0.6374, + "step": 43180 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771332207719065e-05, + "loss": 1.1862, + "step": 43181 + }, + { + "epoch": 0.04, + "learning_rate": 4.97713216080954e-05, + "loss": 0.976, + "step": 43182 + }, + { + "epoch": 0.04, + "learning_rate": 4.97713110082272e-05, + "loss": 1.0794, + "step": 43183 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771300408114476e-05, + "loss": 1.2781, + "step": 43184 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771289807757214e-05, + "loss": 1.313, + "step": 43185 + }, + { + "epoch": 0.04, + "learning_rate": 4.977127920715543e-05, + "loss": 1.5829, + "step": 43186 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771268606309106e-05, + "loss": 0.7633, + "step": 43187 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771258005218246e-05, + "loss": 0.9103, + "step": 43188 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771247403882864e-05, + "loss": 1.0869, + "step": 43189 + }, + { + "epoch": 0.04, + "learning_rate": 4.977123680230295e-05, + "loss": 0.9666, + "step": 43190 + }, + { + "epoch": 0.04, + "learning_rate": 4.97712262004785e-05, + "loss": 1.2739, + "step": 43191 + }, + { + "epoch": 0.04, + "learning_rate": 4.977121559840952e-05, + "loss": 1.2772, + "step": 43192 + }, + { + "epoch": 0.04, + "learning_rate": 4.977120499609601e-05, + "loss": 0.9195, + "step": 43193 + }, + { + "epoch": 0.04, + "learning_rate": 4.977119439353797e-05, + "loss": 1.1136, + "step": 43194 + }, + { + "epoch": 0.04, + "learning_rate": 4.97711837907354e-05, + "loss": 1.842, + "step": 43195 + }, + { + "epoch": 0.04, + "learning_rate": 4.977117318768829e-05, + "loss": 0.9213, + "step": 43196 + }, + { + "epoch": 0.04, + "learning_rate": 4.977116258439666e-05, + "loss": 0.9348, + "step": 43197 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771151980860495e-05, + "loss": 1.0133, + "step": 43198 + }, + { + "epoch": 0.04, + "learning_rate": 4.97711413770798e-05, + "loss": 1.0242, + "step": 43199 + }, + { + "epoch": 0.04, + "learning_rate": 4.977113077305458e-05, + "loss": 1.3006, + "step": 43200 + }, + { + "epoch": 0.04, + "learning_rate": 4.977112016878482e-05, + "loss": 1.1613, + "step": 43201 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771109564270534e-05, + "loss": 0.9065, + "step": 43202 + }, + { + "epoch": 0.04, + "learning_rate": 4.977109895951172e-05, + "loss": 0.7703, + "step": 43203 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771088354508377e-05, + "loss": 1.0133, + "step": 43204 + }, + { + "epoch": 0.04, + "learning_rate": 4.977107774926051e-05, + "loss": 0.9613, + "step": 43205 + }, + { + "epoch": 0.04, + "learning_rate": 4.97710671437681e-05, + "loss": 1.4714, + "step": 43206 + }, + { + "epoch": 0.04, + "learning_rate": 4.977105653803117e-05, + "loss": 0.8552, + "step": 43207 + }, + { + "epoch": 0.04, + "learning_rate": 4.977104593204971e-05, + "loss": 0.7925, + "step": 43208 + }, + { + "epoch": 0.04, + "learning_rate": 4.977103532582371e-05, + "loss": 0.6932, + "step": 43209 + }, + { + "epoch": 0.04, + "learning_rate": 4.9771024719353194e-05, + "loss": 0.8306, + "step": 43210 + }, + { + "epoch": 0.04, + "learning_rate": 4.977101411263814e-05, + "loss": 1.0674, + "step": 43211 + }, + { + "epoch": 0.04, + "learning_rate": 4.977100350567856e-05, + "loss": 0.8299, + "step": 43212 + }, + { + "epoch": 0.04, + "learning_rate": 4.977099289847446e-05, + "loss": 0.9263, + "step": 43213 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770982291025815e-05, + "loss": 1.0585, + "step": 43214 + }, + { + "epoch": 0.04, + "learning_rate": 4.977097168333265e-05, + "loss": 1.1923, + "step": 43215 + }, + { + "epoch": 0.04, + "learning_rate": 4.977096107539495e-05, + "loss": 1.0391, + "step": 43216 + }, + { + "epoch": 0.04, + "learning_rate": 4.977095046721273e-05, + "loss": 0.7899, + "step": 43217 + }, + { + "epoch": 0.04, + "learning_rate": 4.977093985878598e-05, + "loss": 0.65, + "step": 43218 + }, + { + "epoch": 0.04, + "learning_rate": 4.977092925011471e-05, + "loss": 0.5966, + "step": 43219 + }, + { + "epoch": 0.04, + "learning_rate": 4.97709186411989e-05, + "loss": 0.8533, + "step": 43220 + }, + { + "epoch": 0.04, + "learning_rate": 4.977090803203856e-05, + "loss": 1.1431, + "step": 43221 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770897422633696e-05, + "loss": 1.0611, + "step": 43222 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770886812984306e-05, + "loss": 1.1186, + "step": 43223 + }, + { + "epoch": 0.04, + "learning_rate": 4.977087620309038e-05, + "loss": 0.9337, + "step": 43224 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770865592951936e-05, + "loss": 0.9971, + "step": 43225 + }, + { + "epoch": 0.04, + "learning_rate": 4.977085498256896e-05, + "loss": 1.128, + "step": 43226 + }, + { + "epoch": 0.04, + "learning_rate": 4.977084437194146e-05, + "loss": 1.1355, + "step": 43227 + }, + { + "epoch": 0.04, + "learning_rate": 4.977083376106943e-05, + "loss": 1.0533, + "step": 43228 + }, + { + "epoch": 0.04, + "learning_rate": 4.977082314995287e-05, + "loss": 1.0487, + "step": 43229 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770812538591796e-05, + "loss": 0.9796, + "step": 43230 + }, + { + "epoch": 0.04, + "learning_rate": 4.977080192698618e-05, + "loss": 0.9609, + "step": 43231 + }, + { + "epoch": 0.04, + "learning_rate": 4.977079131513604e-05, + "loss": 0.9599, + "step": 43232 + }, + { + "epoch": 0.04, + "learning_rate": 4.977078070304138e-05, + "loss": 1.0132, + "step": 43233 + }, + { + "epoch": 0.04, + "learning_rate": 4.977077009070219e-05, + "loss": 1.0655, + "step": 43234 + }, + { + "epoch": 0.04, + "learning_rate": 4.977075947811847e-05, + "loss": 1.0568, + "step": 43235 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770748865290226e-05, + "loss": 1.0287, + "step": 43236 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770738252217455e-05, + "loss": 1.0251, + "step": 43237 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770727638900155e-05, + "loss": 0.9889, + "step": 43238 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770717025338334e-05, + "loss": 0.9926, + "step": 43239 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770706411531985e-05, + "loss": 0.9962, + "step": 43240 + }, + { + "epoch": 0.04, + "learning_rate": 4.977069579748111e-05, + "loss": 1.0461, + "step": 43241 + }, + { + "epoch": 0.04, + "learning_rate": 4.977068518318571e-05, + "loss": 0.9808, + "step": 43242 + }, + { + "epoch": 0.04, + "learning_rate": 4.977067456864578e-05, + "loss": 0.963, + "step": 43243 + }, + { + "epoch": 0.04, + "learning_rate": 4.977066395386133e-05, + "loss": 0.9551, + "step": 43244 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770653338832355e-05, + "loss": 1.0921, + "step": 43245 + }, + { + "epoch": 0.04, + "learning_rate": 4.977064272355885e-05, + "loss": 1.1196, + "step": 43246 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770632108040816e-05, + "loss": 1.1508, + "step": 43247 + }, + { + "epoch": 0.04, + "learning_rate": 4.977062149227826e-05, + "loss": 0.6406, + "step": 43248 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770610876271184e-05, + "loss": 0.1922, + "step": 43249 + }, + { + "epoch": 0.04, + "learning_rate": 4.977060026001958e-05, + "loss": 1.0864, + "step": 43250 + }, + { + "epoch": 0.04, + "learning_rate": 4.977058964352345e-05, + "loss": 0.8838, + "step": 43251 + }, + { + "epoch": 0.04, + "learning_rate": 4.977057902678279e-05, + "loss": 0.8094, + "step": 43252 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770568409797616e-05, + "loss": 1.1516, + "step": 43253 + }, + { + "epoch": 0.04, + "learning_rate": 4.977055779256792e-05, + "loss": 0.7887, + "step": 43254 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770547175093686e-05, + "loss": 0.6643, + "step": 43255 + }, + { + "epoch": 0.04, + "learning_rate": 4.977053655737494e-05, + "loss": 1.5653, + "step": 43256 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770525939411664e-05, + "loss": 1.1032, + "step": 43257 + }, + { + "epoch": 0.04, + "learning_rate": 4.977051532120386e-05, + "loss": 1.0197, + "step": 43258 + }, + { + "epoch": 0.04, + "learning_rate": 4.977050470275153e-05, + "loss": 0.8592, + "step": 43259 + }, + { + "epoch": 0.04, + "learning_rate": 4.977049408405469e-05, + "loss": 1.1767, + "step": 43260 + }, + { + "epoch": 0.04, + "learning_rate": 4.977048346511331e-05, + "loss": 1.082, + "step": 43261 + }, + { + "epoch": 0.04, + "learning_rate": 4.977047284592741e-05, + "loss": 0.8171, + "step": 43262 + }, + { + "epoch": 0.04, + "learning_rate": 4.977046222649699e-05, + "loss": 1.088, + "step": 43263 + }, + { + "epoch": 0.04, + "learning_rate": 4.977045160682205e-05, + "loss": 1.1417, + "step": 43264 + }, + { + "epoch": 0.04, + "learning_rate": 4.977044098690258e-05, + "loss": 1.0392, + "step": 43265 + }, + { + "epoch": 0.04, + "learning_rate": 4.977043036673859e-05, + "loss": 1.0419, + "step": 43266 + }, + { + "epoch": 0.04, + "learning_rate": 4.977041974633008e-05, + "loss": 0.9271, + "step": 43267 + }, + { + "epoch": 0.04, + "learning_rate": 4.977040912567704e-05, + "loss": 1.0858, + "step": 43268 + }, + { + "epoch": 0.04, + "learning_rate": 4.977039850477948e-05, + "loss": 1.3078, + "step": 43269 + }, + { + "epoch": 0.04, + "learning_rate": 4.97703878836374e-05, + "loss": 2.2399, + "step": 43270 + }, + { + "epoch": 0.04, + "learning_rate": 4.977037726225079e-05, + "loss": 0.9992, + "step": 43271 + }, + { + "epoch": 0.04, + "learning_rate": 4.977036664061966e-05, + "loss": 0.8268, + "step": 43272 + }, + { + "epoch": 0.04, + "learning_rate": 4.977035601874401e-05, + "loss": 1.0382, + "step": 43273 + }, + { + "epoch": 0.04, + "learning_rate": 4.977034539662384e-05, + "loss": 0.765, + "step": 43274 + }, + { + "epoch": 0.04, + "learning_rate": 4.977033477425914e-05, + "loss": 1.0939, + "step": 43275 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770324151649925e-05, + "loss": 1.3437, + "step": 43276 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770313528796187e-05, + "loss": 0.8166, + "step": 43277 + }, + { + "epoch": 0.04, + "learning_rate": 4.977030290569792e-05, + "loss": 0.9506, + "step": 43278 + }, + { + "epoch": 0.04, + "learning_rate": 4.977029228235514e-05, + "loss": 0.7113, + "step": 43279 + }, + { + "epoch": 0.04, + "learning_rate": 4.977028165876783e-05, + "loss": 1.0758, + "step": 43280 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770271034936004e-05, + "loss": 1.4441, + "step": 43281 + }, + { + "epoch": 0.04, + "learning_rate": 4.977026041085965e-05, + "loss": 1.0357, + "step": 43282 + }, + { + "epoch": 0.04, + "learning_rate": 4.977024978653878e-05, + "loss": 0.8678, + "step": 43283 + }, + { + "epoch": 0.04, + "learning_rate": 4.977023916197339e-05, + "loss": 1.084, + "step": 43284 + }, + { + "epoch": 0.04, + "learning_rate": 4.977022853716347e-05, + "loss": 0.8571, + "step": 43285 + }, + { + "epoch": 0.04, + "learning_rate": 4.977021791210904e-05, + "loss": 1.184, + "step": 43286 + }, + { + "epoch": 0.04, + "learning_rate": 4.977020728681008e-05, + "loss": 1.2234, + "step": 43287 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770196661266596e-05, + "loss": 1.2278, + "step": 43288 + }, + { + "epoch": 0.04, + "learning_rate": 4.977018603547861e-05, + "loss": 1.3374, + "step": 43289 + }, + { + "epoch": 0.04, + "learning_rate": 4.977017540944608e-05, + "loss": 1.1755, + "step": 43290 + }, + { + "epoch": 0.04, + "learning_rate": 4.977016478316904e-05, + "loss": 0.9307, + "step": 43291 + }, + { + "epoch": 0.04, + "learning_rate": 4.977015415664748e-05, + "loss": 0.9489, + "step": 43292 + }, + { + "epoch": 0.04, + "learning_rate": 4.97701435298814e-05, + "loss": 1.4799, + "step": 43293 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770132902870796e-05, + "loss": 0.7257, + "step": 43294 + }, + { + "epoch": 0.04, + "learning_rate": 4.977012227561567e-05, + "loss": 1.0426, + "step": 43295 + }, + { + "epoch": 0.04, + "learning_rate": 4.977011164811604e-05, + "loss": 0.6681, + "step": 43296 + }, + { + "epoch": 0.04, + "learning_rate": 4.977010102037187e-05, + "loss": 0.3359, + "step": 43297 + }, + { + "epoch": 0.04, + "learning_rate": 4.977009039238319e-05, + "loss": 0.1901, + "step": 43298 + }, + { + "epoch": 0.04, + "learning_rate": 4.977007976414998e-05, + "loss": 0.3593, + "step": 43299 + }, + { + "epoch": 0.04, + "learning_rate": 4.977006913567226e-05, + "loss": 0.7129, + "step": 43300 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770058506950023e-05, + "loss": 0.5447, + "step": 43301 + }, + { + "epoch": 0.04, + "learning_rate": 4.977004787798326e-05, + "loss": 0.9973, + "step": 43302 + }, + { + "epoch": 0.04, + "learning_rate": 4.977003724877198e-05, + "loss": 1.0612, + "step": 43303 + }, + { + "epoch": 0.04, + "learning_rate": 4.977002661931618e-05, + "loss": 1.0703, + "step": 43304 + }, + { + "epoch": 0.04, + "learning_rate": 4.977001598961586e-05, + "loss": 1.2384, + "step": 43305 + }, + { + "epoch": 0.04, + "learning_rate": 4.977000535967102e-05, + "loss": 0.6851, + "step": 43306 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769994729481664e-05, + "loss": 0.9465, + "step": 43307 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769984099047787e-05, + "loss": 1.1582, + "step": 43308 + }, + { + "epoch": 0.04, + "learning_rate": 4.976997346836939e-05, + "loss": 0.9706, + "step": 43309 + }, + { + "epoch": 0.04, + "learning_rate": 4.976996283744647e-05, + "loss": 0.9789, + "step": 43310 + }, + { + "epoch": 0.04, + "learning_rate": 4.976995220627904e-05, + "loss": 0.8408, + "step": 43311 + }, + { + "epoch": 0.04, + "learning_rate": 4.976994157486709e-05, + "loss": 1.3441, + "step": 43312 + }, + { + "epoch": 0.04, + "learning_rate": 4.976993094321062e-05, + "loss": 1.5034, + "step": 43313 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769920311309634e-05, + "loss": 1.7258, + "step": 43314 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769909679164126e-05, + "loss": 0.7056, + "step": 43315 + }, + { + "epoch": 0.04, + "learning_rate": 4.97698990467741e-05, + "loss": 0.7369, + "step": 43316 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769888414139554e-05, + "loss": 0.8738, + "step": 43317 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769877781260496e-05, + "loss": 1.2305, + "step": 43318 + }, + { + "epoch": 0.04, + "learning_rate": 4.976986714813692e-05, + "loss": 1.1426, + "step": 43319 + }, + { + "epoch": 0.04, + "learning_rate": 4.976985651476882e-05, + "loss": 1.2824, + "step": 43320 + }, + { + "epoch": 0.04, + "learning_rate": 4.976984588115621e-05, + "loss": 1.0438, + "step": 43321 + }, + { + "epoch": 0.04, + "learning_rate": 4.976983524729908e-05, + "loss": 0.9252, + "step": 43322 + }, + { + "epoch": 0.04, + "learning_rate": 4.976982461319743e-05, + "loss": 0.9712, + "step": 43323 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769813978851265e-05, + "loss": 0.9553, + "step": 43324 + }, + { + "epoch": 0.04, + "learning_rate": 4.976980334426058e-05, + "loss": 1.0424, + "step": 43325 + }, + { + "epoch": 0.04, + "learning_rate": 4.976979270942538e-05, + "loss": 1.306, + "step": 43326 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769782074345664e-05, + "loss": 1.1726, + "step": 43327 + }, + { + "epoch": 0.04, + "learning_rate": 4.976977143902143e-05, + "loss": 0.8769, + "step": 43328 + }, + { + "epoch": 0.04, + "learning_rate": 4.976976080345268e-05, + "loss": 1.0808, + "step": 43329 + }, + { + "epoch": 0.04, + "learning_rate": 4.976975016763941e-05, + "loss": 0.929, + "step": 43330 + }, + { + "epoch": 0.04, + "learning_rate": 4.976973953158163e-05, + "loss": 1.1078, + "step": 43331 + }, + { + "epoch": 0.04, + "learning_rate": 4.976972889527933e-05, + "loss": 1.4006, + "step": 43332 + }, + { + "epoch": 0.04, + "learning_rate": 4.976971825873251e-05, + "loss": 0.9467, + "step": 43333 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769707621941177e-05, + "loss": 1.0898, + "step": 43334 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769696984905335e-05, + "loss": 0.7099, + "step": 43335 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769686347624965e-05, + "loss": 0.9711, + "step": 43336 + }, + { + "epoch": 0.04, + "learning_rate": 4.976967571010008e-05, + "loss": 0.9778, + "step": 43337 + }, + { + "epoch": 0.04, + "learning_rate": 4.976966507233069e-05, + "loss": 1.0051, + "step": 43338 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769654434316774e-05, + "loss": 1.1141, + "step": 43339 + }, + { + "epoch": 0.04, + "learning_rate": 4.976964379605835e-05, + "loss": 1.071, + "step": 43340 + }, + { + "epoch": 0.04, + "learning_rate": 4.976963315755541e-05, + "loss": 1.4159, + "step": 43341 + }, + { + "epoch": 0.04, + "learning_rate": 4.976962251880794e-05, + "loss": 0.6071, + "step": 43342 + }, + { + "epoch": 0.04, + "learning_rate": 4.976961187981597e-05, + "loss": 1.0446, + "step": 43343 + }, + { + "epoch": 0.04, + "learning_rate": 4.976960124057948e-05, + "loss": 0.7221, + "step": 43344 + }, + { + "epoch": 0.04, + "learning_rate": 4.976959060109848e-05, + "loss": 1.1228, + "step": 43345 + }, + { + "epoch": 0.04, + "learning_rate": 4.976957996137296e-05, + "loss": 1.3737, + "step": 43346 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769569321402925e-05, + "loss": 0.8423, + "step": 43347 + }, + { + "epoch": 0.04, + "learning_rate": 4.976955868118838e-05, + "loss": 0.9873, + "step": 43348 + }, + { + "epoch": 0.04, + "learning_rate": 4.976954804072931e-05, + "loss": 0.9554, + "step": 43349 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769537400025736e-05, + "loss": 1.0356, + "step": 43350 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769526759077645e-05, + "loss": 0.9013, + "step": 43351 + }, + { + "epoch": 0.04, + "learning_rate": 4.976951611788504e-05, + "loss": 1.4114, + "step": 43352 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769505476447914e-05, + "loss": 1.1707, + "step": 43353 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769494834766286e-05, + "loss": 0.8104, + "step": 43354 + }, + { + "epoch": 0.04, + "learning_rate": 4.976948419284013e-05, + "loss": 0.8379, + "step": 43355 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769473550669466e-05, + "loss": 0.3183, + "step": 43356 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769462908254294e-05, + "loss": 0.4001, + "step": 43357 + }, + { + "epoch": 0.04, + "learning_rate": 4.97694522655946e-05, + "loss": 0.3442, + "step": 43358 + }, + { + "epoch": 0.04, + "learning_rate": 4.976944162269039e-05, + "loss": 0.3433, + "step": 43359 + }, + { + "epoch": 0.04, + "learning_rate": 4.976943097954168e-05, + "loss": 0.9359, + "step": 43360 + }, + { + "epoch": 0.04, + "learning_rate": 4.976942033614845e-05, + "loss": 0.8109, + "step": 43361 + }, + { + "epoch": 0.04, + "learning_rate": 4.97694096925107e-05, + "loss": 0.1944, + "step": 43362 + }, + { + "epoch": 0.04, + "learning_rate": 4.976939904862845e-05, + "loss": 1.257, + "step": 43363 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769388404501674e-05, + "loss": 1.0825, + "step": 43364 + }, + { + "epoch": 0.04, + "learning_rate": 4.976937776013039e-05, + "loss": 0.8951, + "step": 43365 + }, + { + "epoch": 0.04, + "learning_rate": 4.97693671155146e-05, + "loss": 0.5043, + "step": 43366 + }, + { + "epoch": 0.04, + "learning_rate": 4.976935647065428e-05, + "loss": 0.3918, + "step": 43367 + }, + { + "epoch": 0.04, + "learning_rate": 4.976934582554946e-05, + "loss": 0.8302, + "step": 43368 + }, + { + "epoch": 0.04, + "learning_rate": 4.976933518020013e-05, + "loss": 1.0176, + "step": 43369 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769324534606285e-05, + "loss": 1.0758, + "step": 43370 + }, + { + "epoch": 0.04, + "learning_rate": 4.976931388876792e-05, + "loss": 1.1982, + "step": 43371 + }, + { + "epoch": 0.04, + "learning_rate": 4.976930324268505e-05, + "loss": 0.7094, + "step": 43372 + }, + { + "epoch": 0.04, + "learning_rate": 4.976929259635767e-05, + "loss": 1.0813, + "step": 43373 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769281949785775e-05, + "loss": 0.8183, + "step": 43374 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769271302969366e-05, + "loss": 1.0532, + "step": 43375 + }, + { + "epoch": 0.04, + "learning_rate": 4.976926065590844e-05, + "loss": 1.064, + "step": 43376 + }, + { + "epoch": 0.04, + "learning_rate": 4.976925000860302e-05, + "loss": 1.0882, + "step": 43377 + }, + { + "epoch": 0.04, + "learning_rate": 4.976923936105308e-05, + "loss": 1.0283, + "step": 43378 + }, + { + "epoch": 0.04, + "learning_rate": 4.976922871325862e-05, + "loss": 0.874, + "step": 43379 + }, + { + "epoch": 0.04, + "learning_rate": 4.976921806521966e-05, + "loss": 1.1968, + "step": 43380 + }, + { + "epoch": 0.04, + "learning_rate": 4.976920741693618e-05, + "loss": 1.4656, + "step": 43381 + }, + { + "epoch": 0.04, + "learning_rate": 4.976919676840819e-05, + "loss": 1.0594, + "step": 43382 + }, + { + "epoch": 0.04, + "learning_rate": 4.976918611963569e-05, + "loss": 1.0304, + "step": 43383 + }, + { + "epoch": 0.04, + "learning_rate": 4.976917547061868e-05, + "loss": 1.1944, + "step": 43384 + }, + { + "epoch": 0.04, + "learning_rate": 4.976916482135716e-05, + "loss": 1.0599, + "step": 43385 + }, + { + "epoch": 0.04, + "learning_rate": 4.976915417185113e-05, + "loss": 1.0202, + "step": 43386 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769143522100594e-05, + "loss": 0.9462, + "step": 43387 + }, + { + "epoch": 0.04, + "learning_rate": 4.976913287210554e-05, + "loss": 0.5728, + "step": 43388 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769122221865974e-05, + "loss": 0.9993, + "step": 43389 + }, + { + "epoch": 0.04, + "learning_rate": 4.97691115713819e-05, + "loss": 0.9858, + "step": 43390 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769100920653315e-05, + "loss": 0.8867, + "step": 43391 + }, + { + "epoch": 0.04, + "learning_rate": 4.976909026968022e-05, + "loss": 0.9865, + "step": 43392 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769079618462624e-05, + "loss": 0.9799, + "step": 43393 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769068967000504e-05, + "loss": 0.9504, + "step": 43394 + }, + { + "epoch": 0.04, + "learning_rate": 4.976905831529388e-05, + "loss": 1.1387, + "step": 43395 + }, + { + "epoch": 0.04, + "learning_rate": 4.976904766334275e-05, + "loss": 1.1086, + "step": 43396 + }, + { + "epoch": 0.04, + "learning_rate": 4.97690370111471e-05, + "loss": 1.3523, + "step": 43397 + }, + { + "epoch": 0.04, + "learning_rate": 4.976902635870695e-05, + "loss": 1.1629, + "step": 43398 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769015706022295e-05, + "loss": 1.1671, + "step": 43399 + }, + { + "epoch": 0.04, + "learning_rate": 4.976900505309312e-05, + "loss": 0.9602, + "step": 43400 + }, + { + "epoch": 0.04, + "learning_rate": 4.976899439991944e-05, + "loss": 1.4438, + "step": 43401 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768983746501254e-05, + "loss": 1.0066, + "step": 43402 + }, + { + "epoch": 0.04, + "learning_rate": 4.976897309283855e-05, + "loss": 1.039, + "step": 43403 + }, + { + "epoch": 0.04, + "learning_rate": 4.976896243893134e-05, + "loss": 0.8197, + "step": 43404 + }, + { + "epoch": 0.04, + "learning_rate": 4.976895178477963e-05, + "loss": 1.0916, + "step": 43405 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768941130383405e-05, + "loss": 0.9929, + "step": 43406 + }, + { + "epoch": 0.04, + "learning_rate": 4.976893047574267e-05, + "loss": 1.2856, + "step": 43407 + }, + { + "epoch": 0.04, + "learning_rate": 4.976891982085743e-05, + "loss": 1.1546, + "step": 43408 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768909165727676e-05, + "loss": 1.39, + "step": 43409 + }, + { + "epoch": 0.04, + "learning_rate": 4.976889851035342e-05, + "loss": 1.222, + "step": 43410 + }, + { + "epoch": 0.04, + "learning_rate": 4.976888785473466e-05, + "loss": 1.266, + "step": 43411 + }, + { + "epoch": 0.04, + "learning_rate": 4.976887719887138e-05, + "loss": 0.9845, + "step": 43412 + }, + { + "epoch": 0.04, + "learning_rate": 4.97688665427636e-05, + "loss": 1.0552, + "step": 43413 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768855886411306e-05, + "loss": 0.9542, + "step": 43414 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768845229814506e-05, + "loss": 1.2924, + "step": 43415 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768834572973204e-05, + "loss": 1.0125, + "step": 43416 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768823915887395e-05, + "loss": 0.9609, + "step": 43417 + }, + { + "epoch": 0.04, + "learning_rate": 4.976881325855707e-05, + "loss": 1.1428, + "step": 43418 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768802600982245e-05, + "loss": 1.4471, + "step": 43419 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768791943162905e-05, + "loss": 2.2395, + "step": 43420 + }, + { + "epoch": 0.04, + "learning_rate": 4.976878128509907e-05, + "loss": 1.9473, + "step": 43421 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768770626790715e-05, + "loss": 1.3357, + "step": 43422 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768759968237865e-05, + "loss": 1.2185, + "step": 43423 + }, + { + "epoch": 0.04, + "learning_rate": 4.97687493094405e-05, + "loss": 1.1548, + "step": 43424 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768738650398633e-05, + "loss": 1.154, + "step": 43425 + }, + { + "epoch": 0.04, + "learning_rate": 4.976872799111226e-05, + "loss": 1.3293, + "step": 43426 + }, + { + "epoch": 0.04, + "learning_rate": 4.976871733158137e-05, + "loss": 1.158, + "step": 43427 + }, + { + "epoch": 0.04, + "learning_rate": 4.976870667180599e-05, + "loss": 0.7104, + "step": 43428 + }, + { + "epoch": 0.04, + "learning_rate": 4.976869601178609e-05, + "loss": 0.9788, + "step": 43429 + }, + { + "epoch": 0.04, + "learning_rate": 4.976868535152169e-05, + "loss": 0.8718, + "step": 43430 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768674691012784e-05, + "loss": 0.948, + "step": 43431 + }, + { + "epoch": 0.04, + "learning_rate": 4.976866403025937e-05, + "loss": 1.1247, + "step": 43432 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768653369261453e-05, + "loss": 1.0863, + "step": 43433 + }, + { + "epoch": 0.04, + "learning_rate": 4.976864270801903e-05, + "loss": 0.8544, + "step": 43434 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768632046532105e-05, + "loss": 0.7553, + "step": 43435 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768621384800665e-05, + "loss": 0.9312, + "step": 43436 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768610722824724e-05, + "loss": 0.8407, + "step": 43437 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768600060604275e-05, + "loss": 0.7668, + "step": 43438 + }, + { + "epoch": 0.04, + "learning_rate": 4.976858939813933e-05, + "loss": 1.123, + "step": 43439 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768578735429875e-05, + "loss": 0.7703, + "step": 43440 + }, + { + "epoch": 0.04, + "learning_rate": 4.976856807247591e-05, + "loss": 0.8329, + "step": 43441 + }, + { + "epoch": 0.04, + "learning_rate": 4.976855740927744e-05, + "loss": 1.2106, + "step": 43442 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768546745834474e-05, + "loss": 1.2767, + "step": 43443 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768536082147e-05, + "loss": 0.8608, + "step": 43444 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768525418215014e-05, + "loss": 1.0229, + "step": 43445 + }, + { + "epoch": 0.04, + "learning_rate": 4.976851475403853e-05, + "loss": 1.0519, + "step": 43446 + }, + { + "epoch": 0.04, + "learning_rate": 4.976850408961754e-05, + "loss": 1.0483, + "step": 43447 + }, + { + "epoch": 0.04, + "learning_rate": 4.976849342495205e-05, + "loss": 1.0903, + "step": 43448 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768482760042054e-05, + "loss": 0.9796, + "step": 43449 + }, + { + "epoch": 0.04, + "learning_rate": 4.976847209488755e-05, + "loss": 0.9773, + "step": 43450 + }, + { + "epoch": 0.04, + "learning_rate": 4.976846142948855e-05, + "loss": 1.3094, + "step": 43451 + }, + { + "epoch": 0.04, + "learning_rate": 4.976845076384504e-05, + "loss": 0.8922, + "step": 43452 + }, + { + "epoch": 0.04, + "learning_rate": 4.976844009795703e-05, + "loss": 1.1636, + "step": 43453 + }, + { + "epoch": 0.04, + "learning_rate": 4.976842943182451e-05, + "loss": 1.0273, + "step": 43454 + }, + { + "epoch": 0.04, + "learning_rate": 4.976841876544749e-05, + "loss": 1.0554, + "step": 43455 + }, + { + "epoch": 0.04, + "learning_rate": 4.976840809882597e-05, + "loss": 0.7267, + "step": 43456 + }, + { + "epoch": 0.04, + "learning_rate": 4.976839743195994e-05, + "loss": 0.7869, + "step": 43457 + }, + { + "epoch": 0.04, + "learning_rate": 4.976838676484941e-05, + "loss": 0.9203, + "step": 43458 + }, + { + "epoch": 0.04, + "learning_rate": 4.976837609749438e-05, + "loss": 0.9117, + "step": 43459 + }, + { + "epoch": 0.04, + "learning_rate": 4.976836542989484e-05, + "loss": 1.1166, + "step": 43460 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768354762050806e-05, + "loss": 1.0403, + "step": 43461 + }, + { + "epoch": 0.04, + "learning_rate": 4.976834409396226e-05, + "loss": 1.2187, + "step": 43462 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768333425629224e-05, + "loss": 1.0482, + "step": 43463 + }, + { + "epoch": 0.04, + "learning_rate": 4.976832275705167e-05, + "loss": 1.0052, + "step": 43464 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768312088229624e-05, + "loss": 0.7613, + "step": 43465 + }, + { + "epoch": 0.04, + "learning_rate": 4.976830141916307e-05, + "loss": 1.0331, + "step": 43466 + }, + { + "epoch": 0.04, + "learning_rate": 4.976829074985202e-05, + "loss": 1.204, + "step": 43467 + }, + { + "epoch": 0.04, + "learning_rate": 4.976828008029646e-05, + "loss": 1.1321, + "step": 43468 + }, + { + "epoch": 0.04, + "learning_rate": 4.97682694104964e-05, + "loss": 0.9122, + "step": 43469 + }, + { + "epoch": 0.04, + "learning_rate": 4.976825874045184e-05, + "loss": 0.6385, + "step": 43470 + }, + { + "epoch": 0.04, + "learning_rate": 4.976824807016278e-05, + "loss": 0.4752, + "step": 43471 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768237399629227e-05, + "loss": 1.3389, + "step": 43472 + }, + { + "epoch": 0.04, + "learning_rate": 4.976822672885115e-05, + "loss": 0.7905, + "step": 43473 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768216057828585e-05, + "loss": 1.0033, + "step": 43474 + }, + { + "epoch": 0.04, + "learning_rate": 4.976820538656152e-05, + "loss": 0.9459, + "step": 43475 + }, + { + "epoch": 0.04, + "learning_rate": 4.976819471504995e-05, + "loss": 1.3259, + "step": 43476 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768184043293885e-05, + "loss": 1.13, + "step": 43477 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768173371293316e-05, + "loss": 1.0591, + "step": 43478 + }, + { + "epoch": 0.04, + "learning_rate": 4.976816269904824e-05, + "loss": 0.9291, + "step": 43479 + }, + { + "epoch": 0.04, + "learning_rate": 4.976815202655867e-05, + "loss": 0.7991, + "step": 43480 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768141353824596e-05, + "loss": 0.9786, + "step": 43481 + }, + { + "epoch": 0.04, + "learning_rate": 4.976813068084602e-05, + "loss": 1.1274, + "step": 43482 + }, + { + "epoch": 0.04, + "learning_rate": 4.976812000762294e-05, + "loss": 1.2593, + "step": 43483 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768109334155365e-05, + "loss": 1.2078, + "step": 43484 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768098660443295e-05, + "loss": 1.1569, + "step": 43485 + }, + { + "epoch": 0.04, + "learning_rate": 4.976808798648672e-05, + "loss": 0.9868, + "step": 43486 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768077312285644e-05, + "loss": 0.7747, + "step": 43487 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768066637840064e-05, + "loss": 1.0812, + "step": 43488 + }, + { + "epoch": 0.04, + "learning_rate": 4.976805596314999e-05, + "loss": 1.1331, + "step": 43489 + }, + { + "epoch": 0.04, + "learning_rate": 4.976804528821541e-05, + "loss": 1.0333, + "step": 43490 + }, + { + "epoch": 0.04, + "learning_rate": 4.976803461303634e-05, + "loss": 1.0944, + "step": 43491 + }, + { + "epoch": 0.04, + "learning_rate": 4.9768023937612765e-05, + "loss": 1.0108, + "step": 43492 + }, + { + "epoch": 0.04, + "learning_rate": 4.976801326194469e-05, + "loss": 1.0759, + "step": 43493 + }, + { + "epoch": 0.04, + "learning_rate": 4.976800258603211e-05, + "loss": 1.0968, + "step": 43494 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767991909875044e-05, + "loss": 1.0592, + "step": 43495 + }, + { + "epoch": 0.04, + "learning_rate": 4.976798123347347e-05, + "loss": 0.9029, + "step": 43496 + }, + { + "epoch": 0.04, + "learning_rate": 4.97679705568274e-05, + "loss": 1.0784, + "step": 43497 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767959879936826e-05, + "loss": 1.2865, + "step": 43498 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767949202801753e-05, + "loss": 0.4135, + "step": 43499 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767938525422187e-05, + "loss": 1.0818, + "step": 43500 + }, + { + "epoch": 0.04, + "eval_loss": 1.0793282985687256, + "eval_runtime": 128.3709, + "eval_samples_per_second": 10.789, + "eval_steps_per_second": 5.398, + "step": 43500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767927847798125e-05, + "loss": 0.1879, + "step": 43501 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767917169929556e-05, + "loss": 0.1801, + "step": 43502 + }, + { + "epoch": 0.04, + "learning_rate": 4.97679064918165e-05, + "loss": 0.1623, + "step": 43503 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767895813458934e-05, + "loss": 0.3367, + "step": 43504 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767885134856875e-05, + "loss": 0.26, + "step": 43505 + }, + { + "epoch": 0.04, + "learning_rate": 4.976787445601031e-05, + "loss": 0.3412, + "step": 43506 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767863776919253e-05, + "loss": 0.1209, + "step": 43507 + }, + { + "epoch": 0.04, + "learning_rate": 4.97678530975837e-05, + "loss": 1.1175, + "step": 43508 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767842418003654e-05, + "loss": 1.0668, + "step": 43509 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767831738179096e-05, + "loss": 1.1523, + "step": 43510 + }, + { + "epoch": 0.04, + "learning_rate": 4.976782105811005e-05, + "loss": 1.2666, + "step": 43511 + }, + { + "epoch": 0.04, + "learning_rate": 4.976781037779651e-05, + "loss": 1.3846, + "step": 43512 + }, + { + "epoch": 0.04, + "learning_rate": 4.976779969723846e-05, + "loss": 0.959, + "step": 43513 + }, + { + "epoch": 0.04, + "learning_rate": 4.976778901643593e-05, + "loss": 0.9137, + "step": 43514 + }, + { + "epoch": 0.04, + "learning_rate": 4.976777833538889e-05, + "loss": 0.9293, + "step": 43515 + }, + { + "epoch": 0.04, + "learning_rate": 4.976776765409735e-05, + "loss": 1.0659, + "step": 43516 + }, + { + "epoch": 0.04, + "learning_rate": 4.976775697256132e-05, + "loss": 1.0196, + "step": 43517 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767746290780794e-05, + "loss": 1.2368, + "step": 43518 + }, + { + "epoch": 0.04, + "learning_rate": 4.976773560875577e-05, + "loss": 2.7937, + "step": 43519 + }, + { + "epoch": 0.04, + "learning_rate": 4.976772492648625e-05, + "loss": 3.4842, + "step": 43520 + }, + { + "epoch": 0.04, + "learning_rate": 4.976771424397223e-05, + "loss": 1.2734, + "step": 43521 + }, + { + "epoch": 0.04, + "learning_rate": 4.976770356121372e-05, + "loss": 1.2395, + "step": 43522 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767692878210705e-05, + "loss": 1.0688, + "step": 43523 + }, + { + "epoch": 0.04, + "learning_rate": 4.97676821949632e-05, + "loss": 0.8779, + "step": 43524 + }, + { + "epoch": 0.04, + "learning_rate": 4.97676715114712e-05, + "loss": 1.2659, + "step": 43525 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767660827734696e-05, + "loss": 1.2546, + "step": 43526 + }, + { + "epoch": 0.04, + "learning_rate": 4.97676501437537e-05, + "loss": 0.818, + "step": 43527 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767639459528216e-05, + "loss": 1.1962, + "step": 43528 + }, + { + "epoch": 0.04, + "learning_rate": 4.976762877505823e-05, + "loss": 1.8127, + "step": 43529 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767618090343745e-05, + "loss": 1.1876, + "step": 43530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767607405384765e-05, + "loss": 1.1346, + "step": 43531 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767596720181297e-05, + "loss": 1.1363, + "step": 43532 + }, + { + "epoch": 0.04, + "learning_rate": 4.976758603473333e-05, + "loss": 0.9584, + "step": 43533 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767575349040864e-05, + "loss": 1.0591, + "step": 43534 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767564663103906e-05, + "loss": 0.8256, + "step": 43535 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767553976922454e-05, + "loss": 1.2606, + "step": 43536 + }, + { + "epoch": 0.04, + "learning_rate": 4.976754329049651e-05, + "loss": 1.3134, + "step": 43537 + }, + { + "epoch": 0.04, + "learning_rate": 4.976753260382606e-05, + "loss": 1.1611, + "step": 43538 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767521916911124e-05, + "loss": 1.1756, + "step": 43539 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767511229751694e-05, + "loss": 1.0008, + "step": 43540 + }, + { + "epoch": 0.04, + "learning_rate": 4.976750054234777e-05, + "loss": 0.9684, + "step": 43541 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767489854699345e-05, + "loss": 1.092, + "step": 43542 + }, + { + "epoch": 0.04, + "learning_rate": 4.976747916680643e-05, + "loss": 0.985, + "step": 43543 + }, + { + "epoch": 0.04, + "learning_rate": 4.976746847866902e-05, + "loss": 0.7568, + "step": 43544 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767457790287117e-05, + "loss": 0.8494, + "step": 43545 + }, + { + "epoch": 0.04, + "learning_rate": 4.976744710166073e-05, + "loss": 0.9788, + "step": 43546 + }, + { + "epoch": 0.04, + "learning_rate": 4.976743641278983e-05, + "loss": 1.0072, + "step": 43547 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767425723674446e-05, + "loss": 0.8792, + "step": 43548 + }, + { + "epoch": 0.04, + "learning_rate": 4.976741503431457e-05, + "loss": 1.2105, + "step": 43549 + }, + { + "epoch": 0.04, + "learning_rate": 4.976740434471019e-05, + "loss": 1.1288, + "step": 43550 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767393654861326e-05, + "loss": 1.3869, + "step": 43551 + }, + { + "epoch": 0.04, + "learning_rate": 4.976738296476797e-05, + "loss": 1.0783, + "step": 43552 + }, + { + "epoch": 0.04, + "learning_rate": 4.976737227443013e-05, + "loss": 0.9991, + "step": 43553 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767361583847776e-05, + "loss": 1.214, + "step": 43554 + }, + { + "epoch": 0.04, + "learning_rate": 4.976735089302094e-05, + "loss": 0.2514, + "step": 43555 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767340201949605e-05, + "loss": 0.1255, + "step": 43556 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767329510633785e-05, + "loss": 0.5974, + "step": 43557 + }, + { + "epoch": 0.04, + "learning_rate": 4.976731881907347e-05, + "loss": 0.4185, + "step": 43558 + }, + { + "epoch": 0.04, + "learning_rate": 4.976730812726866e-05, + "loss": 0.6316, + "step": 43559 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767297435219364e-05, + "loss": 0.2231, + "step": 43560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767286742925566e-05, + "loss": 0.1719, + "step": 43561 + }, + { + "epoch": 0.04, + "learning_rate": 4.976727605038728e-05, + "loss": 0.1217, + "step": 43562 + }, + { + "epoch": 0.04, + "learning_rate": 4.976726535760451e-05, + "loss": 0.1949, + "step": 43563 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767254664577234e-05, + "loss": 0.114, + "step": 43564 + }, + { + "epoch": 0.04, + "learning_rate": 4.976724397130548e-05, + "loss": 0.1612, + "step": 43565 + }, + { + "epoch": 0.04, + "learning_rate": 4.976723327778922e-05, + "loss": 0.2137, + "step": 43566 + }, + { + "epoch": 0.04, + "learning_rate": 4.976722258402848e-05, + "loss": 0.1427, + "step": 43567 + }, + { + "epoch": 0.04, + "learning_rate": 4.976721189002324e-05, + "loss": 0.2148, + "step": 43568 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767201195773515e-05, + "loss": 0.7083, + "step": 43569 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767190501279295e-05, + "loss": 0.7204, + "step": 43570 + }, + { + "epoch": 0.04, + "learning_rate": 4.976717980654059e-05, + "loss": 1.0645, + "step": 43571 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767169111557385e-05, + "loss": 1.0205, + "step": 43572 + }, + { + "epoch": 0.04, + "learning_rate": 4.976715841632969e-05, + "loss": 1.1838, + "step": 43573 + }, + { + "epoch": 0.04, + "learning_rate": 4.976714772085751e-05, + "loss": 0.9784, + "step": 43574 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767137025140834e-05, + "loss": 0.9001, + "step": 43575 + }, + { + "epoch": 0.04, + "learning_rate": 4.976712632917967e-05, + "loss": 0.8889, + "step": 43576 + }, + { + "epoch": 0.04, + "learning_rate": 4.976711563297401e-05, + "loss": 1.3232, + "step": 43577 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767104936523866e-05, + "loss": 1.0028, + "step": 43578 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767094239829224e-05, + "loss": 0.9124, + "step": 43579 + }, + { + "epoch": 0.04, + "learning_rate": 4.97670835428901e-05, + "loss": 1.2947, + "step": 43580 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767072845706484e-05, + "loss": 1.0311, + "step": 43581 + }, + { + "epoch": 0.04, + "learning_rate": 4.976706214827838e-05, + "loss": 0.7185, + "step": 43582 + }, + { + "epoch": 0.04, + "learning_rate": 4.976705145060578e-05, + "loss": 1.1766, + "step": 43583 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767040752688685e-05, + "loss": 0.8214, + "step": 43584 + }, + { + "epoch": 0.04, + "learning_rate": 4.976703005452711e-05, + "loss": 0.5731, + "step": 43585 + }, + { + "epoch": 0.04, + "learning_rate": 4.976701935612105e-05, + "loss": 1.1233, + "step": 43586 + }, + { + "epoch": 0.04, + "learning_rate": 4.976700865747049e-05, + "loss": 0.793, + "step": 43587 + }, + { + "epoch": 0.04, + "learning_rate": 4.976699795857545e-05, + "loss": 1.0227, + "step": 43588 + }, + { + "epoch": 0.04, + "learning_rate": 4.976698725943591e-05, + "loss": 1.1212, + "step": 43589 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766976560051884e-05, + "loss": 1.0604, + "step": 43590 + }, + { + "epoch": 0.04, + "learning_rate": 4.976696586042338e-05, + "loss": 0.9397, + "step": 43591 + }, + { + "epoch": 0.04, + "learning_rate": 4.976695516055037e-05, + "loss": 1.0859, + "step": 43592 + }, + { + "epoch": 0.04, + "learning_rate": 4.976694446043287e-05, + "loss": 1.4275, + "step": 43593 + }, + { + "epoch": 0.04, + "learning_rate": 4.97669337600709e-05, + "loss": 1.1398, + "step": 43594 + }, + { + "epoch": 0.04, + "learning_rate": 4.976692305946443e-05, + "loss": 0.8304, + "step": 43595 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766912358613474e-05, + "loss": 0.9305, + "step": 43596 + }, + { + "epoch": 0.04, + "learning_rate": 4.976690165751803e-05, + "loss": 1.1256, + "step": 43597 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766890956178095e-05, + "loss": 0.9688, + "step": 43598 + }, + { + "epoch": 0.04, + "learning_rate": 4.976688025459367e-05, + "loss": 1.6875, + "step": 43599 + }, + { + "epoch": 0.04, + "learning_rate": 4.976686955276476e-05, + "loss": 1.0296, + "step": 43600 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766858850691355e-05, + "loss": 1.1038, + "step": 43601 + }, + { + "epoch": 0.04, + "learning_rate": 4.976684814837347e-05, + "loss": 0.9567, + "step": 43602 + }, + { + "epoch": 0.04, + "learning_rate": 4.976683744581109e-05, + "loss": 0.8703, + "step": 43603 + }, + { + "epoch": 0.04, + "learning_rate": 4.976682674300423e-05, + "loss": 1.205, + "step": 43604 + }, + { + "epoch": 0.04, + "learning_rate": 4.976681603995288e-05, + "loss": 0.9705, + "step": 43605 + }, + { + "epoch": 0.04, + "learning_rate": 4.976680533665704e-05, + "loss": 0.8716, + "step": 43606 + }, + { + "epoch": 0.04, + "learning_rate": 4.976679463311672e-05, + "loss": 1.1162, + "step": 43607 + }, + { + "epoch": 0.04, + "learning_rate": 4.976678392933191e-05, + "loss": 1.2164, + "step": 43608 + }, + { + "epoch": 0.04, + "learning_rate": 4.97667732253026e-05, + "loss": 1.3875, + "step": 43609 + }, + { + "epoch": 0.04, + "learning_rate": 4.976676252102881e-05, + "loss": 1.1021, + "step": 43610 + }, + { + "epoch": 0.04, + "learning_rate": 4.976675181651054e-05, + "loss": 2.9305, + "step": 43611 + }, + { + "epoch": 0.04, + "learning_rate": 4.976674111174778e-05, + "loss": 3.055, + "step": 43612 + }, + { + "epoch": 0.04, + "learning_rate": 4.976673040674053e-05, + "loss": 1.3167, + "step": 43613 + }, + { + "epoch": 0.04, + "learning_rate": 4.97667197014888e-05, + "loss": 1.2186, + "step": 43614 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766708995992576e-05, + "loss": 0.8302, + "step": 43615 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766698290251866e-05, + "loss": 1.2113, + "step": 43616 + }, + { + "epoch": 0.04, + "learning_rate": 4.976668758426667e-05, + "loss": 1.0783, + "step": 43617 + }, + { + "epoch": 0.04, + "learning_rate": 4.976667687803699e-05, + "loss": 0.6285, + "step": 43618 + }, + { + "epoch": 0.04, + "learning_rate": 4.976666617156282e-05, + "loss": 0.8071, + "step": 43619 + }, + { + "epoch": 0.04, + "learning_rate": 4.976665546484417e-05, + "loss": 1.3871, + "step": 43620 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766644757881035e-05, + "loss": 1.1133, + "step": 43621 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766634050673406e-05, + "loss": 0.5011, + "step": 43622 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766623343221296e-05, + "loss": 0.9902, + "step": 43623 + }, + { + "epoch": 0.04, + "learning_rate": 4.97666126355247e-05, + "loss": 0.9382, + "step": 43624 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766601927583614e-05, + "loss": 1.0122, + "step": 43625 + }, + { + "epoch": 0.04, + "learning_rate": 4.976659121939805e-05, + "loss": 0.9174, + "step": 43626 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766580510967995e-05, + "loss": 1.1385, + "step": 43627 + }, + { + "epoch": 0.04, + "learning_rate": 4.976656980229346e-05, + "loss": 1.3703, + "step": 43628 + }, + { + "epoch": 0.04, + "learning_rate": 4.976655909337443e-05, + "loss": 1.121, + "step": 43629 + }, + { + "epoch": 0.04, + "learning_rate": 4.976654838421092e-05, + "loss": 1.3205, + "step": 43630 + }, + { + "epoch": 0.04, + "learning_rate": 4.976653767480293e-05, + "loss": 0.7371, + "step": 43631 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766526965150455e-05, + "loss": 0.8441, + "step": 43632 + }, + { + "epoch": 0.04, + "learning_rate": 4.976651625525349e-05, + "loss": 1.0329, + "step": 43633 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766505545112043e-05, + "loss": 1.1377, + "step": 43634 + }, + { + "epoch": 0.04, + "learning_rate": 4.976649483472611e-05, + "loss": 1.1663, + "step": 43635 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766484124095695e-05, + "loss": 1.2139, + "step": 43636 + }, + { + "epoch": 0.04, + "learning_rate": 4.976647341322079e-05, + "loss": 0.9001, + "step": 43637 + }, + { + "epoch": 0.04, + "learning_rate": 4.976646270210141e-05, + "loss": 0.8858, + "step": 43638 + }, + { + "epoch": 0.04, + "learning_rate": 4.976645199073754e-05, + "loss": 0.8807, + "step": 43639 + }, + { + "epoch": 0.04, + "learning_rate": 4.976644127912919e-05, + "loss": 0.7296, + "step": 43640 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766430567276356e-05, + "loss": 0.7851, + "step": 43641 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766419855179036e-05, + "loss": 1.1657, + "step": 43642 + }, + { + "epoch": 0.04, + "learning_rate": 4.976640914283723e-05, + "loss": 0.9732, + "step": 43643 + }, + { + "epoch": 0.04, + "learning_rate": 4.976639843025094e-05, + "loss": 0.7124, + "step": 43644 + }, + { + "epoch": 0.04, + "learning_rate": 4.976638771742017e-05, + "loss": 0.1874, + "step": 43645 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766377004344915e-05, + "loss": 0.2172, + "step": 43646 + }, + { + "epoch": 0.04, + "learning_rate": 4.976636629102518e-05, + "loss": 0.1747, + "step": 43647 + }, + { + "epoch": 0.04, + "learning_rate": 4.976635557746095e-05, + "loss": 0.3386, + "step": 43648 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766344863652254e-05, + "loss": 0.3336, + "step": 43649 + }, + { + "epoch": 0.04, + "learning_rate": 4.976633414959907e-05, + "loss": 0.1135, + "step": 43650 + }, + { + "epoch": 0.04, + "learning_rate": 4.97663234353014e-05, + "loss": 0.1555, + "step": 43651 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766312720759245e-05, + "loss": 0.2353, + "step": 43652 + }, + { + "epoch": 0.04, + "learning_rate": 4.976630200597261e-05, + "loss": 0.1556, + "step": 43653 + }, + { + "epoch": 0.04, + "learning_rate": 4.976629129094149e-05, + "loss": 0.2056, + "step": 43654 + }, + { + "epoch": 0.04, + "learning_rate": 4.976628057566589e-05, + "loss": 0.1185, + "step": 43655 + }, + { + "epoch": 0.04, + "learning_rate": 4.976626986014581e-05, + "loss": 0.4746, + "step": 43656 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766259144381244e-05, + "loss": 1.046, + "step": 43657 + }, + { + "epoch": 0.04, + "learning_rate": 4.97662484283722e-05, + "loss": 1.1846, + "step": 43658 + }, + { + "epoch": 0.04, + "learning_rate": 4.976623771211867e-05, + "loss": 1.4738, + "step": 43659 + }, + { + "epoch": 0.04, + "learning_rate": 4.976622699562066e-05, + "loss": 0.9828, + "step": 43660 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766216278878166e-05, + "loss": 0.8722, + "step": 43661 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766205561891196e-05, + "loss": 1.0096, + "step": 43662 + }, + { + "epoch": 0.04, + "learning_rate": 4.976619484465974e-05, + "loss": 1.2449, + "step": 43663 + }, + { + "epoch": 0.04, + "learning_rate": 4.97661841271838e-05, + "loss": 1.1459, + "step": 43664 + }, + { + "epoch": 0.04, + "learning_rate": 4.976617340946339e-05, + "loss": 1.0648, + "step": 43665 + }, + { + "epoch": 0.04, + "learning_rate": 4.976616269149849e-05, + "loss": 0.955, + "step": 43666 + }, + { + "epoch": 0.04, + "learning_rate": 4.976615197328911e-05, + "loss": 1.3172, + "step": 43667 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766141254835246e-05, + "loss": 0.8297, + "step": 43668 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766130536136903e-05, + "loss": 0.9204, + "step": 43669 + }, + { + "epoch": 0.04, + "learning_rate": 4.976611981719408e-05, + "loss": 0.8686, + "step": 43670 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766109098006776e-05, + "loss": 1.0163, + "step": 43671 + }, + { + "epoch": 0.04, + "learning_rate": 4.976609837857499e-05, + "loss": 1.039, + "step": 43672 + }, + { + "epoch": 0.04, + "learning_rate": 4.976608765889873e-05, + "loss": 1.1423, + "step": 43673 + }, + { + "epoch": 0.04, + "learning_rate": 4.976607693897798e-05, + "loss": 1.1242, + "step": 43674 + }, + { + "epoch": 0.04, + "learning_rate": 4.976606621881276e-05, + "loss": 0.776, + "step": 43675 + }, + { + "epoch": 0.04, + "learning_rate": 4.976605549840305e-05, + "loss": 1.1123, + "step": 43676 + }, + { + "epoch": 0.04, + "learning_rate": 4.976604477774887e-05, + "loss": 1.0901, + "step": 43677 + }, + { + "epoch": 0.04, + "learning_rate": 4.97660340568502e-05, + "loss": 1.3775, + "step": 43678 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766023335707055e-05, + "loss": 0.8808, + "step": 43679 + }, + { + "epoch": 0.04, + "learning_rate": 4.976601261431943e-05, + "loss": 0.6574, + "step": 43680 + }, + { + "epoch": 0.04, + "learning_rate": 4.9766001892687325e-05, + "loss": 0.817, + "step": 43681 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765991170810746e-05, + "loss": 0.3532, + "step": 43682 + }, + { + "epoch": 0.04, + "learning_rate": 4.976598044868967e-05, + "loss": 0.4493, + "step": 43683 + }, + { + "epoch": 0.04, + "learning_rate": 4.976596972632414e-05, + "loss": 0.4478, + "step": 43684 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765959003714116e-05, + "loss": 0.6675, + "step": 43685 + }, + { + "epoch": 0.04, + "learning_rate": 4.976594828085961e-05, + "loss": 0.9616, + "step": 43686 + }, + { + "epoch": 0.04, + "learning_rate": 4.976593755776063e-05, + "loss": 1.0865, + "step": 43687 + }, + { + "epoch": 0.04, + "learning_rate": 4.976592683441717e-05, + "loss": 1.2101, + "step": 43688 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765916110829234e-05, + "loss": 1.1922, + "step": 43689 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765905386996815e-05, + "loss": 1.0531, + "step": 43690 + }, + { + "epoch": 0.04, + "learning_rate": 4.976589466291992e-05, + "loss": 0.8962, + "step": 43691 + }, + { + "epoch": 0.04, + "learning_rate": 4.976588393859855e-05, + "loss": 1.0141, + "step": 43692 + }, + { + "epoch": 0.04, + "learning_rate": 4.976587321403269e-05, + "loss": 1.0096, + "step": 43693 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765862489222364e-05, + "loss": 1.0671, + "step": 43694 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765851764167555e-05, + "loss": 1.0529, + "step": 43695 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765841038868264e-05, + "loss": 0.8222, + "step": 43696 + }, + { + "epoch": 0.04, + "learning_rate": 4.97658303133245e-05, + "loss": 0.517, + "step": 43697 + }, + { + "epoch": 0.04, + "learning_rate": 4.976581958753626e-05, + "loss": 0.3466, + "step": 43698 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765808861503535e-05, + "loss": 0.7992, + "step": 43699 + }, + { + "epoch": 0.04, + "learning_rate": 4.976579813522634e-05, + "loss": 1.1095, + "step": 43700 + }, + { + "epoch": 0.04, + "learning_rate": 4.976578740870467e-05, + "loss": 0.9834, + "step": 43701 + }, + { + "epoch": 0.04, + "learning_rate": 4.976577668193851e-05, + "loss": 0.9441, + "step": 43702 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765765954927884e-05, + "loss": 0.3046, + "step": 43703 + }, + { + "epoch": 0.04, + "learning_rate": 4.976575522767278e-05, + "loss": 0.5088, + "step": 43704 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765744500173197e-05, + "loss": 0.4917, + "step": 43705 + }, + { + "epoch": 0.04, + "learning_rate": 4.976573377242913e-05, + "loss": 0.7988, + "step": 43706 + }, + { + "epoch": 0.04, + "learning_rate": 4.976572304444059e-05, + "loss": 0.9351, + "step": 43707 + }, + { + "epoch": 0.04, + "learning_rate": 4.976571231620758e-05, + "loss": 0.6943, + "step": 43708 + }, + { + "epoch": 0.04, + "learning_rate": 4.976570158773009e-05, + "loss": 0.7016, + "step": 43709 + }, + { + "epoch": 0.04, + "learning_rate": 4.976569085900812e-05, + "loss": 0.6739, + "step": 43710 + }, + { + "epoch": 0.04, + "learning_rate": 4.976568013004167e-05, + "loss": 0.7035, + "step": 43711 + }, + { + "epoch": 0.04, + "learning_rate": 4.976566940083075e-05, + "loss": 0.9589, + "step": 43712 + }, + { + "epoch": 0.04, + "learning_rate": 4.976565867137535e-05, + "loss": 1.0234, + "step": 43713 + }, + { + "epoch": 0.04, + "learning_rate": 4.976564794167548e-05, + "loss": 0.7244, + "step": 43714 + }, + { + "epoch": 0.04, + "learning_rate": 4.976563721173113e-05, + "loss": 0.8438, + "step": 43715 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765626481542304e-05, + "loss": 0.8664, + "step": 43716 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765615751109005e-05, + "loss": 1.0183, + "step": 43717 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765605020431225e-05, + "loss": 1.0794, + "step": 43718 + }, + { + "epoch": 0.04, + "learning_rate": 4.976559428950898e-05, + "loss": 1.092, + "step": 43719 + }, + { + "epoch": 0.04, + "learning_rate": 4.976558355834224e-05, + "loss": 1.0902, + "step": 43720 + }, + { + "epoch": 0.04, + "learning_rate": 4.976557282693104e-05, + "loss": 1.2289, + "step": 43721 + }, + { + "epoch": 0.04, + "learning_rate": 4.976556209527536e-05, + "loss": 1.2143, + "step": 43722 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765551363375206e-05, + "loss": 0.9051, + "step": 43723 + }, + { + "epoch": 0.04, + "learning_rate": 4.976554063123058e-05, + "loss": 0.9361, + "step": 43724 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765529898841476e-05, + "loss": 1.3687, + "step": 43725 + }, + { + "epoch": 0.04, + "learning_rate": 4.97655191662079e-05, + "loss": 0.7206, + "step": 43726 + }, + { + "epoch": 0.04, + "learning_rate": 4.976550843332984e-05, + "loss": 0.9736, + "step": 43727 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765497700207316e-05, + "loss": 0.8953, + "step": 43728 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765486966840314e-05, + "loss": 0.9616, + "step": 43729 + }, + { + "epoch": 0.04, + "learning_rate": 4.976547623322884e-05, + "loss": 1.3716, + "step": 43730 + }, + { + "epoch": 0.04, + "learning_rate": 4.976546549937288e-05, + "loss": 0.8412, + "step": 43731 + }, + { + "epoch": 0.04, + "learning_rate": 4.976545476527246e-05, + "loss": 0.9644, + "step": 43732 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765444030927555e-05, + "loss": 1.2394, + "step": 43733 + }, + { + "epoch": 0.04, + "learning_rate": 4.976543329633818e-05, + "loss": 0.848, + "step": 43734 + }, + { + "epoch": 0.04, + "learning_rate": 4.976542256150433e-05, + "loss": 1.0036, + "step": 43735 + }, + { + "epoch": 0.04, + "learning_rate": 4.976541182642601e-05, + "loss": 0.2354, + "step": 43736 + }, + { + "epoch": 0.04, + "learning_rate": 4.976540109110322e-05, + "loss": 0.7882, + "step": 43737 + }, + { + "epoch": 0.04, + "learning_rate": 4.976539035553594e-05, + "loss": 1.1529, + "step": 43738 + }, + { + "epoch": 0.04, + "learning_rate": 4.976537961972421e-05, + "loss": 1.4231, + "step": 43739 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765368883667985e-05, + "loss": 1.1107, + "step": 43740 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765358147367295e-05, + "loss": 0.8821, + "step": 43741 + }, + { + "epoch": 0.04, + "learning_rate": 4.976534741082214e-05, + "loss": 1.2543, + "step": 43742 + }, + { + "epoch": 0.04, + "learning_rate": 4.97653366740325e-05, + "loss": 0.9341, + "step": 43743 + }, + { + "epoch": 0.04, + "learning_rate": 4.976532593699839e-05, + "loss": 0.8614, + "step": 43744 + }, + { + "epoch": 0.04, + "learning_rate": 4.976531519971981e-05, + "loss": 1.0374, + "step": 43745 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765304462196755e-05, + "loss": 1.0868, + "step": 43746 + }, + { + "epoch": 0.04, + "learning_rate": 4.976529372442923e-05, + "loss": 0.9329, + "step": 43747 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765282986417227e-05, + "loss": 1.1294, + "step": 43748 + }, + { + "epoch": 0.04, + "learning_rate": 4.976527224816076e-05, + "loss": 0.9225, + "step": 43749 + }, + { + "epoch": 0.04, + "learning_rate": 4.976526150965981e-05, + "loss": 0.7093, + "step": 43750 + }, + { + "epoch": 0.04, + "learning_rate": 4.97652507709144e-05, + "loss": 1.0579, + "step": 43751 + }, + { + "epoch": 0.04, + "learning_rate": 4.976524003192451e-05, + "loss": 1.1565, + "step": 43752 + }, + { + "epoch": 0.04, + "learning_rate": 4.976522929269015e-05, + "loss": 1.0652, + "step": 43753 + }, + { + "epoch": 0.04, + "learning_rate": 4.976521855321131e-05, + "loss": 1.2769, + "step": 43754 + }, + { + "epoch": 0.04, + "learning_rate": 4.976520781348801e-05, + "loss": 1.0554, + "step": 43755 + }, + { + "epoch": 0.04, + "learning_rate": 4.976519707352023e-05, + "loss": 1.3024, + "step": 43756 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765186333307984e-05, + "loss": 0.9108, + "step": 43757 + }, + { + "epoch": 0.04, + "learning_rate": 4.976517559285126e-05, + "loss": 0.9215, + "step": 43758 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765164852150074e-05, + "loss": 0.8495, + "step": 43759 + }, + { + "epoch": 0.04, + "learning_rate": 4.976515411120442e-05, + "loss": 0.8444, + "step": 43760 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765143370014274e-05, + "loss": 1.2747, + "step": 43761 + }, + { + "epoch": 0.04, + "learning_rate": 4.976513262857968e-05, + "loss": 0.9885, + "step": 43762 + }, + { + "epoch": 0.04, + "learning_rate": 4.97651218869006e-05, + "loss": 0.6006, + "step": 43763 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765111144977054e-05, + "loss": 0.4887, + "step": 43764 + }, + { + "epoch": 0.04, + "learning_rate": 4.976510040280904e-05, + "loss": 1.2415, + "step": 43765 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765089660396555e-05, + "loss": 0.3332, + "step": 43766 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765078917739594e-05, + "loss": 0.1436, + "step": 43767 + }, + { + "epoch": 0.04, + "learning_rate": 4.976506817483817e-05, + "loss": 0.4052, + "step": 43768 + }, + { + "epoch": 0.04, + "learning_rate": 4.976505743169227e-05, + "loss": 1.0858, + "step": 43769 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765046688301895e-05, + "loss": 0.9192, + "step": 43770 + }, + { + "epoch": 0.04, + "learning_rate": 4.976503594466706e-05, + "loss": 0.757, + "step": 43771 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765025200787755e-05, + "loss": 1.1626, + "step": 43772 + }, + { + "epoch": 0.04, + "learning_rate": 4.976501445666397e-05, + "loss": 1.147, + "step": 43773 + }, + { + "epoch": 0.04, + "learning_rate": 4.976500371229572e-05, + "loss": 1.1713, + "step": 43774 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764992967683006e-05, + "loss": 0.8769, + "step": 43775 + }, + { + "epoch": 0.04, + "learning_rate": 4.976498222282582e-05, + "loss": 0.9963, + "step": 43776 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764971477724166e-05, + "loss": 0.9176, + "step": 43777 + }, + { + "epoch": 0.04, + "learning_rate": 4.976496073237804e-05, + "loss": 1.0045, + "step": 43778 + }, + { + "epoch": 0.04, + "learning_rate": 4.976494998678744e-05, + "loss": 1.1725, + "step": 43779 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764939240952374e-05, + "loss": 0.7227, + "step": 43780 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764928494872845e-05, + "loss": 0.7603, + "step": 43781 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764917748548835e-05, + "loss": 1.1116, + "step": 43782 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764907001980364e-05, + "loss": 0.9965, + "step": 43783 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764896255167426e-05, + "loss": 0.9883, + "step": 43784 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764885508110014e-05, + "loss": 1.0498, + "step": 43785 + }, + { + "epoch": 0.04, + "learning_rate": 4.976487476080814e-05, + "loss": 0.8829, + "step": 43786 + }, + { + "epoch": 0.04, + "learning_rate": 4.976486401326179e-05, + "loss": 0.6126, + "step": 43787 + }, + { + "epoch": 0.04, + "learning_rate": 4.976485326547098e-05, + "loss": 0.6918, + "step": 43788 + }, + { + "epoch": 0.04, + "learning_rate": 4.976484251743569e-05, + "loss": 0.8142, + "step": 43789 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764831769155945e-05, + "loss": 0.9374, + "step": 43790 + }, + { + "epoch": 0.04, + "learning_rate": 4.976482102063172e-05, + "loss": 0.7676, + "step": 43791 + }, + { + "epoch": 0.04, + "learning_rate": 4.976481027186304e-05, + "loss": 0.6398, + "step": 43792 + }, + { + "epoch": 0.04, + "learning_rate": 4.976479952284989e-05, + "loss": 1.0641, + "step": 43793 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764788773592266e-05, + "loss": 1.1572, + "step": 43794 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764778024090174e-05, + "loss": 1.2586, + "step": 43795 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764767274343615e-05, + "loss": 0.757, + "step": 43796 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764756524352596e-05, + "loss": 1.1803, + "step": 43797 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764745774117096e-05, + "loss": 1.3283, + "step": 43798 + }, + { + "epoch": 0.04, + "learning_rate": 4.976473502363714e-05, + "loss": 0.9015, + "step": 43799 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764724272912715e-05, + "loss": 1.1698, + "step": 43800 + }, + { + "epoch": 0.04, + "learning_rate": 4.976471352194382e-05, + "loss": 1.2405, + "step": 43801 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764702770730465e-05, + "loss": 1.131, + "step": 43802 + }, + { + "epoch": 0.04, + "learning_rate": 4.976469201927264e-05, + "loss": 0.8027, + "step": 43803 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764681267570345e-05, + "loss": 1.1454, + "step": 43804 + }, + { + "epoch": 0.04, + "learning_rate": 4.976467051562359e-05, + "loss": 1.0556, + "step": 43805 + }, + { + "epoch": 0.04, + "learning_rate": 4.976465976343236e-05, + "loss": 1.0092, + "step": 43806 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764649010996665e-05, + "loss": 1.6903, + "step": 43807 + }, + { + "epoch": 0.04, + "learning_rate": 4.976463825831651e-05, + "loss": 0.9227, + "step": 43808 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764627505391886e-05, + "loss": 0.9879, + "step": 43809 + }, + { + "epoch": 0.04, + "learning_rate": 4.97646167522228e-05, + "loss": 1.0436, + "step": 43810 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764605998809245e-05, + "loss": 1.2756, + "step": 43811 + }, + { + "epoch": 0.04, + "learning_rate": 4.976459524515122e-05, + "loss": 1.142, + "step": 43812 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764584491248736e-05, + "loss": 0.9496, + "step": 43813 + }, + { + "epoch": 0.04, + "learning_rate": 4.976457373710178e-05, + "loss": 0.8323, + "step": 43814 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764562982710363e-05, + "loss": 1.0107, + "step": 43815 + }, + { + "epoch": 0.04, + "learning_rate": 4.976455222807448e-05, + "loss": 0.9054, + "step": 43816 + }, + { + "epoch": 0.04, + "learning_rate": 4.976454147319413e-05, + "loss": 1.3008, + "step": 43817 + }, + { + "epoch": 0.04, + "learning_rate": 4.976453071806931e-05, + "loss": 0.514, + "step": 43818 + }, + { + "epoch": 0.04, + "learning_rate": 4.976451996270003e-05, + "loss": 0.9326, + "step": 43819 + }, + { + "epoch": 0.04, + "learning_rate": 4.976450920708629e-05, + "loss": 1.1058, + "step": 43820 + }, + { + "epoch": 0.04, + "learning_rate": 4.976449845122808e-05, + "loss": 0.5187, + "step": 43821 + }, + { + "epoch": 0.04, + "learning_rate": 4.976448769512541e-05, + "loss": 0.3356, + "step": 43822 + }, + { + "epoch": 0.04, + "learning_rate": 4.976447693877827e-05, + "loss": 0.1685, + "step": 43823 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764466182186665e-05, + "loss": 0.2075, + "step": 43824 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764455425350596e-05, + "loss": 0.3138, + "step": 43825 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764444668270074e-05, + "loss": 0.7347, + "step": 43826 + }, + { + "epoch": 0.04, + "learning_rate": 4.976443391094508e-05, + "loss": 1.1349, + "step": 43827 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764423153375614e-05, + "loss": 1.0743, + "step": 43828 + }, + { + "epoch": 0.04, + "learning_rate": 4.976441239556169e-05, + "loss": 0.9341, + "step": 43829 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764401637503305e-05, + "loss": 0.9165, + "step": 43830 + }, + { + "epoch": 0.04, + "learning_rate": 4.976439087920045e-05, + "loss": 0.7887, + "step": 43831 + }, + { + "epoch": 0.04, + "learning_rate": 4.976438012065314e-05, + "loss": 1.0787, + "step": 43832 + }, + { + "epoch": 0.04, + "learning_rate": 4.976436936186136e-05, + "loss": 1.1339, + "step": 43833 + }, + { + "epoch": 0.04, + "learning_rate": 4.976435860282512e-05, + "loss": 0.8531, + "step": 43834 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764347843544414e-05, + "loss": 0.8883, + "step": 43835 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764337084019246e-05, + "loss": 0.4779, + "step": 43836 + }, + { + "epoch": 0.04, + "learning_rate": 4.976432632424962e-05, + "loss": 0.6563, + "step": 43837 + }, + { + "epoch": 0.04, + "learning_rate": 4.976431556423552e-05, + "loss": 1.1505, + "step": 43838 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764304803976966e-05, + "loss": 1.0257, + "step": 43839 + }, + { + "epoch": 0.04, + "learning_rate": 4.976429404347395e-05, + "loss": 1.216, + "step": 43840 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764283282726466e-05, + "loss": 1.0339, + "step": 43841 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764272521734515e-05, + "loss": 1.081, + "step": 43842 + }, + { + "epoch": 0.04, + "learning_rate": 4.976426176049811e-05, + "loss": 1.1918, + "step": 43843 + }, + { + "epoch": 0.04, + "learning_rate": 4.976425099901724e-05, + "loss": 1.5537, + "step": 43844 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764240237291906e-05, + "loss": 1.8211, + "step": 43845 + }, + { + "epoch": 0.04, + "learning_rate": 4.976422947532211e-05, + "loss": 1.0268, + "step": 43846 + }, + { + "epoch": 0.04, + "learning_rate": 4.976421871310786e-05, + "loss": 2.2849, + "step": 43847 + }, + { + "epoch": 0.04, + "learning_rate": 4.976420795064914e-05, + "loss": 1.0778, + "step": 43848 + }, + { + "epoch": 0.04, + "learning_rate": 4.976419718794596e-05, + "loss": 1.0269, + "step": 43849 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764186424998316e-05, + "loss": 1.1075, + "step": 43850 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764175661806214e-05, + "loss": 0.9895, + "step": 43851 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764164898369644e-05, + "loss": 1.0039, + "step": 43852 + }, + { + "epoch": 0.04, + "learning_rate": 4.976415413468862e-05, + "loss": 1.1522, + "step": 43853 + }, + { + "epoch": 0.04, + "learning_rate": 4.976414337076313e-05, + "loss": 1.1527, + "step": 43854 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764132606593187e-05, + "loss": 0.9377, + "step": 43855 + }, + { + "epoch": 0.04, + "learning_rate": 4.976412184217878e-05, + "loss": 0.9428, + "step": 43856 + }, + { + "epoch": 0.04, + "learning_rate": 4.97641110775199e-05, + "loss": 0.8072, + "step": 43857 + }, + { + "epoch": 0.04, + "learning_rate": 4.976410031261657e-05, + "loss": 0.9477, + "step": 43858 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764089547468785e-05, + "loss": 0.5796, + "step": 43859 + }, + { + "epoch": 0.04, + "learning_rate": 4.976407878207653e-05, + "loss": 1.0377, + "step": 43860 + }, + { + "epoch": 0.04, + "learning_rate": 4.976406801643981e-05, + "loss": 1.3208, + "step": 43861 + }, + { + "epoch": 0.04, + "learning_rate": 4.976405725055864e-05, + "loss": 0.9896, + "step": 43862 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764046484433e-05, + "loss": 1.0915, + "step": 43863 + }, + { + "epoch": 0.04, + "learning_rate": 4.976403571806291e-05, + "loss": 1.2515, + "step": 43864 + }, + { + "epoch": 0.04, + "learning_rate": 4.976402495144836e-05, + "loss": 0.8876, + "step": 43865 + }, + { + "epoch": 0.04, + "learning_rate": 4.976401418458934e-05, + "loss": 0.6953, + "step": 43866 + }, + { + "epoch": 0.04, + "learning_rate": 4.9764003417485865e-05, + "loss": 1.0554, + "step": 43867 + }, + { + "epoch": 0.04, + "learning_rate": 4.976399265013793e-05, + "loss": 0.8604, + "step": 43868 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763981882545543e-05, + "loss": 1.0035, + "step": 43869 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763971114708685e-05, + "loss": 1.3623, + "step": 43870 + }, + { + "epoch": 0.04, + "learning_rate": 4.976396034662737e-05, + "loss": 1.3579, + "step": 43871 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763949578301594e-05, + "loss": 0.7895, + "step": 43872 + }, + { + "epoch": 0.04, + "learning_rate": 4.976393880973137e-05, + "loss": 1.0426, + "step": 43873 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763928040916675e-05, + "loss": 1.0916, + "step": 43874 + }, + { + "epoch": 0.04, + "learning_rate": 4.976391727185752e-05, + "loss": 0.9515, + "step": 43875 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763906502553914e-05, + "loss": 0.7617, + "step": 43876 + }, + { + "epoch": 0.04, + "learning_rate": 4.976389573300585e-05, + "loss": 0.7362, + "step": 43877 + }, + { + "epoch": 0.04, + "learning_rate": 4.976388496321332e-05, + "loss": 0.41, + "step": 43878 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763874193176335e-05, + "loss": 0.4438, + "step": 43879 + }, + { + "epoch": 0.04, + "learning_rate": 4.976386342289489e-05, + "loss": 0.4518, + "step": 43880 + }, + { + "epoch": 0.04, + "learning_rate": 4.976385265236899e-05, + "loss": 0.4567, + "step": 43881 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763841881598625e-05, + "loss": 0.3839, + "step": 43882 + }, + { + "epoch": 0.04, + "learning_rate": 4.976383111058381e-05, + "loss": 0.5121, + "step": 43883 + }, + { + "epoch": 0.04, + "learning_rate": 4.976382033932453e-05, + "loss": 0.8345, + "step": 43884 + }, + { + "epoch": 0.04, + "learning_rate": 4.97638095678208e-05, + "loss": 1.0445, + "step": 43885 + }, + { + "epoch": 0.04, + "learning_rate": 4.97637987960726e-05, + "loss": 1.1974, + "step": 43886 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763788024079954e-05, + "loss": 1.1905, + "step": 43887 + }, + { + "epoch": 0.04, + "learning_rate": 4.976377725184285e-05, + "loss": 1.2364, + "step": 43888 + }, + { + "epoch": 0.04, + "learning_rate": 4.976376647936128e-05, + "loss": 1.3434, + "step": 43889 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763755706635254e-05, + "loss": 1.714, + "step": 43890 + }, + { + "epoch": 0.04, + "learning_rate": 4.976374493366477e-05, + "loss": 1.2352, + "step": 43891 + }, + { + "epoch": 0.04, + "learning_rate": 4.976373416044984e-05, + "loss": 0.8926, + "step": 43892 + }, + { + "epoch": 0.04, + "learning_rate": 4.976372338699044e-05, + "loss": 1.4564, + "step": 43893 + }, + { + "epoch": 0.04, + "learning_rate": 4.976371261328659e-05, + "loss": 1.5588, + "step": 43894 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763701839338285e-05, + "loss": 1.083, + "step": 43895 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763691065145515e-05, + "loss": 1.2636, + "step": 43896 + }, + { + "epoch": 0.04, + "learning_rate": 4.976368029070829e-05, + "loss": 0.939, + "step": 43897 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763669516026615e-05, + "loss": 1.3013, + "step": 43898 + }, + { + "epoch": 0.04, + "learning_rate": 4.976365874110048e-05, + "loss": 0.8248, + "step": 43899 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763647965929886e-05, + "loss": 0.826, + "step": 43900 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763637190514834e-05, + "loss": 1.2027, + "step": 43901 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763626414855335e-05, + "loss": 0.853, + "step": 43902 + }, + { + "epoch": 0.04, + "learning_rate": 4.976361563895137e-05, + "loss": 1.19, + "step": 43903 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763604862802956e-05, + "loss": 1.03, + "step": 43904 + }, + { + "epoch": 0.04, + "learning_rate": 4.976359408641008e-05, + "loss": 1.2239, + "step": 43905 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763583309772755e-05, + "loss": 1.2044, + "step": 43906 + }, + { + "epoch": 0.04, + "learning_rate": 4.976357253289097e-05, + "loss": 0.7222, + "step": 43907 + }, + { + "epoch": 0.04, + "learning_rate": 4.976356175576473e-05, + "loss": 0.3921, + "step": 43908 + }, + { + "epoch": 0.04, + "learning_rate": 4.976355097839404e-05, + "loss": 0.9228, + "step": 43909 + }, + { + "epoch": 0.04, + "learning_rate": 4.976354020077888e-05, + "loss": 0.9088, + "step": 43910 + }, + { + "epoch": 0.04, + "learning_rate": 4.976352942291927e-05, + "loss": 0.8723, + "step": 43911 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763518644815223e-05, + "loss": 0.9681, + "step": 43912 + }, + { + "epoch": 0.04, + "learning_rate": 4.97635078664667e-05, + "loss": 1.1444, + "step": 43913 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763497087873736e-05, + "loss": 1.1916, + "step": 43914 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763486309036305e-05, + "loss": 1.4106, + "step": 43915 + }, + { + "epoch": 0.04, + "learning_rate": 4.976347552995443e-05, + "loss": 0.9458, + "step": 43916 + }, + { + "epoch": 0.04, + "learning_rate": 4.976346475062809e-05, + "loss": 1.9274, + "step": 43917 + }, + { + "epoch": 0.04, + "learning_rate": 4.97634539710573e-05, + "loss": 1.2838, + "step": 43918 + }, + { + "epoch": 0.04, + "learning_rate": 4.976344319124205e-05, + "loss": 1.0752, + "step": 43919 + }, + { + "epoch": 0.04, + "learning_rate": 4.976343241118236e-05, + "loss": 1.0277, + "step": 43920 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763421630878206e-05, + "loss": 1.0927, + "step": 43921 + }, + { + "epoch": 0.04, + "learning_rate": 4.97634108503296e-05, + "loss": 1.0625, + "step": 43922 + }, + { + "epoch": 0.04, + "learning_rate": 4.976340006953654e-05, + "loss": 1.104, + "step": 43923 + }, + { + "epoch": 0.04, + "learning_rate": 4.976338928849903e-05, + "loss": 1.0398, + "step": 43924 + }, + { + "epoch": 0.04, + "learning_rate": 4.976337850721706e-05, + "loss": 0.9777, + "step": 43925 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763367725690634e-05, + "loss": 1.1854, + "step": 43926 + }, + { + "epoch": 0.04, + "learning_rate": 4.976335694391976e-05, + "loss": 1.07, + "step": 43927 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763346161904436e-05, + "loss": 0.8965, + "step": 43928 + }, + { + "epoch": 0.04, + "learning_rate": 4.976333537964465e-05, + "loss": 0.8061, + "step": 43929 + }, + { + "epoch": 0.04, + "learning_rate": 4.976332459714042e-05, + "loss": 0.9158, + "step": 43930 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763313814391734e-05, + "loss": 2.8359, + "step": 43931 + }, + { + "epoch": 0.04, + "learning_rate": 4.976330303139859e-05, + "loss": 0.719, + "step": 43932 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763292248161e-05, + "loss": 0.9265, + "step": 43933 + }, + { + "epoch": 0.04, + "learning_rate": 4.976328146467895e-05, + "loss": 0.7525, + "step": 43934 + }, + { + "epoch": 0.04, + "learning_rate": 4.976327068095244e-05, + "loss": 0.8726, + "step": 43935 + }, + { + "epoch": 0.04, + "learning_rate": 4.97632598969815e-05, + "loss": 1.0641, + "step": 43936 + }, + { + "epoch": 0.04, + "learning_rate": 4.976324911276609e-05, + "loss": 1.0193, + "step": 43937 + }, + { + "epoch": 0.04, + "learning_rate": 4.976323832830624e-05, + "loss": 0.8713, + "step": 43938 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763227543601926e-05, + "loss": 1.0793, + "step": 43939 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763216758653166e-05, + "loss": 1.025, + "step": 43940 + }, + { + "epoch": 0.04, + "learning_rate": 4.976320597345995e-05, + "loss": 0.9557, + "step": 43941 + }, + { + "epoch": 0.04, + "learning_rate": 4.976319518802229e-05, + "loss": 1.1189, + "step": 43942 + }, + { + "epoch": 0.04, + "learning_rate": 4.976318440234017e-05, + "loss": 0.9168, + "step": 43943 + }, + { + "epoch": 0.04, + "learning_rate": 4.976317361641361e-05, + "loss": 1.0711, + "step": 43944 + }, + { + "epoch": 0.04, + "learning_rate": 4.976316283024258e-05, + "loss": 1.0314, + "step": 43945 + }, + { + "epoch": 0.04, + "learning_rate": 4.976315204382711e-05, + "loss": 1.2995, + "step": 43946 + }, + { + "epoch": 0.04, + "learning_rate": 4.976314125716719e-05, + "loss": 1.0326, + "step": 43947 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763130470262806e-05, + "loss": 1.0762, + "step": 43948 + }, + { + "epoch": 0.04, + "learning_rate": 4.976311968311399e-05, + "loss": 0.8886, + "step": 43949 + }, + { + "epoch": 0.04, + "learning_rate": 4.976310889572071e-05, + "loss": 1.2064, + "step": 43950 + }, + { + "epoch": 0.04, + "learning_rate": 4.976309810808298e-05, + "loss": 0.9347, + "step": 43951 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763087320200805e-05, + "loss": 1.2747, + "step": 43952 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763076532074173e-05, + "loss": 1.1567, + "step": 43953 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763065743703095e-05, + "loss": 1.4465, + "step": 43954 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763054955087564e-05, + "loss": 1.6489, + "step": 43955 + }, + { + "epoch": 0.04, + "learning_rate": 4.976304416622758e-05, + "loss": 1.4523, + "step": 43956 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763033377123146e-05, + "loss": 0.8848, + "step": 43957 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763022587774266e-05, + "loss": 0.7339, + "step": 43958 + }, + { + "epoch": 0.04, + "learning_rate": 4.976301179818093e-05, + "loss": 1.7325, + "step": 43959 + }, + { + "epoch": 0.04, + "learning_rate": 4.976300100834316e-05, + "loss": 1.2833, + "step": 43960 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762990218260926e-05, + "loss": 1.0028, + "step": 43961 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762979427934245e-05, + "loss": 0.531, + "step": 43962 + }, + { + "epoch": 0.04, + "learning_rate": 4.976296863736311e-05, + "loss": 0.6181, + "step": 43963 + }, + { + "epoch": 0.04, + "learning_rate": 4.976295784654753e-05, + "loss": 1.1166, + "step": 43964 + }, + { + "epoch": 0.04, + "learning_rate": 4.97629470554875e-05, + "loss": 0.9569, + "step": 43965 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762936264183026e-05, + "loss": 0.8895, + "step": 43966 + }, + { + "epoch": 0.04, + "learning_rate": 4.976292547263409e-05, + "loss": 1.1227, + "step": 43967 + }, + { + "epoch": 0.04, + "learning_rate": 4.976291468084071e-05, + "loss": 1.1171, + "step": 43968 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762903888802885e-05, + "loss": 0.8516, + "step": 43969 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762893096520616e-05, + "loss": 1.1567, + "step": 43970 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762882303993885e-05, + "loss": 0.9055, + "step": 43971 + }, + { + "epoch": 0.04, + "learning_rate": 4.976287151122271e-05, + "loss": 0.9782, + "step": 43972 + }, + { + "epoch": 0.04, + "learning_rate": 4.976286071820709e-05, + "loss": 0.9851, + "step": 43973 + }, + { + "epoch": 0.04, + "learning_rate": 4.976284992494702e-05, + "loss": 0.8793, + "step": 43974 + }, + { + "epoch": 0.04, + "learning_rate": 4.97628391314425e-05, + "loss": 0.8815, + "step": 43975 + }, + { + "epoch": 0.04, + "learning_rate": 4.976282833769354e-05, + "loss": 0.8063, + "step": 43976 + }, + { + "epoch": 0.04, + "learning_rate": 4.976281754370011e-05, + "loss": 0.8895, + "step": 43977 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762806749462253e-05, + "loss": 0.3056, + "step": 43978 + }, + { + "epoch": 0.04, + "learning_rate": 4.976279595497994e-05, + "loss": 0.8739, + "step": 43979 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762785160253175e-05, + "loss": 1.1898, + "step": 43980 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762774365281975e-05, + "loss": 1.0681, + "step": 43981 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762763570066316e-05, + "loss": 1.0322, + "step": 43982 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762752774606215e-05, + "loss": 1.1954, + "step": 43983 + }, + { + "epoch": 0.04, + "learning_rate": 4.976274197890167e-05, + "loss": 1.1521, + "step": 43984 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762731182952674e-05, + "loss": 1.2078, + "step": 43985 + }, + { + "epoch": 0.04, + "learning_rate": 4.976272038675922e-05, + "loss": 0.9927, + "step": 43986 + }, + { + "epoch": 0.04, + "learning_rate": 4.976270959032133e-05, + "loss": 0.9494, + "step": 43987 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762698793639e-05, + "loss": 0.9876, + "step": 43988 + }, + { + "epoch": 0.04, + "learning_rate": 4.976268799671221e-05, + "loss": 0.8623, + "step": 43989 + }, + { + "epoch": 0.04, + "learning_rate": 4.976267719954097e-05, + "loss": 1.1027, + "step": 43990 + }, + { + "epoch": 0.04, + "learning_rate": 4.976266640212529e-05, + "loss": 0.3778, + "step": 43991 + }, + { + "epoch": 0.04, + "learning_rate": 4.976265560446517e-05, + "loss": 0.838, + "step": 43992 + }, + { + "epoch": 0.04, + "learning_rate": 4.97626448065606e-05, + "loss": 0.85, + "step": 43993 + }, + { + "epoch": 0.04, + "learning_rate": 4.976263400841158e-05, + "loss": 1.2419, + "step": 43994 + }, + { + "epoch": 0.04, + "learning_rate": 4.976262321001811e-05, + "loss": 0.9743, + "step": 43995 + }, + { + "epoch": 0.04, + "learning_rate": 4.97626124113802e-05, + "loss": 0.8844, + "step": 43996 + }, + { + "epoch": 0.04, + "learning_rate": 4.976260161249784e-05, + "loss": 1.0443, + "step": 43997 + }, + { + "epoch": 0.04, + "learning_rate": 4.976259081337104e-05, + "loss": 1.2677, + "step": 43998 + }, + { + "epoch": 0.04, + "learning_rate": 4.97625800139998e-05, + "loss": 1.0578, + "step": 43999 + }, + { + "epoch": 0.04, + "learning_rate": 4.976256921438409e-05, + "loss": 0.7653, + "step": 44000 + }, + { + "epoch": 0.04, + "eval_loss": 1.0828602313995361, + "eval_runtime": 124.6543, + "eval_samples_per_second": 11.111, + "eval_steps_per_second": 5.559, + "step": 44000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762558414523954e-05, + "loss": 0.592, + "step": 44001 + }, + { + "epoch": 0.04, + "learning_rate": 4.976254761441937e-05, + "loss": 0.5985, + "step": 44002 + }, + { + "epoch": 0.04, + "learning_rate": 4.976253681407034e-05, + "loss": 0.5771, + "step": 44003 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762526013476865e-05, + "loss": 0.5739, + "step": 44004 + }, + { + "epoch": 0.04, + "learning_rate": 4.976251521263894e-05, + "loss": 0.5662, + "step": 44005 + }, + { + "epoch": 0.04, + "learning_rate": 4.976250441155657e-05, + "loss": 0.5654, + "step": 44006 + }, + { + "epoch": 0.04, + "learning_rate": 4.976249361022975e-05, + "loss": 0.5641, + "step": 44007 + }, + { + "epoch": 0.04, + "learning_rate": 4.97624828086585e-05, + "loss": 0.5828, + "step": 44008 + }, + { + "epoch": 0.04, + "learning_rate": 4.97624720068428e-05, + "loss": 0.5704, + "step": 44009 + }, + { + "epoch": 0.04, + "learning_rate": 4.976246120478265e-05, + "loss": 0.7572, + "step": 44010 + }, + { + "epoch": 0.04, + "learning_rate": 4.976245040247806e-05, + "loss": 1.567, + "step": 44011 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762439599929025e-05, + "loss": 1.1449, + "step": 44012 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762428797135544e-05, + "loss": 1.0489, + "step": 44013 + }, + { + "epoch": 0.04, + "learning_rate": 4.976241799409762e-05, + "loss": 0.9013, + "step": 44014 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762407190815255e-05, + "loss": 0.841, + "step": 44015 + }, + { + "epoch": 0.04, + "learning_rate": 4.976239638728843e-05, + "loss": 1.0076, + "step": 44016 + }, + { + "epoch": 0.04, + "learning_rate": 4.976238558351718e-05, + "loss": 1.3432, + "step": 44017 + }, + { + "epoch": 0.04, + "learning_rate": 4.976237477950148e-05, + "loss": 1.1507, + "step": 44018 + }, + { + "epoch": 0.04, + "learning_rate": 4.976236397524133e-05, + "loss": 1.9489, + "step": 44019 + }, + { + "epoch": 0.04, + "learning_rate": 4.976235317073675e-05, + "loss": 1.9206, + "step": 44020 + }, + { + "epoch": 0.04, + "learning_rate": 4.976234236598772e-05, + "loss": 1.0969, + "step": 44021 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762331560994245e-05, + "loss": 1.2279, + "step": 44022 + }, + { + "epoch": 0.04, + "learning_rate": 4.976232075575632e-05, + "loss": 0.9635, + "step": 44023 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762309950273965e-05, + "loss": 1.0792, + "step": 44024 + }, + { + "epoch": 0.04, + "learning_rate": 4.976229914454716e-05, + "loss": 0.9323, + "step": 44025 + }, + { + "epoch": 0.04, + "learning_rate": 4.976228833857591e-05, + "loss": 0.9931, + "step": 44026 + }, + { + "epoch": 0.04, + "learning_rate": 4.976227753236022e-05, + "loss": 1.285, + "step": 44027 + }, + { + "epoch": 0.04, + "learning_rate": 4.976226672590008e-05, + "loss": 0.9303, + "step": 44028 + }, + { + "epoch": 0.04, + "learning_rate": 4.976225591919551e-05, + "loss": 1.1922, + "step": 44029 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762245112246495e-05, + "loss": 1.2473, + "step": 44030 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762234305053037e-05, + "loss": 1.1785, + "step": 44031 + }, + { + "epoch": 0.04, + "learning_rate": 4.976222349761513e-05, + "loss": 0.9143, + "step": 44032 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762212689932787e-05, + "loss": 0.9142, + "step": 44033 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762201882006e-05, + "loss": 1.1529, + "step": 44034 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762191073834776e-05, + "loss": 0.9158, + "step": 44035 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762180265419104e-05, + "loss": 1.0232, + "step": 44036 + }, + { + "epoch": 0.04, + "learning_rate": 4.976216945675899e-05, + "loss": 0.8749, + "step": 44037 + }, + { + "epoch": 0.04, + "learning_rate": 4.976215864785443e-05, + "loss": 0.5178, + "step": 44038 + }, + { + "epoch": 0.04, + "learning_rate": 4.976214783870543e-05, + "loss": 0.8685, + "step": 44039 + }, + { + "epoch": 0.04, + "learning_rate": 4.976213702931199e-05, + "loss": 0.8895, + "step": 44040 + }, + { + "epoch": 0.04, + "learning_rate": 4.976212621967412e-05, + "loss": 0.9726, + "step": 44041 + }, + { + "epoch": 0.04, + "learning_rate": 4.97621154097918e-05, + "loss": 0.7624, + "step": 44042 + }, + { + "epoch": 0.04, + "learning_rate": 4.976210459966504e-05, + "loss": 0.5021, + "step": 44043 + }, + { + "epoch": 0.04, + "learning_rate": 4.976209378929383e-05, + "loss": 0.6401, + "step": 44044 + }, + { + "epoch": 0.04, + "learning_rate": 4.976208297867819e-05, + "loss": 0.3417, + "step": 44045 + }, + { + "epoch": 0.04, + "learning_rate": 4.976207216781811e-05, + "loss": 0.9576, + "step": 44046 + }, + { + "epoch": 0.04, + "learning_rate": 4.976206135671358e-05, + "loss": 0.9135, + "step": 44047 + }, + { + "epoch": 0.04, + "learning_rate": 4.9762050545364615e-05, + "loss": 0.8255, + "step": 44048 + }, + { + "epoch": 0.04, + "learning_rate": 4.976203973377121e-05, + "loss": 0.994, + "step": 44049 + }, + { + "epoch": 0.04, + "learning_rate": 4.976202892193336e-05, + "loss": 0.9562, + "step": 44050 + }, + { + "epoch": 0.04, + "learning_rate": 4.976201810985107e-05, + "loss": 1.1059, + "step": 44051 + }, + { + "epoch": 0.04, + "learning_rate": 4.976200729752435e-05, + "loss": 1.0743, + "step": 44052 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761996484953174e-05, + "loss": 0.8494, + "step": 44053 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761985672137565e-05, + "loss": 1.1504, + "step": 44054 + }, + { + "epoch": 0.04, + "learning_rate": 4.976197485907752e-05, + "loss": 0.8593, + "step": 44055 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761964045773034e-05, + "loss": 0.9904, + "step": 44056 + }, + { + "epoch": 0.04, + "learning_rate": 4.976195323222411e-05, + "loss": 0.9992, + "step": 44057 + }, + { + "epoch": 0.04, + "learning_rate": 4.976194241843074e-05, + "loss": 1.1794, + "step": 44058 + }, + { + "epoch": 0.04, + "learning_rate": 4.976193160439293e-05, + "loss": 0.7106, + "step": 44059 + }, + { + "epoch": 0.04, + "learning_rate": 4.97619207901107e-05, + "loss": 0.9334, + "step": 44060 + }, + { + "epoch": 0.04, + "learning_rate": 4.976190997558401e-05, + "loss": 0.8217, + "step": 44061 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761899160812884e-05, + "loss": 1.1796, + "step": 44062 + }, + { + "epoch": 0.04, + "learning_rate": 4.976188834579732e-05, + "loss": 1.0448, + "step": 44063 + }, + { + "epoch": 0.04, + "learning_rate": 4.976187753053732e-05, + "loss": 1.0609, + "step": 44064 + }, + { + "epoch": 0.04, + "learning_rate": 4.976186671503288e-05, + "loss": 0.8022, + "step": 44065 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761855899283996e-05, + "loss": 1.2078, + "step": 44066 + }, + { + "epoch": 0.04, + "learning_rate": 4.976184508329068e-05, + "loss": 0.6283, + "step": 44067 + }, + { + "epoch": 0.04, + "learning_rate": 4.976183426705293e-05, + "loss": 0.7639, + "step": 44068 + }, + { + "epoch": 0.04, + "learning_rate": 4.976182345057073e-05, + "loss": 0.7874, + "step": 44069 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761812633844093e-05, + "loss": 0.5723, + "step": 44070 + }, + { + "epoch": 0.04, + "learning_rate": 4.976180181687303e-05, + "loss": 1.1733, + "step": 44071 + }, + { + "epoch": 0.04, + "learning_rate": 4.976179099965752e-05, + "loss": 1.1344, + "step": 44072 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761780182197573e-05, + "loss": 1.3794, + "step": 44073 + }, + { + "epoch": 0.04, + "learning_rate": 4.976176936449318e-05, + "loss": 1.4557, + "step": 44074 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761758546544364e-05, + "loss": 1.277, + "step": 44075 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761747728351106e-05, + "loss": 1.1004, + "step": 44076 + }, + { + "epoch": 0.04, + "learning_rate": 4.97617369099134e-05, + "loss": 1.0342, + "step": 44077 + }, + { + "epoch": 0.04, + "learning_rate": 4.976172609123127e-05, + "loss": 0.9638, + "step": 44078 + }, + { + "epoch": 0.04, + "learning_rate": 4.97617152723047e-05, + "loss": 1.1283, + "step": 44079 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761704453133685e-05, + "loss": 1.053, + "step": 44080 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761693633718246e-05, + "loss": 0.97, + "step": 44081 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761682814058353e-05, + "loss": 0.7274, + "step": 44082 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761671994154034e-05, + "loss": 0.8814, + "step": 44083 + }, + { + "epoch": 0.04, + "learning_rate": 4.976166117400528e-05, + "loss": 1.1686, + "step": 44084 + }, + { + "epoch": 0.04, + "learning_rate": 4.976165035361208e-05, + "loss": 1.2396, + "step": 44085 + }, + { + "epoch": 0.04, + "learning_rate": 4.976163953297445e-05, + "loss": 1.3917, + "step": 44086 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761628712092384e-05, + "loss": 1.0037, + "step": 44087 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761617890965884e-05, + "loss": 1.3063, + "step": 44088 + }, + { + "epoch": 0.04, + "learning_rate": 4.976160706959494e-05, + "loss": 1.1423, + "step": 44089 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761596247979565e-05, + "loss": 0.7969, + "step": 44090 + }, + { + "epoch": 0.04, + "learning_rate": 4.976158542611975e-05, + "loss": 0.7192, + "step": 44091 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761574604015505e-05, + "loss": 0.9672, + "step": 44092 + }, + { + "epoch": 0.04, + "learning_rate": 4.976156378166682e-05, + "loss": 0.9671, + "step": 44093 + }, + { + "epoch": 0.04, + "learning_rate": 4.97615529590737e-05, + "loss": 1.2128, + "step": 44094 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761542136236145e-05, + "loss": 1.0147, + "step": 44095 + }, + { + "epoch": 0.04, + "learning_rate": 4.976153131315415e-05, + "loss": 0.5925, + "step": 44096 + }, + { + "epoch": 0.04, + "learning_rate": 4.976152048982773e-05, + "loss": 1.0732, + "step": 44097 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761509666256864e-05, + "loss": 1.1816, + "step": 44098 + }, + { + "epoch": 0.04, + "learning_rate": 4.976149884244157e-05, + "loss": 0.9308, + "step": 44099 + }, + { + "epoch": 0.04, + "learning_rate": 4.976148801838184e-05, + "loss": 0.9243, + "step": 44100 + }, + { + "epoch": 0.04, + "learning_rate": 4.976147719407767e-05, + "loss": 1.0527, + "step": 44101 + }, + { + "epoch": 0.04, + "learning_rate": 4.976146636952907e-05, + "loss": 0.8061, + "step": 44102 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761455544736034e-05, + "loss": 1.1948, + "step": 44103 + }, + { + "epoch": 0.04, + "learning_rate": 4.976144471969856e-05, + "loss": 0.975, + "step": 44104 + }, + { + "epoch": 0.04, + "learning_rate": 4.976143389441665e-05, + "loss": 1.1497, + "step": 44105 + }, + { + "epoch": 0.04, + "learning_rate": 4.976142306889032e-05, + "loss": 0.9723, + "step": 44106 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761412243119544e-05, + "loss": 0.959, + "step": 44107 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761401417104336e-05, + "loss": 1.1215, + "step": 44108 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761390590844695e-05, + "loss": 0.9381, + "step": 44109 + }, + { + "epoch": 0.04, + "learning_rate": 4.976137976434061e-05, + "loss": 0.9153, + "step": 44110 + }, + { + "epoch": 0.04, + "learning_rate": 4.97613689375921e-05, + "loss": 0.717, + "step": 44111 + }, + { + "epoch": 0.04, + "learning_rate": 4.976135811059915e-05, + "loss": 1.0836, + "step": 44112 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761347283361783e-05, + "loss": 1.0914, + "step": 44113 + }, + { + "epoch": 0.04, + "learning_rate": 4.976133645587997e-05, + "loss": 0.7463, + "step": 44114 + }, + { + "epoch": 0.04, + "learning_rate": 4.976132562815372e-05, + "loss": 0.9687, + "step": 44115 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761314800183046e-05, + "loss": 1.0062, + "step": 44116 + }, + { + "epoch": 0.04, + "learning_rate": 4.976130397196793e-05, + "loss": 1.1384, + "step": 44117 + }, + { + "epoch": 0.04, + "learning_rate": 4.976129314350839e-05, + "loss": 1.021, + "step": 44118 + }, + { + "epoch": 0.04, + "learning_rate": 4.976128231480441e-05, + "loss": 0.8197, + "step": 44119 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761271485856e-05, + "loss": 1.0533, + "step": 44120 + }, + { + "epoch": 0.04, + "learning_rate": 4.976126065666316e-05, + "loss": 0.9533, + "step": 44121 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761249827225876e-05, + "loss": 0.7981, + "step": 44122 + }, + { + "epoch": 0.04, + "learning_rate": 4.976123899754417e-05, + "loss": 0.9784, + "step": 44123 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761228167618026e-05, + "loss": 1.2424, + "step": 44124 + }, + { + "epoch": 0.04, + "learning_rate": 4.976121733744745e-05, + "loss": 1.0018, + "step": 44125 + }, + { + "epoch": 0.04, + "learning_rate": 4.976120650703244e-05, + "loss": 1.0299, + "step": 44126 + }, + { + "epoch": 0.04, + "learning_rate": 4.976119567637301e-05, + "loss": 1.05, + "step": 44127 + }, + { + "epoch": 0.04, + "learning_rate": 4.976118484546913e-05, + "loss": 1.2863, + "step": 44128 + }, + { + "epoch": 0.04, + "learning_rate": 4.976117401432083e-05, + "loss": 1.3206, + "step": 44129 + }, + { + "epoch": 0.04, + "learning_rate": 4.97611631829281e-05, + "loss": 1.0551, + "step": 44130 + }, + { + "epoch": 0.04, + "learning_rate": 4.976115235129093e-05, + "loss": 1.0514, + "step": 44131 + }, + { + "epoch": 0.04, + "learning_rate": 4.976114151940934e-05, + "loss": 0.9384, + "step": 44132 + }, + { + "epoch": 0.04, + "learning_rate": 4.976113068728331e-05, + "loss": 1.2602, + "step": 44133 + }, + { + "epoch": 0.04, + "learning_rate": 4.976111985491284e-05, + "loss": 0.951, + "step": 44134 + }, + { + "epoch": 0.04, + "learning_rate": 4.976110902229795e-05, + "loss": 1.071, + "step": 44135 + }, + { + "epoch": 0.04, + "learning_rate": 4.976109818943863e-05, + "loss": 1.0218, + "step": 44136 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761087356334873e-05, + "loss": 1.0886, + "step": 44137 + }, + { + "epoch": 0.04, + "learning_rate": 4.976107652298669e-05, + "loss": 1.0422, + "step": 44138 + }, + { + "epoch": 0.04, + "learning_rate": 4.976106568939407e-05, + "loss": 0.9577, + "step": 44139 + }, + { + "epoch": 0.04, + "learning_rate": 4.976105485555702e-05, + "loss": 1.5232, + "step": 44140 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761044021475545e-05, + "loss": 0.9271, + "step": 44141 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761033187149636e-05, + "loss": 0.7668, + "step": 44142 + }, + { + "epoch": 0.04, + "learning_rate": 4.97610223525793e-05, + "loss": 1.1506, + "step": 44143 + }, + { + "epoch": 0.04, + "learning_rate": 4.9761011517764525e-05, + "loss": 0.6363, + "step": 44144 + }, + { + "epoch": 0.04, + "learning_rate": 4.976100068270533e-05, + "loss": 0.9266, + "step": 44145 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760989847401695e-05, + "loss": 1.1848, + "step": 44146 + }, + { + "epoch": 0.04, + "learning_rate": 4.976097901185364e-05, + "loss": 1.1761, + "step": 44147 + }, + { + "epoch": 0.04, + "learning_rate": 4.976096817606115e-05, + "loss": 1.1871, + "step": 44148 + }, + { + "epoch": 0.04, + "learning_rate": 4.976095734002423e-05, + "loss": 1.3471, + "step": 44149 + }, + { + "epoch": 0.04, + "learning_rate": 4.976094650374288e-05, + "loss": 1.0561, + "step": 44150 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760935667217104e-05, + "loss": 1.2854, + "step": 44151 + }, + { + "epoch": 0.04, + "learning_rate": 4.976092483044689e-05, + "loss": 1.2822, + "step": 44152 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760913993432254e-05, + "loss": 1.3523, + "step": 44153 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760903156173185e-05, + "loss": 0.9657, + "step": 44154 + }, + { + "epoch": 0.04, + "learning_rate": 4.976089231866969e-05, + "loss": 1.0776, + "step": 44155 + }, + { + "epoch": 0.04, + "learning_rate": 4.976088148092176e-05, + "loss": 0.6714, + "step": 44156 + }, + { + "epoch": 0.04, + "learning_rate": 4.97608706429294e-05, + "loss": 1.4955, + "step": 44157 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760859804692624e-05, + "loss": 1.0876, + "step": 44158 + }, + { + "epoch": 0.04, + "learning_rate": 4.976084896621141e-05, + "loss": 1.0059, + "step": 44159 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760838127485774e-05, + "loss": 1.0153, + "step": 44160 + }, + { + "epoch": 0.04, + "learning_rate": 4.97608272885157e-05, + "loss": 0.814, + "step": 44161 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760816449301204e-05, + "loss": 0.8434, + "step": 44162 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760805609842275e-05, + "loss": 1.1125, + "step": 44163 + }, + { + "epoch": 0.04, + "learning_rate": 4.976079477013892e-05, + "loss": 1.1781, + "step": 44164 + }, + { + "epoch": 0.04, + "learning_rate": 4.976078393019113e-05, + "loss": 0.8701, + "step": 44165 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760773089998924e-05, + "loss": 1.3188, + "step": 44166 + }, + { + "epoch": 0.04, + "learning_rate": 4.976076224956229e-05, + "loss": 1.1876, + "step": 44167 + }, + { + "epoch": 0.04, + "learning_rate": 4.976075140888122e-05, + "loss": 0.7875, + "step": 44168 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760740567955726e-05, + "loss": 0.0912, + "step": 44169 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760729726785804e-05, + "loss": 0.0765, + "step": 44170 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760718885371456e-05, + "loss": 0.0613, + "step": 44171 + }, + { + "epoch": 0.04, + "learning_rate": 4.976070804371268e-05, + "loss": 0.064, + "step": 44172 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760697201809474e-05, + "loss": 0.0575, + "step": 44173 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760686359661846e-05, + "loss": 0.0435, + "step": 44174 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760675517269785e-05, + "loss": 0.0477, + "step": 44175 + }, + { + "epoch": 0.04, + "learning_rate": 4.97606646746333e-05, + "loss": 0.0519, + "step": 44176 + }, + { + "epoch": 0.04, + "learning_rate": 4.976065383175239e-05, + "loss": 0.0475, + "step": 44177 + }, + { + "epoch": 0.04, + "learning_rate": 4.976064298862705e-05, + "loss": 0.0619, + "step": 44178 + }, + { + "epoch": 0.04, + "learning_rate": 4.976063214525728e-05, + "loss": 0.0459, + "step": 44179 + }, + { + "epoch": 0.04, + "learning_rate": 4.976062130164309e-05, + "loss": 0.0328, + "step": 44180 + }, + { + "epoch": 0.04, + "learning_rate": 4.976061045778447e-05, + "loss": 0.0409, + "step": 44181 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760599613681425e-05, + "loss": 0.0519, + "step": 44182 + }, + { + "epoch": 0.04, + "learning_rate": 4.976058876933396e-05, + "loss": 0.0442, + "step": 44183 + }, + { + "epoch": 0.04, + "learning_rate": 4.976057792474206e-05, + "loss": 0.0363, + "step": 44184 + }, + { + "epoch": 0.04, + "learning_rate": 4.976056707990574e-05, + "loss": 0.0389, + "step": 44185 + }, + { + "epoch": 0.04, + "learning_rate": 4.976055623482498e-05, + "loss": 0.0315, + "step": 44186 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760545389499816e-05, + "loss": 0.0257, + "step": 44187 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760534543930215e-05, + "loss": 0.0252, + "step": 44188 + }, + { + "epoch": 0.04, + "learning_rate": 4.976052369811618e-05, + "loss": 0.0427, + "step": 44189 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760512852057735e-05, + "loss": 0.0568, + "step": 44190 + }, + { + "epoch": 0.04, + "learning_rate": 4.976050200575486e-05, + "loss": 0.0601, + "step": 44191 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760491159207555e-05, + "loss": 0.0584, + "step": 44192 + }, + { + "epoch": 0.04, + "learning_rate": 4.976048031241582e-05, + "loss": 0.0693, + "step": 44193 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760469465379675e-05, + "loss": 0.0478, + "step": 44194 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760458618099096e-05, + "loss": 0.0367, + "step": 44195 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760447770574096e-05, + "loss": 0.0461, + "step": 44196 + }, + { + "epoch": 0.04, + "learning_rate": 4.976043692280467e-05, + "loss": 0.3163, + "step": 44197 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760426074790824e-05, + "loss": 1.1577, + "step": 44198 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760415226532544e-05, + "loss": 1.2372, + "step": 44199 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760404378029845e-05, + "loss": 1.2552, + "step": 44200 + }, + { + "epoch": 0.04, + "learning_rate": 4.976039352928272e-05, + "loss": 0.9884, + "step": 44201 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760382680291174e-05, + "loss": 1.1368, + "step": 44202 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760371831055195e-05, + "loss": 1.4516, + "step": 44203 + }, + { + "epoch": 0.04, + "learning_rate": 4.97603609815748e-05, + "loss": 1.2783, + "step": 44204 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760350131849984e-05, + "loss": 0.8059, + "step": 44205 + }, + { + "epoch": 0.04, + "learning_rate": 4.976033928188074e-05, + "loss": 0.8574, + "step": 44206 + }, + { + "epoch": 0.04, + "learning_rate": 4.976032843166707e-05, + "loss": 0.7806, + "step": 44207 + }, + { + "epoch": 0.04, + "learning_rate": 4.976031758120898e-05, + "loss": 0.6277, + "step": 44208 + }, + { + "epoch": 0.04, + "learning_rate": 4.976030673050647e-05, + "loss": 0.7787, + "step": 44209 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760295879559526e-05, + "loss": 0.732, + "step": 44210 + }, + { + "epoch": 0.04, + "learning_rate": 4.976028502836816e-05, + "loss": 0.8467, + "step": 44211 + }, + { + "epoch": 0.04, + "learning_rate": 4.976027417693238e-05, + "loss": 0.8257, + "step": 44212 + }, + { + "epoch": 0.04, + "learning_rate": 4.976026332525217e-05, + "loss": 0.7253, + "step": 44213 + }, + { + "epoch": 0.04, + "learning_rate": 4.976025247332754e-05, + "loss": 0.3938, + "step": 44214 + }, + { + "epoch": 0.04, + "learning_rate": 4.976024162115849e-05, + "loss": 0.9221, + "step": 44215 + }, + { + "epoch": 0.04, + "learning_rate": 4.976023076874501e-05, + "loss": 0.4342, + "step": 44216 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760219916087116e-05, + "loss": 0.0892, + "step": 44217 + }, + { + "epoch": 0.04, + "learning_rate": 4.976020906318479e-05, + "loss": 0.0775, + "step": 44218 + }, + { + "epoch": 0.04, + "learning_rate": 4.976019821003805e-05, + "loss": 0.7691, + "step": 44219 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760187356646884e-05, + "loss": 0.6447, + "step": 44220 + }, + { + "epoch": 0.04, + "learning_rate": 4.97601765030113e-05, + "loss": 0.9974, + "step": 44221 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760165649131284e-05, + "loss": 0.6943, + "step": 44222 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760154795006855e-05, + "loss": 1.1356, + "step": 44223 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760143940638005e-05, + "loss": 0.9912, + "step": 44224 + }, + { + "epoch": 0.04, + "learning_rate": 4.976013308602473e-05, + "loss": 1.3537, + "step": 44225 + }, + { + "epoch": 0.04, + "learning_rate": 4.976012223116703e-05, + "loss": 1.2782, + "step": 44226 + }, + { + "epoch": 0.04, + "learning_rate": 4.976011137606492e-05, + "loss": 1.4961, + "step": 44227 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760100520718375e-05, + "loss": 1.1146, + "step": 44228 + }, + { + "epoch": 0.04, + "learning_rate": 4.976008966512742e-05, + "loss": 1.1507, + "step": 44229 + }, + { + "epoch": 0.04, + "learning_rate": 4.976007880929203e-05, + "loss": 1.1987, + "step": 44230 + }, + { + "epoch": 0.04, + "learning_rate": 4.976006795321223e-05, + "loss": 1.4091, + "step": 44231 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760057096888007e-05, + "loss": 1.1412, + "step": 44232 + }, + { + "epoch": 0.04, + "learning_rate": 4.976004624031936e-05, + "loss": 1.1057, + "step": 44233 + }, + { + "epoch": 0.04, + "learning_rate": 4.97600353835063e-05, + "loss": 0.9919, + "step": 44234 + }, + { + "epoch": 0.04, + "learning_rate": 4.976002452644881e-05, + "loss": 1.0438, + "step": 44235 + }, + { + "epoch": 0.04, + "learning_rate": 4.976001366914691e-05, + "loss": 1.0243, + "step": 44236 + }, + { + "epoch": 0.04, + "learning_rate": 4.976000281160057e-05, + "loss": 0.9582, + "step": 44237 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759991953809826e-05, + "loss": 0.9205, + "step": 44238 + }, + { + "epoch": 0.04, + "learning_rate": 4.975998109577466e-05, + "loss": 1.3505, + "step": 44239 + }, + { + "epoch": 0.04, + "learning_rate": 4.975997023749507e-05, + "loss": 0.9823, + "step": 44240 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759959378971064e-05, + "loss": 0.9657, + "step": 44241 + }, + { + "epoch": 0.04, + "learning_rate": 4.975994852020264e-05, + "loss": 1.1239, + "step": 44242 + }, + { + "epoch": 0.04, + "learning_rate": 4.975993766118979e-05, + "loss": 0.7258, + "step": 44243 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759926801932524e-05, + "loss": 0.811, + "step": 44244 + }, + { + "epoch": 0.04, + "learning_rate": 4.975991594243084e-05, + "loss": 0.6553, + "step": 44245 + }, + { + "epoch": 0.04, + "learning_rate": 4.975990508268473e-05, + "loss": 0.7437, + "step": 44246 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759894222694206e-05, + "loss": 1.114, + "step": 44247 + }, + { + "epoch": 0.04, + "learning_rate": 4.975988336245926e-05, + "loss": 1.3326, + "step": 44248 + }, + { + "epoch": 0.04, + "learning_rate": 4.97598725019799e-05, + "loss": 1.3546, + "step": 44249 + }, + { + "epoch": 0.04, + "learning_rate": 4.975986164125611e-05, + "loss": 0.9978, + "step": 44250 + }, + { + "epoch": 0.04, + "learning_rate": 4.975985078028792e-05, + "loss": 0.5191, + "step": 44251 + }, + { + "epoch": 0.04, + "learning_rate": 4.975983991907529e-05, + "loss": 0.8519, + "step": 44252 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759829057618254e-05, + "loss": 1.2441, + "step": 44253 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759818195916796e-05, + "loss": 0.7463, + "step": 44254 + }, + { + "epoch": 0.04, + "learning_rate": 4.975980733397092e-05, + "loss": 0.972, + "step": 44255 + }, + { + "epoch": 0.04, + "learning_rate": 4.975979647178063e-05, + "loss": 0.9985, + "step": 44256 + }, + { + "epoch": 0.04, + "learning_rate": 4.975978560934591e-05, + "loss": 1.0512, + "step": 44257 + }, + { + "epoch": 0.04, + "learning_rate": 4.975977474666678e-05, + "loss": 0.5471, + "step": 44258 + }, + { + "epoch": 0.04, + "learning_rate": 4.975976388374323e-05, + "loss": 0.8604, + "step": 44259 + }, + { + "epoch": 0.04, + "learning_rate": 4.975975302057526e-05, + "loss": 1.3499, + "step": 44260 + }, + { + "epoch": 0.04, + "learning_rate": 4.975974215716288e-05, + "loss": 1.2628, + "step": 44261 + }, + { + "epoch": 0.04, + "learning_rate": 4.975973129350607e-05, + "loss": 1.0545, + "step": 44262 + }, + { + "epoch": 0.04, + "learning_rate": 4.975972042960486e-05, + "loss": 0.9905, + "step": 44263 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759709565459214e-05, + "loss": 0.9398, + "step": 44264 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759698701069165e-05, + "loss": 0.986, + "step": 44265 + }, + { + "epoch": 0.04, + "learning_rate": 4.975968783643469e-05, + "loss": 1.6306, + "step": 44266 + }, + { + "epoch": 0.04, + "learning_rate": 4.97596769715558e-05, + "loss": 1.0801, + "step": 44267 + }, + { + "epoch": 0.04, + "learning_rate": 4.975966610643249e-05, + "loss": 1.0212, + "step": 44268 + }, + { + "epoch": 0.04, + "learning_rate": 4.975965524106477e-05, + "loss": 1.0442, + "step": 44269 + }, + { + "epoch": 0.04, + "learning_rate": 4.975964437545263e-05, + "loss": 1.0723, + "step": 44270 + }, + { + "epoch": 0.04, + "learning_rate": 4.975963350959607e-05, + "loss": 0.7761, + "step": 44271 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759622643495095e-05, + "loss": 0.7226, + "step": 44272 + }, + { + "epoch": 0.04, + "learning_rate": 4.975961177714971e-05, + "loss": 0.5996, + "step": 44273 + }, + { + "epoch": 0.04, + "learning_rate": 4.97596009105599e-05, + "loss": 1.0736, + "step": 44274 + }, + { + "epoch": 0.04, + "learning_rate": 4.975959004372568e-05, + "loss": 1.3396, + "step": 44275 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759579176647033e-05, + "loss": 1.3403, + "step": 44276 + }, + { + "epoch": 0.04, + "learning_rate": 4.975956830932398e-05, + "loss": 0.9343, + "step": 44277 + }, + { + "epoch": 0.04, + "learning_rate": 4.975955744175651e-05, + "loss": 0.6843, + "step": 44278 + }, + { + "epoch": 0.04, + "learning_rate": 4.975954657394462e-05, + "loss": 1.1279, + "step": 44279 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759535705888316e-05, + "loss": 1.0705, + "step": 44280 + }, + { + "epoch": 0.04, + "learning_rate": 4.97595248375876e-05, + "loss": 1.0829, + "step": 44281 + }, + { + "epoch": 0.04, + "learning_rate": 4.975951396904246e-05, + "loss": 0.9895, + "step": 44282 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759503100252914e-05, + "loss": 1.115, + "step": 44283 + }, + { + "epoch": 0.04, + "learning_rate": 4.975949223121895e-05, + "loss": 1.0899, + "step": 44284 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759481361940565e-05, + "loss": 0.8426, + "step": 44285 + }, + { + "epoch": 0.04, + "learning_rate": 4.975947049241777e-05, + "loss": 1.0312, + "step": 44286 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759459622650557e-05, + "loss": 1.4461, + "step": 44287 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759448752638926e-05, + "loss": 1.1075, + "step": 44288 + }, + { + "epoch": 0.04, + "learning_rate": 4.975943788238289e-05, + "loss": 1.0394, + "step": 44289 + }, + { + "epoch": 0.04, + "learning_rate": 4.975942701188243e-05, + "loss": 1.0553, + "step": 44290 + }, + { + "epoch": 0.04, + "learning_rate": 4.975941614113756e-05, + "loss": 1.3153, + "step": 44291 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759405270148274e-05, + "loss": 0.8663, + "step": 44292 + }, + { + "epoch": 0.04, + "learning_rate": 4.975939439891458e-05, + "loss": 1.183, + "step": 44293 + }, + { + "epoch": 0.04, + "learning_rate": 4.975938352743646e-05, + "loss": 1.6465, + "step": 44294 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759372655713934e-05, + "loss": 1.1937, + "step": 44295 + }, + { + "epoch": 0.04, + "learning_rate": 4.975936178374699e-05, + "loss": 1.1428, + "step": 44296 + }, + { + "epoch": 0.04, + "learning_rate": 4.975935091153564e-05, + "loss": 1.2035, + "step": 44297 + }, + { + "epoch": 0.04, + "learning_rate": 4.975934003907986e-05, + "loss": 0.9618, + "step": 44298 + }, + { + "epoch": 0.04, + "learning_rate": 4.975932916637969e-05, + "loss": 0.8727, + "step": 44299 + }, + { + "epoch": 0.04, + "learning_rate": 4.975931829343509e-05, + "loss": 0.8357, + "step": 44300 + }, + { + "epoch": 0.04, + "learning_rate": 4.975930742024607e-05, + "loss": 1.2124, + "step": 44301 + }, + { + "epoch": 0.04, + "learning_rate": 4.975929654681265e-05, + "loss": 1.0133, + "step": 44302 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759285673134815e-05, + "loss": 0.9626, + "step": 44303 + }, + { + "epoch": 0.04, + "learning_rate": 4.975927479921256e-05, + "loss": 1.3344, + "step": 44304 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759263925045897e-05, + "loss": 1.2246, + "step": 44305 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759253050634824e-05, + "loss": 0.9112, + "step": 44306 + }, + { + "epoch": 0.04, + "learning_rate": 4.975924217597933e-05, + "loss": 0.9912, + "step": 44307 + }, + { + "epoch": 0.04, + "learning_rate": 4.975923130107943e-05, + "loss": 1.0901, + "step": 44308 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759220425935116e-05, + "loss": 1.2172, + "step": 44309 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759209550546385e-05, + "loss": 1.2216, + "step": 44310 + }, + { + "epoch": 0.04, + "learning_rate": 4.975919867491324e-05, + "loss": 1.0841, + "step": 44311 + }, + { + "epoch": 0.04, + "learning_rate": 4.975918779903569e-05, + "loss": 1.3634, + "step": 44312 + }, + { + "epoch": 0.04, + "learning_rate": 4.975917692291373e-05, + "loss": 0.9954, + "step": 44313 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759166046547344e-05, + "loss": 0.8513, + "step": 44314 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759155169936554e-05, + "loss": 0.628, + "step": 44315 + }, + { + "epoch": 0.04, + "learning_rate": 4.975914429308136e-05, + "loss": 0.6733, + "step": 44316 + }, + { + "epoch": 0.04, + "learning_rate": 4.975913341598174e-05, + "loss": 0.9488, + "step": 44317 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759122538637714e-05, + "loss": 1.1872, + "step": 44318 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759111661049285e-05, + "loss": 0.8717, + "step": 44319 + }, + { + "epoch": 0.04, + "learning_rate": 4.975910078321643e-05, + "loss": 0.9229, + "step": 44320 + }, + { + "epoch": 0.04, + "learning_rate": 4.975908990513917e-05, + "loss": 1.0926, + "step": 44321 + }, + { + "epoch": 0.04, + "learning_rate": 4.97590790268175e-05, + "loss": 0.9759, + "step": 44322 + }, + { + "epoch": 0.04, + "learning_rate": 4.975906814825142e-05, + "loss": 0.8217, + "step": 44323 + }, + { + "epoch": 0.04, + "learning_rate": 4.975905726944093e-05, + "loss": 0.8467, + "step": 44324 + }, + { + "epoch": 0.04, + "learning_rate": 4.975904639038602e-05, + "loss": 0.8962, + "step": 44325 + }, + { + "epoch": 0.04, + "learning_rate": 4.97590355110867e-05, + "loss": 1.1065, + "step": 44326 + }, + { + "epoch": 0.04, + "learning_rate": 4.975902463154298e-05, + "loss": 0.598, + "step": 44327 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759013751754844e-05, + "loss": 1.3974, + "step": 44328 + }, + { + "epoch": 0.04, + "learning_rate": 4.97590028717223e-05, + "loss": 0.9934, + "step": 44329 + }, + { + "epoch": 0.04, + "learning_rate": 4.975899199144534e-05, + "loss": 1.0534, + "step": 44330 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758981110923976e-05, + "loss": 0.9054, + "step": 44331 + }, + { + "epoch": 0.04, + "learning_rate": 4.97589702301582e-05, + "loss": 1.3586, + "step": 44332 + }, + { + "epoch": 0.04, + "learning_rate": 4.975895934914801e-05, + "loss": 1.1052, + "step": 44333 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758948467893404e-05, + "loss": 0.7523, + "step": 44334 + }, + { + "epoch": 0.04, + "learning_rate": 4.975893758639441e-05, + "loss": 0.755, + "step": 44335 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758926704650985e-05, + "loss": 0.5167, + "step": 44336 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758915822663155e-05, + "loss": 1.1771, + "step": 44337 + }, + { + "epoch": 0.04, + "learning_rate": 4.975890494043092e-05, + "loss": 2.3497, + "step": 44338 + }, + { + "epoch": 0.04, + "learning_rate": 4.975889405795428e-05, + "loss": 1.7266, + "step": 44339 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758883175233216e-05, + "loss": 1.3096, + "step": 44340 + }, + { + "epoch": 0.04, + "learning_rate": 4.975887229226775e-05, + "loss": 0.9277, + "step": 44341 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758861409057874e-05, + "loss": 0.9703, + "step": 44342 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758850525603594e-05, + "loss": 1.1053, + "step": 44343 + }, + { + "epoch": 0.04, + "learning_rate": 4.97588396419049e-05, + "loss": 1.0579, + "step": 44344 + }, + { + "epoch": 0.04, + "learning_rate": 4.97588287579618e-05, + "loss": 1.1893, + "step": 44345 + }, + { + "epoch": 0.04, + "learning_rate": 4.975881787377429e-05, + "loss": 1.4656, + "step": 44346 + }, + { + "epoch": 0.04, + "learning_rate": 4.975880698934237e-05, + "loss": 0.715, + "step": 44347 + }, + { + "epoch": 0.04, + "learning_rate": 4.975879610466604e-05, + "loss": 0.8638, + "step": 44348 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758785219745305e-05, + "loss": 1.0714, + "step": 44349 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758774334580167e-05, + "loss": 0.5026, + "step": 44350 + }, + { + "epoch": 0.04, + "learning_rate": 4.975876344917061e-05, + "loss": 0.2241, + "step": 44351 + }, + { + "epoch": 0.04, + "learning_rate": 4.975875256351665e-05, + "loss": 0.1609, + "step": 44352 + }, + { + "epoch": 0.04, + "learning_rate": 4.975874167761828e-05, + "loss": 0.1384, + "step": 44353 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758730791475505e-05, + "loss": 0.1097, + "step": 44354 + }, + { + "epoch": 0.04, + "learning_rate": 4.975871990508832e-05, + "loss": 0.1432, + "step": 44355 + }, + { + "epoch": 0.04, + "learning_rate": 4.975870901845673e-05, + "loss": 0.1288, + "step": 44356 + }, + { + "epoch": 0.04, + "learning_rate": 4.975869813158074e-05, + "loss": 0.1296, + "step": 44357 + }, + { + "epoch": 0.04, + "learning_rate": 4.975868724446033e-05, + "loss": 1.1406, + "step": 44358 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758676357095514e-05, + "loss": 1.1604, + "step": 44359 + }, + { + "epoch": 0.04, + "learning_rate": 4.97586654694863e-05, + "loss": 1.2105, + "step": 44360 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758654581632665e-05, + "loss": 1.0885, + "step": 44361 + }, + { + "epoch": 0.04, + "learning_rate": 4.975864369353463e-05, + "loss": 1.0051, + "step": 44362 + }, + { + "epoch": 0.04, + "learning_rate": 4.975863280519219e-05, + "loss": 1.2788, + "step": 44363 + }, + { + "epoch": 0.04, + "learning_rate": 4.975862191660534e-05, + "loss": 0.9128, + "step": 44364 + }, + { + "epoch": 0.04, + "learning_rate": 4.975861102777408e-05, + "loss": 0.9093, + "step": 44365 + }, + { + "epoch": 0.04, + "learning_rate": 4.975860013869843e-05, + "loss": 1.0936, + "step": 44366 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758589249378356e-05, + "loss": 0.5281, + "step": 44367 + }, + { + "epoch": 0.04, + "learning_rate": 4.975857835981388e-05, + "loss": 0.2065, + "step": 44368 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758567470005e-05, + "loss": 1.1356, + "step": 44369 + }, + { + "epoch": 0.04, + "learning_rate": 4.975855657995171e-05, + "loss": 1.2061, + "step": 44370 + }, + { + "epoch": 0.04, + "learning_rate": 4.975854568965403e-05, + "loss": 0.9431, + "step": 44371 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758534799111924e-05, + "loss": 0.8633, + "step": 44372 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758523908325414e-05, + "loss": 0.714, + "step": 44373 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758513017294506e-05, + "loss": 1.2124, + "step": 44374 + }, + { + "epoch": 0.04, + "learning_rate": 4.975850212601919e-05, + "loss": 1.1215, + "step": 44375 + }, + { + "epoch": 0.04, + "learning_rate": 4.975849123449947e-05, + "loss": 1.4983, + "step": 44376 + }, + { + "epoch": 0.04, + "learning_rate": 4.975848034273534e-05, + "loss": 0.982, + "step": 44377 + }, + { + "epoch": 0.04, + "learning_rate": 4.975846945072681e-05, + "loss": 1.0833, + "step": 44378 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758458558473874e-05, + "loss": 0.9185, + "step": 44379 + }, + { + "epoch": 0.04, + "learning_rate": 4.975844766597653e-05, + "loss": 1.1255, + "step": 44380 + }, + { + "epoch": 0.04, + "learning_rate": 4.975843677323478e-05, + "loss": 0.7952, + "step": 44381 + }, + { + "epoch": 0.04, + "learning_rate": 4.975842588024863e-05, + "loss": 1.2562, + "step": 44382 + }, + { + "epoch": 0.04, + "learning_rate": 4.975841498701807e-05, + "loss": 1.1091, + "step": 44383 + }, + { + "epoch": 0.04, + "learning_rate": 4.975840409354311e-05, + "loss": 1.1255, + "step": 44384 + }, + { + "epoch": 0.04, + "learning_rate": 4.975839319982374e-05, + "loss": 1.147, + "step": 44385 + }, + { + "epoch": 0.04, + "learning_rate": 4.975838230585998e-05, + "loss": 0.9163, + "step": 44386 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758371411651794e-05, + "loss": 0.7632, + "step": 44387 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758360517199224e-05, + "loss": 0.8683, + "step": 44388 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758349622502235e-05, + "loss": 1.0161, + "step": 44389 + }, + { + "epoch": 0.04, + "learning_rate": 4.975833872756085e-05, + "loss": 1.1837, + "step": 44390 + }, + { + "epoch": 0.04, + "learning_rate": 4.975832783237506e-05, + "loss": 0.921, + "step": 44391 + }, + { + "epoch": 0.04, + "learning_rate": 4.975831693694486e-05, + "loss": 1.0609, + "step": 44392 + }, + { + "epoch": 0.04, + "learning_rate": 4.975830604127026e-05, + "loss": 1.15, + "step": 44393 + }, + { + "epoch": 0.04, + "learning_rate": 4.975829514535126e-05, + "loss": 0.9297, + "step": 44394 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758284249187854e-05, + "loss": 0.8869, + "step": 44395 + }, + { + "epoch": 0.04, + "learning_rate": 4.975827335278004e-05, + "loss": 1.0168, + "step": 44396 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758262456127835e-05, + "loss": 1.0765, + "step": 44397 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758251559231217e-05, + "loss": 1.0971, + "step": 44398 + }, + { + "epoch": 0.04, + "learning_rate": 4.97582406620902e-05, + "loss": 0.6828, + "step": 44399 + }, + { + "epoch": 0.04, + "learning_rate": 4.975822976470478e-05, + "loss": 0.9077, + "step": 44400 + }, + { + "epoch": 0.04, + "learning_rate": 4.975821886707495e-05, + "loss": 1.0426, + "step": 44401 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758207969200726e-05, + "loss": 0.8804, + "step": 44402 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758197071082096e-05, + "loss": 1.1705, + "step": 44403 + }, + { + "epoch": 0.04, + "learning_rate": 4.975818617271907e-05, + "loss": 1.0337, + "step": 44404 + }, + { + "epoch": 0.04, + "learning_rate": 4.975817527411163e-05, + "loss": 1.2102, + "step": 44405 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758164375259796e-05, + "loss": 1.1132, + "step": 44406 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758153476163555e-05, + "loss": 1.0734, + "step": 44407 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758142576822914e-05, + "loss": 1.204, + "step": 44408 + }, + { + "epoch": 0.04, + "learning_rate": 4.975813167723787e-05, + "loss": 1.074, + "step": 44409 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758120777408427e-05, + "loss": 1.144, + "step": 44410 + }, + { + "epoch": 0.04, + "learning_rate": 4.975810987733458e-05, + "loss": 1.2265, + "step": 44411 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758098977016335e-05, + "loss": 1.1079, + "step": 44412 + }, + { + "epoch": 0.04, + "learning_rate": 4.975808807645368e-05, + "loss": 1.134, + "step": 44413 + }, + { + "epoch": 0.04, + "learning_rate": 4.975807717564663e-05, + "loss": 0.9788, + "step": 44414 + }, + { + "epoch": 0.04, + "learning_rate": 4.975806627459518e-05, + "loss": 0.8611, + "step": 44415 + }, + { + "epoch": 0.04, + "learning_rate": 4.975805537329933e-05, + "loss": 1.0164, + "step": 44416 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758044471759066e-05, + "loss": 0.8108, + "step": 44417 + }, + { + "epoch": 0.04, + "learning_rate": 4.975803356997442e-05, + "loss": 1.3235, + "step": 44418 + }, + { + "epoch": 0.04, + "learning_rate": 4.975802266794536e-05, + "loss": 1.3523, + "step": 44419 + }, + { + "epoch": 0.04, + "learning_rate": 4.97580117656719e-05, + "loss": 0.9915, + "step": 44420 + }, + { + "epoch": 0.04, + "learning_rate": 4.9758000863154045e-05, + "loss": 0.7567, + "step": 44421 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757989960391784e-05, + "loss": 0.879, + "step": 44422 + }, + { + "epoch": 0.04, + "learning_rate": 4.975797905738513e-05, + "loss": 0.8727, + "step": 44423 + }, + { + "epoch": 0.04, + "learning_rate": 4.975796815413406e-05, + "loss": 1.0431, + "step": 44424 + }, + { + "epoch": 0.04, + "learning_rate": 4.975795725063861e-05, + "loss": 1.0133, + "step": 44425 + }, + { + "epoch": 0.04, + "learning_rate": 4.975794634689875e-05, + "loss": 0.85, + "step": 44426 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757935442914486e-05, + "loss": 1.5363, + "step": 44427 + }, + { + "epoch": 0.04, + "learning_rate": 4.975792453868583e-05, + "loss": 1.0249, + "step": 44428 + }, + { + "epoch": 0.04, + "learning_rate": 4.975791363421277e-05, + "loss": 0.996, + "step": 44429 + }, + { + "epoch": 0.04, + "learning_rate": 4.975790272949531e-05, + "loss": 0.6647, + "step": 44430 + }, + { + "epoch": 0.04, + "learning_rate": 4.975789182453345e-05, + "loss": 1.178, + "step": 44431 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757880919327194e-05, + "loss": 0.7118, + "step": 44432 + }, + { + "epoch": 0.04, + "learning_rate": 4.975787001387654e-05, + "loss": 1.0416, + "step": 44433 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757859108181484e-05, + "loss": 0.9892, + "step": 44434 + }, + { + "epoch": 0.04, + "learning_rate": 4.975784820224202e-05, + "loss": 0.7129, + "step": 44435 + }, + { + "epoch": 0.04, + "learning_rate": 4.975783729605817e-05, + "loss": 0.9722, + "step": 44436 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757826389629915e-05, + "loss": 0.8658, + "step": 44437 + }, + { + "epoch": 0.04, + "learning_rate": 4.975781548295726e-05, + "loss": 1.1323, + "step": 44438 + }, + { + "epoch": 0.04, + "learning_rate": 4.975780457604021e-05, + "loss": 1.1424, + "step": 44439 + }, + { + "epoch": 0.04, + "learning_rate": 4.975779366887876e-05, + "loss": 0.9862, + "step": 44440 + }, + { + "epoch": 0.04, + "learning_rate": 4.975778276147291e-05, + "loss": 1.1434, + "step": 44441 + }, + { + "epoch": 0.04, + "learning_rate": 4.975777185382266e-05, + "loss": 0.8663, + "step": 44442 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757760945928025e-05, + "loss": 0.6844, + "step": 44443 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757750037788975e-05, + "loss": 0.6476, + "step": 44444 + }, + { + "epoch": 0.04, + "learning_rate": 4.975773912940553e-05, + "loss": 0.6389, + "step": 44445 + }, + { + "epoch": 0.04, + "learning_rate": 4.97577282207777e-05, + "loss": 0.5223, + "step": 44446 + }, + { + "epoch": 0.04, + "learning_rate": 4.975771731190546e-05, + "loss": 1.2739, + "step": 44447 + }, + { + "epoch": 0.04, + "learning_rate": 4.975770640278883e-05, + "loss": 1.4555, + "step": 44448 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757695493427794e-05, + "loss": 1.3284, + "step": 44449 + }, + { + "epoch": 0.04, + "learning_rate": 4.975768458382236e-05, + "loss": 1.0855, + "step": 44450 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757673673972535e-05, + "loss": 0.7777, + "step": 44451 + }, + { + "epoch": 0.04, + "learning_rate": 4.975766276387831e-05, + "loss": 0.2085, + "step": 44452 + }, + { + "epoch": 0.04, + "learning_rate": 4.975765185353969e-05, + "loss": 1.1639, + "step": 44453 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757640942956666e-05, + "loss": 0.9765, + "step": 44454 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757630032129255e-05, + "loss": 0.9448, + "step": 44455 + }, + { + "epoch": 0.04, + "learning_rate": 4.975761912105744e-05, + "loss": 1.0741, + "step": 44456 + }, + { + "epoch": 0.04, + "learning_rate": 4.975760820974122e-05, + "loss": 1.5458, + "step": 44457 + }, + { + "epoch": 0.04, + "learning_rate": 4.975759729818063e-05, + "loss": 1.7422, + "step": 44458 + }, + { + "epoch": 0.04, + "learning_rate": 4.975758638637562e-05, + "loss": 1.62, + "step": 44459 + }, + { + "epoch": 0.04, + "learning_rate": 4.975757547432622e-05, + "loss": 1.1053, + "step": 44460 + }, + { + "epoch": 0.04, + "learning_rate": 4.975756456203242e-05, + "loss": 1.2188, + "step": 44461 + }, + { + "epoch": 0.04, + "learning_rate": 4.975755364949423e-05, + "loss": 1.0944, + "step": 44462 + }, + { + "epoch": 0.04, + "learning_rate": 4.975754273671164e-05, + "loss": 0.7203, + "step": 44463 + }, + { + "epoch": 0.04, + "learning_rate": 4.975753182368466e-05, + "loss": 0.9387, + "step": 44464 + }, + { + "epoch": 0.04, + "learning_rate": 4.975752091041328e-05, + "loss": 1.3965, + "step": 44465 + }, + { + "epoch": 0.04, + "learning_rate": 4.97575099968975e-05, + "loss": 1.2373, + "step": 44466 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757499083137324e-05, + "loss": 1.178, + "step": 44467 + }, + { + "epoch": 0.04, + "learning_rate": 4.975748816913276e-05, + "loss": 1.0698, + "step": 44468 + }, + { + "epoch": 0.04, + "learning_rate": 4.97574772548838e-05, + "loss": 1.1282, + "step": 44469 + }, + { + "epoch": 0.04, + "learning_rate": 4.975746634039044e-05, + "loss": 1.1798, + "step": 44470 + }, + { + "epoch": 0.04, + "learning_rate": 4.975745542565269e-05, + "loss": 1.4351, + "step": 44471 + }, + { + "epoch": 0.04, + "learning_rate": 4.975744451067054e-05, + "loss": 1.1219, + "step": 44472 + }, + { + "epoch": 0.04, + "learning_rate": 4.975743359544399e-05, + "loss": 0.54, + "step": 44473 + }, + { + "epoch": 0.04, + "learning_rate": 4.975742267997305e-05, + "loss": 0.5427, + "step": 44474 + }, + { + "epoch": 0.04, + "learning_rate": 4.975741176425772e-05, + "loss": 0.2614, + "step": 44475 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757400848298e-05, + "loss": 0.1312, + "step": 44476 + }, + { + "epoch": 0.04, + "learning_rate": 4.975738993209387e-05, + "loss": 0.128, + "step": 44477 + }, + { + "epoch": 0.04, + "learning_rate": 4.975737901564536e-05, + "loss": 0.0685, + "step": 44478 + }, + { + "epoch": 0.04, + "learning_rate": 4.975736809895244e-05, + "loss": 0.1192, + "step": 44479 + }, + { + "epoch": 0.04, + "learning_rate": 4.975735718201514e-05, + "loss": 0.7829, + "step": 44480 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757346264833434e-05, + "loss": 0.9829, + "step": 44481 + }, + { + "epoch": 0.04, + "learning_rate": 4.975733534740734e-05, + "loss": 1.0255, + "step": 44482 + }, + { + "epoch": 0.04, + "learning_rate": 4.975732442973685e-05, + "loss": 1.2642, + "step": 44483 + }, + { + "epoch": 0.04, + "learning_rate": 4.975731351182197e-05, + "loss": 1.0232, + "step": 44484 + }, + { + "epoch": 0.04, + "learning_rate": 4.975730259366269e-05, + "loss": 1.2657, + "step": 44485 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757291675259024e-05, + "loss": 0.9014, + "step": 44486 + }, + { + "epoch": 0.04, + "learning_rate": 4.975728075661096e-05, + "loss": 0.611, + "step": 44487 + }, + { + "epoch": 0.04, + "learning_rate": 4.97572698377185e-05, + "loss": 0.4641, + "step": 44488 + }, + { + "epoch": 0.04, + "learning_rate": 4.975725891858165e-05, + "loss": 0.642, + "step": 44489 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757247999200404e-05, + "loss": 1.117, + "step": 44490 + }, + { + "epoch": 0.04, + "learning_rate": 4.975723707957477e-05, + "loss": 1.1836, + "step": 44491 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757226159704745e-05, + "loss": 0.8819, + "step": 44492 + }, + { + "epoch": 0.04, + "learning_rate": 4.975721523959032e-05, + "loss": 1.0615, + "step": 44493 + }, + { + "epoch": 0.04, + "learning_rate": 4.975720431923151e-05, + "loss": 1.2509, + "step": 44494 + }, + { + "epoch": 0.04, + "learning_rate": 4.97571933986283e-05, + "loss": 1.1723, + "step": 44495 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757182477780693e-05, + "loss": 0.7287, + "step": 44496 + }, + { + "epoch": 0.04, + "learning_rate": 4.975717155668871e-05, + "loss": 1.3351, + "step": 44497 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757160635352315e-05, + "loss": 1.3256, + "step": 44498 + }, + { + "epoch": 0.04, + "learning_rate": 4.975714971377154e-05, + "loss": 1.028, + "step": 44499 + }, + { + "epoch": 0.04, + "learning_rate": 4.975713879194637e-05, + "loss": 1.1134, + "step": 44500 + }, + { + "epoch": 0.04, + "eval_loss": 1.0934947729110718, + "eval_runtime": 129.2656, + "eval_samples_per_second": 10.714, + "eval_steps_per_second": 5.361, + "step": 44500 + }, + { + "epoch": 0.04, + "learning_rate": 4.975712786987681e-05, + "loss": 1.1821, + "step": 44501 + }, + { + "epoch": 0.04, + "learning_rate": 4.975711694756285e-05, + "loss": 1.004, + "step": 44502 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757106025004516e-05, + "loss": 0.9842, + "step": 44503 + }, + { + "epoch": 0.04, + "learning_rate": 4.975709510220178e-05, + "loss": 0.6816, + "step": 44504 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757084179154646e-05, + "loss": 0.6615, + "step": 44505 + }, + { + "epoch": 0.04, + "learning_rate": 4.975707325586313e-05, + "loss": 1.1789, + "step": 44506 + }, + { + "epoch": 0.04, + "learning_rate": 4.975706233232721e-05, + "loss": 1.1939, + "step": 44507 + }, + { + "epoch": 0.04, + "learning_rate": 4.975705140854691e-05, + "loss": 1.0176, + "step": 44508 + }, + { + "epoch": 0.04, + "learning_rate": 4.975704048452221e-05, + "loss": 1.1917, + "step": 44509 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757029560253135e-05, + "loss": 1.3303, + "step": 44510 + }, + { + "epoch": 0.04, + "learning_rate": 4.975701863573966e-05, + "loss": 1.2063, + "step": 44511 + }, + { + "epoch": 0.04, + "learning_rate": 4.975700771098179e-05, + "loss": 1.0244, + "step": 44512 + }, + { + "epoch": 0.04, + "learning_rate": 4.975699678597953e-05, + "loss": 0.8673, + "step": 44513 + }, + { + "epoch": 0.04, + "learning_rate": 4.975698586073288e-05, + "loss": 0.9546, + "step": 44514 + }, + { + "epoch": 0.04, + "learning_rate": 4.975697493524184e-05, + "loss": 1.0203, + "step": 44515 + }, + { + "epoch": 0.04, + "learning_rate": 4.975696400950641e-05, + "loss": 0.9554, + "step": 44516 + }, + { + "epoch": 0.04, + "learning_rate": 4.975695308352659e-05, + "loss": 0.8055, + "step": 44517 + }, + { + "epoch": 0.04, + "learning_rate": 4.975694215730238e-05, + "loss": 1.214, + "step": 44518 + }, + { + "epoch": 0.04, + "learning_rate": 4.975693123083378e-05, + "loss": 1.1121, + "step": 44519 + }, + { + "epoch": 0.04, + "learning_rate": 4.975692030412079e-05, + "loss": 0.8523, + "step": 44520 + }, + { + "epoch": 0.04, + "learning_rate": 4.975690937716341e-05, + "loss": 1.0666, + "step": 44521 + }, + { + "epoch": 0.04, + "learning_rate": 4.975689844996164e-05, + "loss": 1.1977, + "step": 44522 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756887522515485e-05, + "loss": 1.0113, + "step": 44523 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756876594824936e-05, + "loss": 1.2196, + "step": 44524 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756865666889994e-05, + "loss": 1.3104, + "step": 44525 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756854738710666e-05, + "loss": 1.018, + "step": 44526 + }, + { + "epoch": 0.04, + "learning_rate": 4.975684381028695e-05, + "loss": 0.8012, + "step": 44527 + }, + { + "epoch": 0.04, + "learning_rate": 4.975683288161884e-05, + "loss": 1.4019, + "step": 44528 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756821952706354e-05, + "loss": 0.8504, + "step": 44529 + }, + { + "epoch": 0.04, + "learning_rate": 4.975681102354947e-05, + "loss": 0.7718, + "step": 44530 + }, + { + "epoch": 0.04, + "learning_rate": 4.975680009414819e-05, + "loss": 0.8842, + "step": 44531 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756789164502534e-05, + "loss": 0.9013, + "step": 44532 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756778234612484e-05, + "loss": 1.1925, + "step": 44533 + }, + { + "epoch": 0.04, + "learning_rate": 4.975676730447805e-05, + "loss": 1.0736, + "step": 44534 + }, + { + "epoch": 0.04, + "learning_rate": 4.975675637409922e-05, + "loss": 1.1944, + "step": 44535 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756745443476006e-05, + "loss": 1.8551, + "step": 44536 + }, + { + "epoch": 0.04, + "learning_rate": 4.97567345126084e-05, + "loss": 1.8024, + "step": 44537 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756723581496406e-05, + "loss": 1.2288, + "step": 44538 + }, + { + "epoch": 0.04, + "learning_rate": 4.975671265014003e-05, + "loss": 0.7171, + "step": 44539 + }, + { + "epoch": 0.04, + "learning_rate": 4.975670171853926e-05, + "loss": 0.9267, + "step": 44540 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756690786694105e-05, + "loss": 0.9044, + "step": 44541 + }, + { + "epoch": 0.04, + "learning_rate": 4.975667985460456e-05, + "loss": 1.3449, + "step": 44542 + }, + { + "epoch": 0.04, + "learning_rate": 4.975666892227063e-05, + "loss": 1.1153, + "step": 44543 + }, + { + "epoch": 0.04, + "learning_rate": 4.975665798969231e-05, + "loss": 1.0271, + "step": 44544 + }, + { + "epoch": 0.04, + "learning_rate": 4.975664705686961e-05, + "loss": 0.9081, + "step": 44545 + }, + { + "epoch": 0.04, + "learning_rate": 4.975663612380252e-05, + "loss": 1.0335, + "step": 44546 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756625190491034e-05, + "loss": 1.0113, + "step": 44547 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756614256935175e-05, + "loss": 1.0543, + "step": 44548 + }, + { + "epoch": 0.04, + "learning_rate": 4.975660332313492e-05, + "loss": 1.1105, + "step": 44549 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756592389090285e-05, + "loss": 1.0074, + "step": 44550 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756581454801255e-05, + "loss": 0.868, + "step": 44551 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756570520267845e-05, + "loss": 1.045, + "step": 44552 + }, + { + "epoch": 0.04, + "learning_rate": 4.975655958549004e-05, + "loss": 0.7093, + "step": 44553 + }, + { + "epoch": 0.04, + "learning_rate": 4.975654865046786e-05, + "loss": 0.8159, + "step": 44554 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756537715201286e-05, + "loss": 0.9377, + "step": 44555 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756526779690326e-05, + "loss": 1.1757, + "step": 44556 + }, + { + "epoch": 0.04, + "learning_rate": 4.975651584393498e-05, + "loss": 0.8202, + "step": 44557 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756504907935254e-05, + "loss": 0.9256, + "step": 44558 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756493971691136e-05, + "loss": 0.9236, + "step": 44559 + }, + { + "epoch": 0.04, + "learning_rate": 4.975648303520264e-05, + "loss": 0.8721, + "step": 44560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756472098469756e-05, + "loss": 1.0116, + "step": 44561 + }, + { + "epoch": 0.04, + "learning_rate": 4.975646116149247e-05, + "loss": 1.0285, + "step": 44562 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756450224270825e-05, + "loss": 1.2667, + "step": 44563 + }, + { + "epoch": 0.04, + "learning_rate": 4.975643928680478e-05, + "loss": 1.0311, + "step": 44564 + }, + { + "epoch": 0.04, + "learning_rate": 4.975642834909435e-05, + "loss": 0.9458, + "step": 44565 + }, + { + "epoch": 0.04, + "learning_rate": 4.975641741113953e-05, + "loss": 1.1597, + "step": 44566 + }, + { + "epoch": 0.04, + "learning_rate": 4.975640647294034e-05, + "loss": 0.9621, + "step": 44567 + }, + { + "epoch": 0.04, + "learning_rate": 4.975639553449676e-05, + "loss": 0.8551, + "step": 44568 + }, + { + "epoch": 0.04, + "learning_rate": 4.975638459580879e-05, + "loss": 0.5337, + "step": 44569 + }, + { + "epoch": 0.04, + "learning_rate": 4.975637365687643e-05, + "loss": 0.3477, + "step": 44570 + }, + { + "epoch": 0.04, + "learning_rate": 4.97563627176997e-05, + "loss": 0.3831, + "step": 44571 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756351778278574e-05, + "loss": 0.715, + "step": 44572 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756340838613066e-05, + "loss": 1.4877, + "step": 44573 + }, + { + "epoch": 0.04, + "learning_rate": 4.975632989870318e-05, + "loss": 0.8682, + "step": 44574 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756318958548906e-05, + "loss": 1.027, + "step": 44575 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756308018150254e-05, + "loss": 0.4843, + "step": 44576 + }, + { + "epoch": 0.04, + "learning_rate": 4.975629707750721e-05, + "loss": 0.3178, + "step": 44577 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756286136619786e-05, + "loss": 1.4994, + "step": 44578 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756275195487976e-05, + "loss": 1.1377, + "step": 44579 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756264254111786e-05, + "loss": 0.9389, + "step": 44580 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756253312491205e-05, + "loss": 0.9669, + "step": 44581 + }, + { + "epoch": 0.04, + "learning_rate": 4.975624237062625e-05, + "loss": 1.0074, + "step": 44582 + }, + { + "epoch": 0.04, + "learning_rate": 4.975623142851691e-05, + "loss": 0.8625, + "step": 44583 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756220486163184e-05, + "loss": 0.9084, + "step": 44584 + }, + { + "epoch": 0.04, + "learning_rate": 4.975620954356507e-05, + "loss": 0.7655, + "step": 44585 + }, + { + "epoch": 0.04, + "learning_rate": 4.975619860072258e-05, + "loss": 1.0475, + "step": 44586 + }, + { + "epoch": 0.04, + "learning_rate": 4.975618765763571e-05, + "loss": 0.9622, + "step": 44587 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756176714304454e-05, + "loss": 1.321, + "step": 44588 + }, + { + "epoch": 0.04, + "learning_rate": 4.975616577072881e-05, + "loss": 1.2546, + "step": 44589 + }, + { + "epoch": 0.04, + "learning_rate": 4.97561548269088e-05, + "loss": 1.0696, + "step": 44590 + }, + { + "epoch": 0.04, + "learning_rate": 4.975614388284438e-05, + "loss": 1.2378, + "step": 44591 + }, + { + "epoch": 0.04, + "learning_rate": 4.97561329385356e-05, + "loss": 1.2604, + "step": 44592 + }, + { + "epoch": 0.04, + "learning_rate": 4.975612199398244e-05, + "loss": 0.9712, + "step": 44593 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756111049184885e-05, + "loss": 1.201, + "step": 44594 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756100104142955e-05, + "loss": 1.5143, + "step": 44595 + }, + { + "epoch": 0.04, + "learning_rate": 4.975608915885664e-05, + "loss": 0.7694, + "step": 44596 + }, + { + "epoch": 0.04, + "learning_rate": 4.975607821332594e-05, + "loss": 1.4325, + "step": 44597 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756067267550867e-05, + "loss": 1.1435, + "step": 44598 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756056321531405e-05, + "loss": 1.0175, + "step": 44599 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756045375267565e-05, + "loss": 0.7461, + "step": 44600 + }, + { + "epoch": 0.04, + "learning_rate": 4.9756034428759346e-05, + "loss": 1.0247, + "step": 44601 + }, + { + "epoch": 0.04, + "learning_rate": 4.975602348200675e-05, + "loss": 1.0005, + "step": 44602 + }, + { + "epoch": 0.04, + "learning_rate": 4.975601253500977e-05, + "loss": 0.8307, + "step": 44603 + }, + { + "epoch": 0.04, + "learning_rate": 4.97560015877684e-05, + "loss": 0.7061, + "step": 44604 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755990640282656e-05, + "loss": 0.5068, + "step": 44605 + }, + { + "epoch": 0.04, + "learning_rate": 4.975597969255253e-05, + "loss": 1.2718, + "step": 44606 + }, + { + "epoch": 0.04, + "learning_rate": 4.975596874457802e-05, + "loss": 1.0863, + "step": 44607 + }, + { + "epoch": 0.04, + "learning_rate": 4.975595779635913e-05, + "loss": 0.7542, + "step": 44608 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755946847895865e-05, + "loss": 1.8435, + "step": 44609 + }, + { + "epoch": 0.04, + "learning_rate": 4.975593589918822e-05, + "loss": 1.2309, + "step": 44610 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755924950236195e-05, + "loss": 1.2967, + "step": 44611 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755914001039785e-05, + "loss": 1.0971, + "step": 44612 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755903051598995e-05, + "loss": 0.7682, + "step": 44613 + }, + { + "epoch": 0.04, + "learning_rate": 4.975589210191383e-05, + "loss": 0.8004, + "step": 44614 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755881151984284e-05, + "loss": 0.9758, + "step": 44615 + }, + { + "epoch": 0.04, + "learning_rate": 4.975587020181036e-05, + "loss": 0.699, + "step": 44616 + }, + { + "epoch": 0.04, + "learning_rate": 4.975585925139205e-05, + "loss": 0.9747, + "step": 44617 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755848300729365e-05, + "loss": 0.9779, + "step": 44618 + }, + { + "epoch": 0.04, + "learning_rate": 4.97558373498223e-05, + "loss": 0.8999, + "step": 44619 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755826398670856e-05, + "loss": 0.8572, + "step": 44620 + }, + { + "epoch": 0.04, + "learning_rate": 4.975581544727503e-05, + "loss": 0.9102, + "step": 44621 + }, + { + "epoch": 0.04, + "learning_rate": 4.975580449563483e-05, + "loss": 0.8558, + "step": 44622 + }, + { + "epoch": 0.04, + "learning_rate": 4.975579354375025e-05, + "loss": 1.1688, + "step": 44623 + }, + { + "epoch": 0.04, + "learning_rate": 4.975578259162129e-05, + "loss": 1.1068, + "step": 44624 + }, + { + "epoch": 0.04, + "learning_rate": 4.975577163924795e-05, + "loss": 1.1215, + "step": 44625 + }, + { + "epoch": 0.04, + "learning_rate": 4.975576068663023e-05, + "loss": 1.1038, + "step": 44626 + }, + { + "epoch": 0.04, + "learning_rate": 4.975574973376814e-05, + "loss": 1.0779, + "step": 44627 + }, + { + "epoch": 0.04, + "learning_rate": 4.975573878066167e-05, + "loss": 1.0511, + "step": 44628 + }, + { + "epoch": 0.04, + "learning_rate": 4.975572782731081e-05, + "loss": 0.8964, + "step": 44629 + }, + { + "epoch": 0.04, + "learning_rate": 4.975571687371558e-05, + "loss": 1.0021, + "step": 44630 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755705919875974e-05, + "loss": 0.6134, + "step": 44631 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755694965791987e-05, + "loss": 0.8286, + "step": 44632 + }, + { + "epoch": 0.04, + "learning_rate": 4.975568401146362e-05, + "loss": 0.5051, + "step": 44633 + }, + { + "epoch": 0.04, + "learning_rate": 4.975567305689088e-05, + "loss": 0.6416, + "step": 44634 + }, + { + "epoch": 0.04, + "learning_rate": 4.975566210207376e-05, + "loss": 0.509, + "step": 44635 + }, + { + "epoch": 0.04, + "learning_rate": 4.975565114701226e-05, + "loss": 0.9992, + "step": 44636 + }, + { + "epoch": 0.04, + "learning_rate": 4.975564019170639e-05, + "loss": 1.0517, + "step": 44637 + }, + { + "epoch": 0.04, + "learning_rate": 4.975562923615614e-05, + "loss": 1.1653, + "step": 44638 + }, + { + "epoch": 0.04, + "learning_rate": 4.975561828036151e-05, + "loss": 1.3733, + "step": 44639 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755607324322506e-05, + "loss": 1.1836, + "step": 44640 + }, + { + "epoch": 0.04, + "learning_rate": 4.975559636803913e-05, + "loss": 0.7331, + "step": 44641 + }, + { + "epoch": 0.04, + "learning_rate": 4.975558541151136e-05, + "loss": 1.1572, + "step": 44642 + }, + { + "epoch": 0.04, + "learning_rate": 4.975557445473923e-05, + "loss": 1.0862, + "step": 44643 + }, + { + "epoch": 0.04, + "learning_rate": 4.975556349772271e-05, + "loss": 1.1315, + "step": 44644 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755552540461824e-05, + "loss": 1.243, + "step": 44645 + }, + { + "epoch": 0.04, + "learning_rate": 4.975554158295656e-05, + "loss": 1.1664, + "step": 44646 + }, + { + "epoch": 0.04, + "learning_rate": 4.975553062520692e-05, + "loss": 1.2336, + "step": 44647 + }, + { + "epoch": 0.04, + "learning_rate": 4.97555196672129e-05, + "loss": 1.1135, + "step": 44648 + }, + { + "epoch": 0.04, + "learning_rate": 4.975550870897451e-05, + "loss": 1.0859, + "step": 44649 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755497750491744e-05, + "loss": 0.7014, + "step": 44650 + }, + { + "epoch": 0.04, + "learning_rate": 4.975548679176459e-05, + "loss": 0.9634, + "step": 44651 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755475832793076e-05, + "loss": 1.0242, + "step": 44652 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755464873577174e-05, + "loss": 0.8344, + "step": 44653 + }, + { + "epoch": 0.04, + "learning_rate": 4.97554539141169e-05, + "loss": 0.8389, + "step": 44654 + }, + { + "epoch": 0.04, + "learning_rate": 4.975544295441226e-05, + "loss": 0.7255, + "step": 44655 + }, + { + "epoch": 0.04, + "learning_rate": 4.975543199446323e-05, + "loss": 0.5986, + "step": 44656 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755421034269834e-05, + "loss": 1.1447, + "step": 44657 + }, + { + "epoch": 0.04, + "learning_rate": 4.975541007383206e-05, + "loss": 0.9064, + "step": 44658 + }, + { + "epoch": 0.04, + "learning_rate": 4.975539911314991e-05, + "loss": 0.9572, + "step": 44659 + }, + { + "epoch": 0.04, + "learning_rate": 4.975538815222339e-05, + "loss": 0.9671, + "step": 44660 + }, + { + "epoch": 0.04, + "learning_rate": 4.975537719105249e-05, + "loss": 0.6614, + "step": 44661 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755366229637213e-05, + "loss": 1.3022, + "step": 44662 + }, + { + "epoch": 0.04, + "learning_rate": 4.975535526797757e-05, + "loss": 1.1474, + "step": 44663 + }, + { + "epoch": 0.04, + "learning_rate": 4.975534430607355e-05, + "loss": 1.0395, + "step": 44664 + }, + { + "epoch": 0.04, + "learning_rate": 4.975533334392515e-05, + "loss": 0.6726, + "step": 44665 + }, + { + "epoch": 0.04, + "learning_rate": 4.975532238153238e-05, + "loss": 0.8152, + "step": 44666 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755311418895236e-05, + "loss": 0.8524, + "step": 44667 + }, + { + "epoch": 0.04, + "learning_rate": 4.975530045601372e-05, + "loss": 0.7359, + "step": 44668 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755289492887826e-05, + "loss": 0.9296, + "step": 44669 + }, + { + "epoch": 0.04, + "learning_rate": 4.975527852951756e-05, + "loss": 0.9121, + "step": 44670 + }, + { + "epoch": 0.04, + "learning_rate": 4.975526756590292e-05, + "loss": 1.1387, + "step": 44671 + }, + { + "epoch": 0.04, + "learning_rate": 4.975525660204391e-05, + "loss": 1.3952, + "step": 44672 + }, + { + "epoch": 0.04, + "learning_rate": 4.975524563794052e-05, + "loss": 1.3147, + "step": 44673 + }, + { + "epoch": 0.04, + "learning_rate": 4.975523467359276e-05, + "loss": 1.3324, + "step": 44674 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755223709000625e-05, + "loss": 1.2069, + "step": 44675 + }, + { + "epoch": 0.04, + "learning_rate": 4.975521274416412e-05, + "loss": 1.1597, + "step": 44676 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755201779083236e-05, + "loss": 1.0615, + "step": 44677 + }, + { + "epoch": 0.04, + "learning_rate": 4.975519081375799e-05, + "loss": 0.8524, + "step": 44678 + }, + { + "epoch": 0.04, + "learning_rate": 4.975517984818836e-05, + "loss": 1.0143, + "step": 44679 + }, + { + "epoch": 0.04, + "learning_rate": 4.975516888237437e-05, + "loss": 1.131, + "step": 44680 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755157916316e-05, + "loss": 0.7606, + "step": 44681 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755146950013255e-05, + "loss": 0.3431, + "step": 44682 + }, + { + "epoch": 0.04, + "learning_rate": 4.975513598346614e-05, + "loss": 0.2043, + "step": 44683 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755125016674646e-05, + "loss": 0.2056, + "step": 44684 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755114049638786e-05, + "loss": 0.8166, + "step": 44685 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755103082358554e-05, + "loss": 1.06, + "step": 44686 + }, + { + "epoch": 0.04, + "learning_rate": 4.975509211483395e-05, + "loss": 0.8609, + "step": 44687 + }, + { + "epoch": 0.04, + "learning_rate": 4.975508114706497e-05, + "loss": 1.3636, + "step": 44688 + }, + { + "epoch": 0.04, + "learning_rate": 4.975507017905163e-05, + "loss": 1.233, + "step": 44689 + }, + { + "epoch": 0.04, + "learning_rate": 4.975505921079391e-05, + "loss": 0.9506, + "step": 44690 + }, + { + "epoch": 0.04, + "learning_rate": 4.975504824229182e-05, + "loss": 1.2235, + "step": 44691 + }, + { + "epoch": 0.04, + "learning_rate": 4.9755037273545355e-05, + "loss": 1.031, + "step": 44692 + }, + { + "epoch": 0.04, + "learning_rate": 4.975502630455452e-05, + "loss": 0.9867, + "step": 44693 + }, + { + "epoch": 0.04, + "learning_rate": 4.975501533531932e-05, + "loss": 1.0148, + "step": 44694 + }, + { + "epoch": 0.04, + "learning_rate": 4.975500436583974e-05, + "loss": 1.0594, + "step": 44695 + }, + { + "epoch": 0.04, + "learning_rate": 4.97549933961158e-05, + "loss": 0.9252, + "step": 44696 + }, + { + "epoch": 0.04, + "learning_rate": 4.975498242614748e-05, + "loss": 1.0979, + "step": 44697 + }, + { + "epoch": 0.04, + "learning_rate": 4.97549714559348e-05, + "loss": 0.9666, + "step": 44698 + }, + { + "epoch": 0.04, + "learning_rate": 4.975496048547773e-05, + "loss": 0.9766, + "step": 44699 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754949514776306e-05, + "loss": 1.0011, + "step": 44700 + }, + { + "epoch": 0.04, + "learning_rate": 4.97549385438305e-05, + "loss": 0.9986, + "step": 44701 + }, + { + "epoch": 0.04, + "learning_rate": 4.975492757264033e-05, + "loss": 1.0975, + "step": 44702 + }, + { + "epoch": 0.04, + "learning_rate": 4.975491660120579e-05, + "loss": 0.9012, + "step": 44703 + }, + { + "epoch": 0.04, + "learning_rate": 4.975490562952688e-05, + "loss": 1.2465, + "step": 44704 + }, + { + "epoch": 0.04, + "learning_rate": 4.97548946576036e-05, + "loss": 0.7686, + "step": 44705 + }, + { + "epoch": 0.04, + "learning_rate": 4.975488368543595e-05, + "loss": 0.6763, + "step": 44706 + }, + { + "epoch": 0.04, + "learning_rate": 4.975487271302393e-05, + "loss": 0.6651, + "step": 44707 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754861740367545e-05, + "loss": 0.9087, + "step": 44708 + }, + { + "epoch": 0.04, + "learning_rate": 4.975485076746679e-05, + "loss": 0.7607, + "step": 44709 + }, + { + "epoch": 0.04, + "learning_rate": 4.975483979432165e-05, + "loss": 1.0558, + "step": 44710 + }, + { + "epoch": 0.04, + "learning_rate": 4.975482882093215e-05, + "loss": 1.0221, + "step": 44711 + }, + { + "epoch": 0.04, + "learning_rate": 4.975481784729829e-05, + "loss": 0.878, + "step": 44712 + }, + { + "epoch": 0.04, + "learning_rate": 4.975480687342006e-05, + "loss": 1.1327, + "step": 44713 + }, + { + "epoch": 0.04, + "learning_rate": 4.975479589929745e-05, + "loss": 0.9941, + "step": 44714 + }, + { + "epoch": 0.04, + "learning_rate": 4.975478492493048e-05, + "loss": 0.9818, + "step": 44715 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754773950319135e-05, + "loss": 1.15, + "step": 44716 + }, + { + "epoch": 0.04, + "learning_rate": 4.975476297546342e-05, + "loss": 1.0272, + "step": 44717 + }, + { + "epoch": 0.04, + "learning_rate": 4.975475200036334e-05, + "loss": 1.1155, + "step": 44718 + }, + { + "epoch": 0.04, + "learning_rate": 4.975474102501889e-05, + "loss": 0.5805, + "step": 44719 + }, + { + "epoch": 0.04, + "learning_rate": 4.975473004943007e-05, + "loss": 0.8576, + "step": 44720 + }, + { + "epoch": 0.04, + "learning_rate": 4.975471907359689e-05, + "loss": 1.5823, + "step": 44721 + }, + { + "epoch": 0.04, + "learning_rate": 4.975470809751933e-05, + "loss": 1.0981, + "step": 44722 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754697121197416e-05, + "loss": 1.9315, + "step": 44723 + }, + { + "epoch": 0.04, + "learning_rate": 4.975468614463112e-05, + "loss": 1.1256, + "step": 44724 + }, + { + "epoch": 0.04, + "learning_rate": 4.975467516782046e-05, + "loss": 1.1036, + "step": 44725 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754664190765435e-05, + "loss": 1.0376, + "step": 44726 + }, + { + "epoch": 0.04, + "learning_rate": 4.975465321346604e-05, + "loss": 1.0951, + "step": 44727 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754642235922286e-05, + "loss": 1.142, + "step": 44728 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754631258134156e-05, + "loss": 1.2358, + "step": 44729 + }, + { + "epoch": 0.04, + "learning_rate": 4.975462028010166e-05, + "loss": 1.0066, + "step": 44730 + }, + { + "epoch": 0.04, + "learning_rate": 4.975460930182479e-05, + "loss": 0.9037, + "step": 44731 + }, + { + "epoch": 0.04, + "learning_rate": 4.975459832330357e-05, + "loss": 0.6867, + "step": 44732 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754587344537974e-05, + "loss": 0.3736, + "step": 44733 + }, + { + "epoch": 0.04, + "learning_rate": 4.975457636552801e-05, + "loss": 1.5164, + "step": 44734 + }, + { + "epoch": 0.04, + "learning_rate": 4.975456538627368e-05, + "loss": 1.294, + "step": 44735 + }, + { + "epoch": 0.04, + "learning_rate": 4.975455440677498e-05, + "loss": 1.0131, + "step": 44736 + }, + { + "epoch": 0.04, + "learning_rate": 4.975454342703192e-05, + "loss": 1.418, + "step": 44737 + }, + { + "epoch": 0.04, + "learning_rate": 4.975453244704449e-05, + "loss": 0.9046, + "step": 44738 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754521466812695e-05, + "loss": 0.8484, + "step": 44739 + }, + { + "epoch": 0.04, + "learning_rate": 4.975451048633654e-05, + "loss": 0.6592, + "step": 44740 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754499505616006e-05, + "loss": 0.7011, + "step": 44741 + }, + { + "epoch": 0.04, + "learning_rate": 4.975448852465111e-05, + "loss": 0.7187, + "step": 44742 + }, + { + "epoch": 0.04, + "learning_rate": 4.975447754344185e-05, + "loss": 0.6957, + "step": 44743 + }, + { + "epoch": 0.04, + "learning_rate": 4.975446656198822e-05, + "loss": 0.5661, + "step": 44744 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754455580290234e-05, + "loss": 0.6814, + "step": 44745 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754444598347875e-05, + "loss": 0.5777, + "step": 44746 + }, + { + "epoch": 0.04, + "learning_rate": 4.975443361616116e-05, + "loss": 0.8581, + "step": 44747 + }, + { + "epoch": 0.04, + "learning_rate": 4.975442263373007e-05, + "loss": 0.9126, + "step": 44748 + }, + { + "epoch": 0.04, + "learning_rate": 4.975441165105461e-05, + "loss": 1.0361, + "step": 44749 + }, + { + "epoch": 0.04, + "learning_rate": 4.975440066813479e-05, + "loss": 1.0008, + "step": 44750 + }, + { + "epoch": 0.04, + "learning_rate": 4.975438968497062e-05, + "loss": 1.3096, + "step": 44751 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754378701562065e-05, + "loss": 1.2048, + "step": 44752 + }, + { + "epoch": 0.04, + "learning_rate": 4.975436771790915e-05, + "loss": 0.9418, + "step": 44753 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754356734011876e-05, + "loss": 1.4237, + "step": 44754 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754345749870234e-05, + "loss": 1.1344, + "step": 44755 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754334765484225e-05, + "loss": 0.9671, + "step": 44756 + }, + { + "epoch": 0.04, + "learning_rate": 4.975432378085385e-05, + "loss": 0.6821, + "step": 44757 + }, + { + "epoch": 0.04, + "learning_rate": 4.975431279597912e-05, + "loss": 1.0775, + "step": 44758 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754301810860014e-05, + "loss": 1.2316, + "step": 44759 + }, + { + "epoch": 0.04, + "learning_rate": 4.975429082549655e-05, + "loss": 0.9194, + "step": 44760 + }, + { + "epoch": 0.04, + "learning_rate": 4.975427983988873e-05, + "loss": 1.0207, + "step": 44761 + }, + { + "epoch": 0.04, + "learning_rate": 4.975426885403653e-05, + "loss": 0.7967, + "step": 44762 + }, + { + "epoch": 0.04, + "learning_rate": 4.975425786793998e-05, + "loss": 1.0806, + "step": 44763 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754246881599065e-05, + "loss": 1.3228, + "step": 44764 + }, + { + "epoch": 0.04, + "learning_rate": 4.975423589501378e-05, + "loss": 0.9707, + "step": 44765 + }, + { + "epoch": 0.04, + "learning_rate": 4.975422490818413e-05, + "loss": 1.0923, + "step": 44766 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754213921110127e-05, + "loss": 0.9895, + "step": 44767 + }, + { + "epoch": 0.04, + "learning_rate": 4.975420293379176e-05, + "loss": 1.0864, + "step": 44768 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754191946229024e-05, + "loss": 1.3676, + "step": 44769 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754180958421924e-05, + "loss": 0.9547, + "step": 44770 + }, + { + "epoch": 0.04, + "learning_rate": 4.975416997037046e-05, + "loss": 0.2065, + "step": 44771 + }, + { + "epoch": 0.04, + "learning_rate": 4.975415898207464e-05, + "loss": 0.1494, + "step": 44772 + }, + { + "epoch": 0.04, + "learning_rate": 4.975414799353445e-05, + "loss": 0.2069, + "step": 44773 + }, + { + "epoch": 0.04, + "learning_rate": 4.97541370047499e-05, + "loss": 0.1593, + "step": 44774 + }, + { + "epoch": 0.04, + "learning_rate": 4.975412601572099e-05, + "loss": 0.4166, + "step": 44775 + }, + { + "epoch": 0.04, + "learning_rate": 4.975411502644772e-05, + "loss": 1.2356, + "step": 44776 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754104036930084e-05, + "loss": 2.1477, + "step": 44777 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754093047168086e-05, + "loss": 1.3884, + "step": 44778 + }, + { + "epoch": 0.04, + "learning_rate": 4.975408205716173e-05, + "loss": 1.0473, + "step": 44779 + }, + { + "epoch": 0.04, + "learning_rate": 4.975407106691101e-05, + "loss": 0.8656, + "step": 44780 + }, + { + "epoch": 0.04, + "learning_rate": 4.975406007641592e-05, + "loss": 0.6559, + "step": 44781 + }, + { + "epoch": 0.04, + "learning_rate": 4.975404908567648e-05, + "loss": 0.9071, + "step": 44782 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754038094692675e-05, + "loss": 1.2203, + "step": 44783 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754027103464504e-05, + "loss": 1.0669, + "step": 44784 + }, + { + "epoch": 0.04, + "learning_rate": 4.9754016111991974e-05, + "loss": 0.9295, + "step": 44785 + }, + { + "epoch": 0.04, + "learning_rate": 4.975400512027508e-05, + "loss": 0.8734, + "step": 44786 + }, + { + "epoch": 0.04, + "learning_rate": 4.975399412831384e-05, + "loss": 1.1031, + "step": 44787 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753983136108225e-05, + "loss": 0.2647, + "step": 44788 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753972143658247e-05, + "loss": 0.1885, + "step": 44789 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753961150963916e-05, + "loss": 0.1141, + "step": 44790 + }, + { + "epoch": 0.04, + "learning_rate": 4.975395015802522e-05, + "loss": 0.1346, + "step": 44791 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753939164842164e-05, + "loss": 0.0499, + "step": 44792 + }, + { + "epoch": 0.04, + "learning_rate": 4.975392817141475e-05, + "loss": 0.0695, + "step": 44793 + }, + { + "epoch": 0.04, + "learning_rate": 4.975391717774297e-05, + "loss": 0.2828, + "step": 44794 + }, + { + "epoch": 0.04, + "learning_rate": 4.975390618382684e-05, + "loss": 0.4614, + "step": 44795 + }, + { + "epoch": 0.04, + "learning_rate": 4.975389518966634e-05, + "loss": 1.0854, + "step": 44796 + }, + { + "epoch": 0.04, + "learning_rate": 4.975388419526148e-05, + "loss": 1.1155, + "step": 44797 + }, + { + "epoch": 0.04, + "learning_rate": 4.975387320061226e-05, + "loss": 1.0671, + "step": 44798 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753862205718696e-05, + "loss": 0.6711, + "step": 44799 + }, + { + "epoch": 0.04, + "learning_rate": 4.975385121058076e-05, + "loss": 0.309, + "step": 44800 + }, + { + "epoch": 0.04, + "learning_rate": 4.975384021519846e-05, + "loss": 0.3785, + "step": 44801 + }, + { + "epoch": 0.04, + "learning_rate": 4.97538292195718e-05, + "loss": 0.2926, + "step": 44802 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753818223700785e-05, + "loss": 0.3417, + "step": 44803 + }, + { + "epoch": 0.04, + "learning_rate": 4.975380722758542e-05, + "loss": 0.5569, + "step": 44804 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753796231225683e-05, + "loss": 0.2783, + "step": 44805 + }, + { + "epoch": 0.04, + "learning_rate": 4.975378523462159e-05, + "loss": 0.5899, + "step": 44806 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753774237773146e-05, + "loss": 0.5167, + "step": 44807 + }, + { + "epoch": 0.04, + "learning_rate": 4.975376324068033e-05, + "loss": 0.4065, + "step": 44808 + }, + { + "epoch": 0.04, + "learning_rate": 4.975375224334317e-05, + "loss": 0.4748, + "step": 44809 + }, + { + "epoch": 0.04, + "learning_rate": 4.975374124576164e-05, + "loss": 0.1819, + "step": 44810 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753730247935746e-05, + "loss": 0.5018, + "step": 44811 + }, + { + "epoch": 0.04, + "learning_rate": 4.975371924986551e-05, + "loss": 0.6016, + "step": 44812 + }, + { + "epoch": 0.04, + "learning_rate": 4.975370825155091e-05, + "loss": 0.3445, + "step": 44813 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753697252991946e-05, + "loss": 0.2138, + "step": 44814 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753686254188624e-05, + "loss": 0.329, + "step": 44815 + }, + { + "epoch": 0.04, + "learning_rate": 4.975367525514095e-05, + "loss": 0.3994, + "step": 44816 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753664255848917e-05, + "loss": 0.3802, + "step": 44817 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753653256312525e-05, + "loss": 0.2543, + "step": 44818 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753642256531774e-05, + "loss": 0.2238, + "step": 44819 + }, + { + "epoch": 0.04, + "learning_rate": 4.975363125650667e-05, + "loss": 0.4314, + "step": 44820 + }, + { + "epoch": 0.04, + "learning_rate": 4.97536202562372e-05, + "loss": 0.1919, + "step": 44821 + }, + { + "epoch": 0.04, + "learning_rate": 4.975360925572338e-05, + "loss": 0.3393, + "step": 44822 + }, + { + "epoch": 0.04, + "learning_rate": 4.97535982549652e-05, + "loss": 0.1563, + "step": 44823 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753587253962665e-05, + "loss": 0.221, + "step": 44824 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753576252715775e-05, + "loss": 0.1512, + "step": 44825 + }, + { + "epoch": 0.04, + "learning_rate": 4.975356525122452e-05, + "loss": 0.1061, + "step": 44826 + }, + { + "epoch": 0.04, + "learning_rate": 4.975355424948892e-05, + "loss": 0.1789, + "step": 44827 + }, + { + "epoch": 0.04, + "learning_rate": 4.975354324750896e-05, + "loss": 0.2461, + "step": 44828 + }, + { + "epoch": 0.04, + "learning_rate": 4.975353224528463e-05, + "loss": 0.1733, + "step": 44829 + }, + { + "epoch": 0.04, + "learning_rate": 4.975352124281596e-05, + "loss": 0.1316, + "step": 44830 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753510240102925e-05, + "loss": 0.147, + "step": 44831 + }, + { + "epoch": 0.04, + "learning_rate": 4.975349923714554e-05, + "loss": 0.9518, + "step": 44832 + }, + { + "epoch": 0.04, + "learning_rate": 4.975348823394379e-05, + "loss": 0.8017, + "step": 44833 + }, + { + "epoch": 0.04, + "learning_rate": 4.975347723049769e-05, + "loss": 0.9334, + "step": 44834 + }, + { + "epoch": 0.04, + "learning_rate": 4.975346622680723e-05, + "loss": 1.1929, + "step": 44835 + }, + { + "epoch": 0.04, + "learning_rate": 4.975345522287243e-05, + "loss": 1.3007, + "step": 44836 + }, + { + "epoch": 0.04, + "learning_rate": 4.975344421869325e-05, + "loss": 1.0461, + "step": 44837 + }, + { + "epoch": 0.04, + "learning_rate": 4.975343321426973e-05, + "loss": 1.2757, + "step": 44838 + }, + { + "epoch": 0.04, + "learning_rate": 4.975342220960185e-05, + "loss": 1.7081, + "step": 44839 + }, + { + "epoch": 0.04, + "learning_rate": 4.975341120468961e-05, + "loss": 1.1015, + "step": 44840 + }, + { + "epoch": 0.04, + "learning_rate": 4.975340019953302e-05, + "loss": 0.6531, + "step": 44841 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753389194132085e-05, + "loss": 1.0066, + "step": 44842 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753378188486776e-05, + "loss": 0.8918, + "step": 44843 + }, + { + "epoch": 0.04, + "learning_rate": 4.975336718259712e-05, + "loss": 0.7089, + "step": 44844 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753356176463116e-05, + "loss": 0.9216, + "step": 44845 + }, + { + "epoch": 0.04, + "learning_rate": 4.975334517008475e-05, + "loss": 0.8726, + "step": 44846 + }, + { + "epoch": 0.04, + "learning_rate": 4.975333416346203e-05, + "loss": 0.9996, + "step": 44847 + }, + { + "epoch": 0.04, + "learning_rate": 4.975332315659496e-05, + "loss": 1.2096, + "step": 44848 + }, + { + "epoch": 0.04, + "learning_rate": 4.975331214948353e-05, + "loss": 0.9338, + "step": 44849 + }, + { + "epoch": 0.04, + "learning_rate": 4.975330114212775e-05, + "loss": 0.9201, + "step": 44850 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753290134527605e-05, + "loss": 1.0328, + "step": 44851 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753279126683114e-05, + "loss": 0.8648, + "step": 44852 + }, + { + "epoch": 0.04, + "learning_rate": 4.975326811859428e-05, + "loss": 1.0368, + "step": 44853 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753257110261077e-05, + "loss": 0.9456, + "step": 44854 + }, + { + "epoch": 0.04, + "learning_rate": 4.975324610168352e-05, + "loss": 0.931, + "step": 44855 + }, + { + "epoch": 0.04, + "learning_rate": 4.975323509286162e-05, + "loss": 1.2036, + "step": 44856 + }, + { + "epoch": 0.04, + "learning_rate": 4.975322408379536e-05, + "loss": 0.9429, + "step": 44857 + }, + { + "epoch": 0.04, + "learning_rate": 4.975321307448475e-05, + "loss": 1.3163, + "step": 44858 + }, + { + "epoch": 0.04, + "learning_rate": 4.975320206492978e-05, + "loss": 1.0306, + "step": 44859 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753191055130466e-05, + "loss": 1.5476, + "step": 44860 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753180045086786e-05, + "loss": 1.3502, + "step": 44861 + }, + { + "epoch": 0.04, + "learning_rate": 4.975316903479876e-05, + "loss": 0.6222, + "step": 44862 + }, + { + "epoch": 0.04, + "learning_rate": 4.975315802426639e-05, + "loss": 1.1246, + "step": 44863 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753147013489655e-05, + "loss": 1.2325, + "step": 44864 + }, + { + "epoch": 0.04, + "learning_rate": 4.975313600246857e-05, + "loss": 0.4396, + "step": 44865 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753124991203134e-05, + "loss": 0.2761, + "step": 44866 + }, + { + "epoch": 0.04, + "learning_rate": 4.975311397969335e-05, + "loss": 0.3198, + "step": 44867 + }, + { + "epoch": 0.04, + "learning_rate": 4.975310296793921e-05, + "loss": 0.2452, + "step": 44868 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753091955940715e-05, + "loss": 0.1967, + "step": 44869 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753080943697874e-05, + "loss": 0.2111, + "step": 44870 + }, + { + "epoch": 0.04, + "learning_rate": 4.975306993121067e-05, + "loss": 0.2684, + "step": 44871 + }, + { + "epoch": 0.04, + "learning_rate": 4.975305891847912e-05, + "loss": 0.0639, + "step": 44872 + }, + { + "epoch": 0.04, + "learning_rate": 4.975304790550322e-05, + "loss": 0.131, + "step": 44873 + }, + { + "epoch": 0.04, + "learning_rate": 4.975303689228298e-05, + "loss": 0.0674, + "step": 44874 + }, + { + "epoch": 0.04, + "learning_rate": 4.975302587881837e-05, + "loss": 0.0422, + "step": 44875 + }, + { + "epoch": 0.04, + "learning_rate": 4.975301486510942e-05, + "loss": 0.0354, + "step": 44876 + }, + { + "epoch": 0.04, + "learning_rate": 4.975300385115611e-05, + "loss": 0.0319, + "step": 44877 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752992836958454e-05, + "loss": 0.0295, + "step": 44878 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752981822516443e-05, + "loss": 0.0281, + "step": 44879 + }, + { + "epoch": 0.04, + "learning_rate": 4.975297080783008e-05, + "loss": 0.1183, + "step": 44880 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752959792899374e-05, + "loss": 0.8379, + "step": 44881 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752948777724314e-05, + "loss": 1.347, + "step": 44882 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752937762304896e-05, + "loss": 1.108, + "step": 44883 + }, + { + "epoch": 0.04, + "learning_rate": 4.975292674664114e-05, + "loss": 1.0976, + "step": 44884 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752915730733023e-05, + "loss": 0.9036, + "step": 44885 + }, + { + "epoch": 0.04, + "learning_rate": 4.975290471458056e-05, + "loss": 1.0365, + "step": 44886 + }, + { + "epoch": 0.04, + "learning_rate": 4.975289369818374e-05, + "loss": 1.1883, + "step": 44887 + }, + { + "epoch": 0.04, + "learning_rate": 4.975288268154258e-05, + "loss": 0.7084, + "step": 44888 + }, + { + "epoch": 0.04, + "learning_rate": 4.975287166465706e-05, + "loss": 1.2789, + "step": 44889 + }, + { + "epoch": 0.04, + "learning_rate": 4.97528606475272e-05, + "loss": 1.1134, + "step": 44890 + }, + { + "epoch": 0.04, + "learning_rate": 4.975284963015299e-05, + "loss": 1.2318, + "step": 44891 + }, + { + "epoch": 0.04, + "learning_rate": 4.975283861253442e-05, + "loss": 0.9731, + "step": 44892 + }, + { + "epoch": 0.04, + "learning_rate": 4.975282759467151e-05, + "loss": 0.9577, + "step": 44893 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752816576564254e-05, + "loss": 0.9122, + "step": 44894 + }, + { + "epoch": 0.04, + "learning_rate": 4.975280555821264e-05, + "loss": 1.0336, + "step": 44895 + }, + { + "epoch": 0.04, + "learning_rate": 4.975279453961668e-05, + "loss": 0.9913, + "step": 44896 + }, + { + "epoch": 0.04, + "learning_rate": 4.975278352077637e-05, + "loss": 1.3524, + "step": 44897 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752772501691705e-05, + "loss": 1.3725, + "step": 44898 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752761482362694e-05, + "loss": 1.0489, + "step": 44899 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752750462789345e-05, + "loss": 1.1152, + "step": 44900 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752739442971644e-05, + "loss": 0.7187, + "step": 44901 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752728422909583e-05, + "loss": 0.6874, + "step": 44902 + }, + { + "epoch": 0.04, + "learning_rate": 4.975271740260318e-05, + "loss": 1.1892, + "step": 44903 + }, + { + "epoch": 0.04, + "learning_rate": 4.975270638205243e-05, + "loss": 0.9221, + "step": 44904 + }, + { + "epoch": 0.04, + "learning_rate": 4.975269536125733e-05, + "loss": 1.0483, + "step": 44905 + }, + { + "epoch": 0.04, + "learning_rate": 4.975268434021788e-05, + "loss": 0.8381, + "step": 44906 + }, + { + "epoch": 0.04, + "learning_rate": 4.975267331893408e-05, + "loss": 1.1118, + "step": 44907 + }, + { + "epoch": 0.04, + "learning_rate": 4.975266229740594e-05, + "loss": 1.1981, + "step": 44908 + }, + { + "epoch": 0.04, + "learning_rate": 4.975265127563345e-05, + "loss": 1.2222, + "step": 44909 + }, + { + "epoch": 0.04, + "learning_rate": 4.975264025361661e-05, + "loss": 1.1773, + "step": 44910 + }, + { + "epoch": 0.04, + "learning_rate": 4.975262923135542e-05, + "loss": 0.7475, + "step": 44911 + }, + { + "epoch": 0.04, + "learning_rate": 4.975261820884989e-05, + "loss": 0.9007, + "step": 44912 + }, + { + "epoch": 0.04, + "learning_rate": 4.975260718610001e-05, + "loss": 0.8683, + "step": 44913 + }, + { + "epoch": 0.04, + "learning_rate": 4.975259616310578e-05, + "loss": 0.9892, + "step": 44914 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752585139867204e-05, + "loss": 0.8248, + "step": 44915 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752574116384276e-05, + "loss": 1.229, + "step": 44916 + }, + { + "epoch": 0.04, + "learning_rate": 4.975256309265701e-05, + "loss": 1.053, + "step": 44917 + }, + { + "epoch": 0.04, + "learning_rate": 4.975255206868539e-05, + "loss": 1.034, + "step": 44918 + }, + { + "epoch": 0.04, + "learning_rate": 4.975254104446943e-05, + "loss": 1.1204, + "step": 44919 + }, + { + "epoch": 0.04, + "learning_rate": 4.975253002000911e-05, + "loss": 1.2639, + "step": 44920 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752518995304456e-05, + "loss": 1.0992, + "step": 44921 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752507970355457e-05, + "loss": 1.2524, + "step": 44922 + }, + { + "epoch": 0.04, + "learning_rate": 4.975249694516211e-05, + "loss": 0.9764, + "step": 44923 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752485919724414e-05, + "loss": 1.0095, + "step": 44924 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752474894042365e-05, + "loss": 1.0224, + "step": 44925 + }, + { + "epoch": 0.04, + "learning_rate": 4.975246386811598e-05, + "loss": 1.0915, + "step": 44926 + }, + { + "epoch": 0.04, + "learning_rate": 4.975245284194524e-05, + "loss": 0.4114, + "step": 44927 + }, + { + "epoch": 0.04, + "learning_rate": 4.975244181553016e-05, + "loss": 0.3242, + "step": 44928 + }, + { + "epoch": 0.04, + "learning_rate": 4.975243078887074e-05, + "loss": 0.336, + "step": 44929 + }, + { + "epoch": 0.04, + "learning_rate": 4.975241976196696e-05, + "loss": 1.1962, + "step": 44930 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752408734818846e-05, + "loss": 1.3061, + "step": 44931 + }, + { + "epoch": 0.04, + "learning_rate": 4.975239770742639e-05, + "loss": 1.0549, + "step": 44932 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752386679789575e-05, + "loss": 0.7283, + "step": 44933 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752375651908425e-05, + "loss": 1.086, + "step": 44934 + }, + { + "epoch": 0.04, + "learning_rate": 4.975236462378293e-05, + "loss": 0.7938, + "step": 44935 + }, + { + "epoch": 0.04, + "learning_rate": 4.975235359541309e-05, + "loss": 1.2172, + "step": 44936 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752342566798896e-05, + "loss": 1.3086, + "step": 44937 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752331537940365e-05, + "loss": 1.3057, + "step": 44938 + }, + { + "epoch": 0.04, + "learning_rate": 4.975232050883749e-05, + "loss": 0.9294, + "step": 44939 + }, + { + "epoch": 0.04, + "learning_rate": 4.975230947949027e-05, + "loss": 0.8175, + "step": 44940 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752298449898706e-05, + "loss": 0.9415, + "step": 44941 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752287420062794e-05, + "loss": 0.9386, + "step": 44942 + }, + { + "epoch": 0.04, + "learning_rate": 4.975227638998254e-05, + "loss": 1.0469, + "step": 44943 + }, + { + "epoch": 0.04, + "learning_rate": 4.975226535965795e-05, + "loss": 0.7103, + "step": 44944 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752254329089e-05, + "loss": 1.2237, + "step": 44945 + }, + { + "epoch": 0.04, + "learning_rate": 4.975224329827571e-05, + "loss": 1.387, + "step": 44946 + }, + { + "epoch": 0.04, + "learning_rate": 4.975223226721809e-05, + "loss": 1.1254, + "step": 44947 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752221235916116e-05, + "loss": 2.2726, + "step": 44948 + }, + { + "epoch": 0.04, + "learning_rate": 4.97522102043698e-05, + "loss": 1.0963, + "step": 44949 + }, + { + "epoch": 0.04, + "learning_rate": 4.975219917257914e-05, + "loss": 1.7169, + "step": 44950 + }, + { + "epoch": 0.04, + "learning_rate": 4.975218814054413e-05, + "loss": 1.1791, + "step": 44951 + }, + { + "epoch": 0.04, + "learning_rate": 4.975217710826479e-05, + "loss": 1.0217, + "step": 44952 + }, + { + "epoch": 0.04, + "learning_rate": 4.97521660757411e-05, + "loss": 0.8002, + "step": 44953 + }, + { + "epoch": 0.04, + "learning_rate": 4.975215504297306e-05, + "loss": 1.1321, + "step": 44954 + }, + { + "epoch": 0.04, + "learning_rate": 4.975214400996069e-05, + "loss": 1.0201, + "step": 44955 + }, + { + "epoch": 0.04, + "learning_rate": 4.975213297670397e-05, + "loss": 1.0695, + "step": 44956 + }, + { + "epoch": 0.04, + "learning_rate": 4.975212194320291e-05, + "loss": 0.9391, + "step": 44957 + }, + { + "epoch": 0.04, + "learning_rate": 4.975211090945751e-05, + "loss": 0.9929, + "step": 44958 + }, + { + "epoch": 0.04, + "learning_rate": 4.975209987546776e-05, + "loss": 1.1957, + "step": 44959 + }, + { + "epoch": 0.04, + "learning_rate": 4.975208884123368e-05, + "loss": 1.2957, + "step": 44960 + }, + { + "epoch": 0.04, + "learning_rate": 4.975207780675525e-05, + "loss": 1.237, + "step": 44961 + }, + { + "epoch": 0.04, + "learning_rate": 4.975206677203248e-05, + "loss": 1.0741, + "step": 44962 + }, + { + "epoch": 0.04, + "learning_rate": 4.975205573706536e-05, + "loss": 1.1337, + "step": 44963 + }, + { + "epoch": 0.04, + "learning_rate": 4.975204470185391e-05, + "loss": 1.3126, + "step": 44964 + }, + { + "epoch": 0.04, + "learning_rate": 4.975203366639811e-05, + "loss": 1.1745, + "step": 44965 + }, + { + "epoch": 0.04, + "learning_rate": 4.975202263069797e-05, + "loss": 1.18, + "step": 44966 + }, + { + "epoch": 0.04, + "learning_rate": 4.975201159475349e-05, + "loss": 1.1576, + "step": 44967 + }, + { + "epoch": 0.04, + "learning_rate": 4.975200055856466e-05, + "loss": 1.2912, + "step": 44968 + }, + { + "epoch": 0.04, + "learning_rate": 4.97519895221315e-05, + "loss": 0.8619, + "step": 44969 + }, + { + "epoch": 0.04, + "learning_rate": 4.9751978485453997e-05, + "loss": 0.9547, + "step": 44970 + }, + { + "epoch": 0.04, + "learning_rate": 4.975196744853215e-05, + "loss": 0.7138, + "step": 44971 + }, + { + "epoch": 0.04, + "learning_rate": 4.975195641136596e-05, + "loss": 0.3376, + "step": 44972 + }, + { + "epoch": 0.04, + "learning_rate": 4.9751945373955434e-05, + "loss": 1.2377, + "step": 44973 + }, + { + "epoch": 0.04, + "learning_rate": 4.975193433630057e-05, + "loss": 1.1007, + "step": 44974 + }, + { + "epoch": 0.04, + "learning_rate": 4.975192329840136e-05, + "loss": 0.9587, + "step": 44975 + }, + { + "epoch": 0.04, + "learning_rate": 4.9751912260257805e-05, + "loss": 0.8838, + "step": 44976 + }, + { + "epoch": 0.04, + "learning_rate": 4.9751901221869916e-05, + "loss": 1.0732, + "step": 44977 + }, + { + "epoch": 0.04, + "learning_rate": 4.975189018323769e-05, + "loss": 1.1389, + "step": 44978 + }, + { + "epoch": 0.04, + "learning_rate": 4.9751879144361116e-05, + "loss": 0.9199, + "step": 44979 + }, + { + "epoch": 0.04, + "learning_rate": 4.9751868105240204e-05, + "loss": 0.6879, + "step": 44980 + }, + { + "epoch": 0.04, + "learning_rate": 4.9751857065874955e-05, + "loss": 1.0365, + "step": 44981 + }, + { + "epoch": 0.04, + "learning_rate": 4.9751846026265366e-05, + "loss": 0.9801, + "step": 44982 + }, + { + "epoch": 0.04, + "learning_rate": 4.975183498641143e-05, + "loss": 1.0043, + "step": 44983 + }, + { + "epoch": 0.04, + "learning_rate": 4.975182394631316e-05, + "loss": 0.9838, + "step": 44984 + }, + { + "epoch": 0.04, + "learning_rate": 4.975181290597055e-05, + "loss": 1.1512, + "step": 44985 + }, + { + "epoch": 0.04, + "learning_rate": 4.975180186538361e-05, + "loss": 0.927, + "step": 44986 + }, + { + "epoch": 0.04, + "learning_rate": 4.975179082455231e-05, + "loss": 0.7608, + "step": 44987 + }, + { + "epoch": 0.04, + "learning_rate": 4.975177978347669e-05, + "loss": 0.9134, + "step": 44988 + }, + { + "epoch": 0.04, + "learning_rate": 4.975176874215672e-05, + "loss": 0.999, + "step": 44989 + }, + { + "epoch": 0.04, + "learning_rate": 4.975175770059242e-05, + "loss": 0.9714, + "step": 44990 + }, + { + "epoch": 0.04, + "learning_rate": 4.975174665878377e-05, + "loss": 0.9053, + "step": 44991 + }, + { + "epoch": 0.04, + "learning_rate": 4.975173561673079e-05, + "loss": 1.0055, + "step": 44992 + }, + { + "epoch": 0.04, + "learning_rate": 4.9751724574433464e-05, + "loss": 1.0004, + "step": 44993 + }, + { + "epoch": 0.04, + "learning_rate": 4.97517135318918e-05, + "loss": 1.3202, + "step": 44994 + }, + { + "epoch": 0.04, + "learning_rate": 4.97517024891058e-05, + "loss": 1.0582, + "step": 44995 + }, + { + "epoch": 0.04, + "learning_rate": 4.9751691446075464e-05, + "loss": 0.9525, + "step": 44996 + }, + { + "epoch": 0.04, + "learning_rate": 4.975168040280079e-05, + "loss": 1.1146, + "step": 44997 + }, + { + "epoch": 0.04, + "learning_rate": 4.975166935928177e-05, + "loss": 1.0947, + "step": 44998 + }, + { + "epoch": 0.04, + "learning_rate": 4.975165831551842e-05, + "loss": 1.2053, + "step": 44999 + }, + { + "epoch": 0.04, + "learning_rate": 4.975164727151073e-05, + "loss": 1.2435, + "step": 45000 + }, + { + "epoch": 0.04, + "eval_loss": 1.081416368484497, + "eval_runtime": 126.1782, + "eval_samples_per_second": 10.977, + "eval_steps_per_second": 5.492, + "step": 45000 + }, + { + "epoch": 0.05, + "learning_rate": 4.97516362272587e-05, + "loss": 1.1749, + "step": 45001 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751625182762335e-05, + "loss": 1.0522, + "step": 45002 + }, + { + "epoch": 0.05, + "learning_rate": 4.975161413802163e-05, + "loss": 1.0559, + "step": 45003 + }, + { + "epoch": 0.05, + "learning_rate": 4.975160309303659e-05, + "loss": 1.0992, + "step": 45004 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751592047807215e-05, + "loss": 0.8538, + "step": 45005 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751581002333496e-05, + "loss": 0.8421, + "step": 45006 + }, + { + "epoch": 0.05, + "learning_rate": 4.975156995661544e-05, + "loss": 0.9507, + "step": 45007 + }, + { + "epoch": 0.05, + "learning_rate": 4.975155891065305e-05, + "loss": 1.1403, + "step": 45008 + }, + { + "epoch": 0.05, + "learning_rate": 4.975154786444633e-05, + "loss": 1.4794, + "step": 45009 + }, + { + "epoch": 0.05, + "learning_rate": 4.975153681799526e-05, + "loss": 1.3929, + "step": 45010 + }, + { + "epoch": 0.05, + "learning_rate": 4.975152577129986e-05, + "loss": 1.094, + "step": 45011 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751514724360125e-05, + "loss": 1.2292, + "step": 45012 + }, + { + "epoch": 0.05, + "learning_rate": 4.975150367717605e-05, + "loss": 1.195, + "step": 45013 + }, + { + "epoch": 0.05, + "learning_rate": 4.975149262974764e-05, + "loss": 1.1291, + "step": 45014 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751481582074896e-05, + "loss": 1.0864, + "step": 45015 + }, + { + "epoch": 0.05, + "learning_rate": 4.975147053415781e-05, + "loss": 1.0364, + "step": 45016 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751459485996396e-05, + "loss": 1.1367, + "step": 45017 + }, + { + "epoch": 0.05, + "learning_rate": 4.975144843759064e-05, + "loss": 1.121, + "step": 45018 + }, + { + "epoch": 0.05, + "learning_rate": 4.975143738894055e-05, + "loss": 1.7532, + "step": 45019 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751426340046124e-05, + "loss": 1.5382, + "step": 45020 + }, + { + "epoch": 0.05, + "learning_rate": 4.975141529090736e-05, + "loss": 0.8824, + "step": 45021 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751404241524266e-05, + "loss": 1.102, + "step": 45022 + }, + { + "epoch": 0.05, + "learning_rate": 4.975139319189683e-05, + "loss": 1.2102, + "step": 45023 + }, + { + "epoch": 0.05, + "learning_rate": 4.975138214202506e-05, + "loss": 1.0549, + "step": 45024 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751371091908963e-05, + "loss": 1.3539, + "step": 45025 + }, + { + "epoch": 0.05, + "learning_rate": 4.975136004154852e-05, + "loss": 0.9218, + "step": 45026 + }, + { + "epoch": 0.05, + "learning_rate": 4.975134899094375e-05, + "loss": 1.1068, + "step": 45027 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751337940094635e-05, + "loss": 1.1768, + "step": 45028 + }, + { + "epoch": 0.05, + "learning_rate": 4.975132688900119e-05, + "loss": 0.3547, + "step": 45029 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751315837663415e-05, + "loss": 0.0718, + "step": 45030 + }, + { + "epoch": 0.05, + "learning_rate": 4.975130478608131e-05, + "loss": 0.0571, + "step": 45031 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751293734254854e-05, + "loss": 0.0496, + "step": 45032 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751282682184076e-05, + "loss": 0.0491, + "step": 45033 + }, + { + "epoch": 0.05, + "learning_rate": 4.975127162986896e-05, + "loss": 0.0412, + "step": 45034 + }, + { + "epoch": 0.05, + "learning_rate": 4.975126057730951e-05, + "loss": 0.0415, + "step": 45035 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751249524505725e-05, + "loss": 0.0441, + "step": 45036 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751238471457606e-05, + "loss": 0.0354, + "step": 45037 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751227418165156e-05, + "loss": 0.0353, + "step": 45038 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751216364628374e-05, + "loss": 0.0445, + "step": 45039 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751205310847254e-05, + "loss": 0.0363, + "step": 45040 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751194256821795e-05, + "loss": 0.0379, + "step": 45041 + }, + { + "epoch": 0.05, + "learning_rate": 4.975118320255202e-05, + "loss": 0.0335, + "step": 45042 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751172148037895e-05, + "loss": 0.0371, + "step": 45043 + }, + { + "epoch": 0.05, + "learning_rate": 4.975116109327945e-05, + "loss": 0.0331, + "step": 45044 + }, + { + "epoch": 0.05, + "learning_rate": 4.975115003827666e-05, + "loss": 0.0304, + "step": 45045 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751138983029545e-05, + "loss": 0.035, + "step": 45046 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751127927538095e-05, + "loss": 0.0296, + "step": 45047 + }, + { + "epoch": 0.05, + "learning_rate": 4.975111687180231e-05, + "loss": 0.03, + "step": 45048 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751105815822195e-05, + "loss": 0.0296, + "step": 45049 + }, + { + "epoch": 0.05, + "learning_rate": 4.975109475959774e-05, + "loss": 0.0393, + "step": 45050 + }, + { + "epoch": 0.05, + "learning_rate": 4.975108370312897e-05, + "loss": 0.0372, + "step": 45051 + }, + { + "epoch": 0.05, + "learning_rate": 4.975107264641585e-05, + "loss": 0.0404, + "step": 45052 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751061589458406e-05, + "loss": 0.0326, + "step": 45053 + }, + { + "epoch": 0.05, + "learning_rate": 4.975105053225663e-05, + "loss": 0.0371, + "step": 45054 + }, + { + "epoch": 0.05, + "learning_rate": 4.9751039474810524e-05, + "loss": 0.0307, + "step": 45055 + }, + { + "epoch": 0.05, + "learning_rate": 4.975102841712008e-05, + "loss": 0.035, + "step": 45056 + }, + { + "epoch": 0.05, + "learning_rate": 4.97510173591853e-05, + "loss": 0.0345, + "step": 45057 + }, + { + "epoch": 0.05, + "learning_rate": 4.97510063010062e-05, + "loss": 0.0341, + "step": 45058 + }, + { + "epoch": 0.05, + "learning_rate": 4.975099524258277e-05, + "loss": 0.0309, + "step": 45059 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750984183915e-05, + "loss": 0.0272, + "step": 45060 + }, + { + "epoch": 0.05, + "learning_rate": 4.97509731250029e-05, + "loss": 0.0304, + "step": 45061 + }, + { + "epoch": 0.05, + "learning_rate": 4.975096206584647e-05, + "loss": 0.0251, + "step": 45062 + }, + { + "epoch": 0.05, + "learning_rate": 4.975095100644571e-05, + "loss": 0.0271, + "step": 45063 + }, + { + "epoch": 0.05, + "learning_rate": 4.975093994680062e-05, + "loss": 0.0302, + "step": 45064 + }, + { + "epoch": 0.05, + "learning_rate": 4.97509288869112e-05, + "loss": 0.642, + "step": 45065 + }, + { + "epoch": 0.05, + "learning_rate": 4.975091782677745e-05, + "loss": 0.9271, + "step": 45066 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750906766399356e-05, + "loss": 1.6864, + "step": 45067 + }, + { + "epoch": 0.05, + "learning_rate": 4.975089570577695e-05, + "loss": 1.8238, + "step": 45068 + }, + { + "epoch": 0.05, + "learning_rate": 4.97508846449102e-05, + "loss": 1.7354, + "step": 45069 + }, + { + "epoch": 0.05, + "learning_rate": 4.975087358379913e-05, + "loss": 1.7124, + "step": 45070 + }, + { + "epoch": 0.05, + "learning_rate": 4.975086252244372e-05, + "loss": 1.7294, + "step": 45071 + }, + { + "epoch": 0.05, + "learning_rate": 4.975085146084399e-05, + "loss": 1.7527, + "step": 45072 + }, + { + "epoch": 0.05, + "learning_rate": 4.975084039899991e-05, + "loss": 1.6492, + "step": 45073 + }, + { + "epoch": 0.05, + "learning_rate": 4.975082933691152e-05, + "loss": 1.0768, + "step": 45074 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750818274578795e-05, + "loss": 0.8626, + "step": 45075 + }, + { + "epoch": 0.05, + "learning_rate": 4.975080721200174e-05, + "loss": 1.0625, + "step": 45076 + }, + { + "epoch": 0.05, + "learning_rate": 4.975079614918036e-05, + "loss": 1.4153, + "step": 45077 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750785086114645e-05, + "loss": 1.0602, + "step": 45078 + }, + { + "epoch": 0.05, + "learning_rate": 4.97507740228046e-05, + "loss": 1.2477, + "step": 45079 + }, + { + "epoch": 0.05, + "learning_rate": 4.975076295925023e-05, + "loss": 1.1645, + "step": 45080 + }, + { + "epoch": 0.05, + "learning_rate": 4.975075189545153e-05, + "loss": 1.1143, + "step": 45081 + }, + { + "epoch": 0.05, + "learning_rate": 4.975074083140849e-05, + "loss": 1.4485, + "step": 45082 + }, + { + "epoch": 0.05, + "learning_rate": 4.975072976712114e-05, + "loss": 1.6532, + "step": 45083 + }, + { + "epoch": 0.05, + "learning_rate": 4.975071870258945e-05, + "loss": 1.0953, + "step": 45084 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750707637813435e-05, + "loss": 1.0015, + "step": 45085 + }, + { + "epoch": 0.05, + "learning_rate": 4.975069657279309e-05, + "loss": 0.8919, + "step": 45086 + }, + { + "epoch": 0.05, + "learning_rate": 4.975068550752842e-05, + "loss": 1.2019, + "step": 45087 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750674442019414e-05, + "loss": 1.0006, + "step": 45088 + }, + { + "epoch": 0.05, + "learning_rate": 4.975066337626608e-05, + "loss": 1.2085, + "step": 45089 + }, + { + "epoch": 0.05, + "learning_rate": 4.975065231026842e-05, + "loss": 0.9528, + "step": 45090 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750641244026434e-05, + "loss": 1.0163, + "step": 45091 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750630177540125e-05, + "loss": 0.9069, + "step": 45092 + }, + { + "epoch": 0.05, + "learning_rate": 4.975061911080948e-05, + "loss": 1.2189, + "step": 45093 + }, + { + "epoch": 0.05, + "learning_rate": 4.975060804383451e-05, + "loss": 0.6215, + "step": 45094 + }, + { + "epoch": 0.05, + "learning_rate": 4.975059697661522e-05, + "loss": 0.4131, + "step": 45095 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750585909151596e-05, + "loss": 0.3299, + "step": 45096 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750574841443634e-05, + "loss": 0.3197, + "step": 45097 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750563773491354e-05, + "loss": 0.4498, + "step": 45098 + }, + { + "epoch": 0.05, + "learning_rate": 4.975055270529475e-05, + "loss": 0.4816, + "step": 45099 + }, + { + "epoch": 0.05, + "learning_rate": 4.975054163685382e-05, + "loss": 0.5327, + "step": 45100 + }, + { + "epoch": 0.05, + "learning_rate": 4.975053056816856e-05, + "loss": 1.0949, + "step": 45101 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750519499238965e-05, + "loss": 1.3901, + "step": 45102 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750508430065054e-05, + "loss": 0.9668, + "step": 45103 + }, + { + "epoch": 0.05, + "learning_rate": 4.975049736064681e-05, + "loss": 0.9951, + "step": 45104 + }, + { + "epoch": 0.05, + "learning_rate": 4.975048629098424e-05, + "loss": 0.8842, + "step": 45105 + }, + { + "epoch": 0.05, + "learning_rate": 4.975047522107735e-05, + "loss": 1.0635, + "step": 45106 + }, + { + "epoch": 0.05, + "learning_rate": 4.975046415092613e-05, + "loss": 0.9109, + "step": 45107 + }, + { + "epoch": 0.05, + "learning_rate": 4.975045308053059e-05, + "loss": 0.9959, + "step": 45108 + }, + { + "epoch": 0.05, + "learning_rate": 4.975044200989072e-05, + "loss": 1.1767, + "step": 45109 + }, + { + "epoch": 0.05, + "learning_rate": 4.975043093900652e-05, + "loss": 1.2173, + "step": 45110 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750419867878e-05, + "loss": 1.074, + "step": 45111 + }, + { + "epoch": 0.05, + "learning_rate": 4.975040879650514e-05, + "loss": 1.2368, + "step": 45112 + }, + { + "epoch": 0.05, + "learning_rate": 4.975039772488797e-05, + "loss": 1.2961, + "step": 45113 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750386653026474e-05, + "loss": 1.1593, + "step": 45114 + }, + { + "epoch": 0.05, + "learning_rate": 4.975037558092064e-05, + "loss": 0.8731, + "step": 45115 + }, + { + "epoch": 0.05, + "learning_rate": 4.975036450857049e-05, + "loss": 1.2964, + "step": 45116 + }, + { + "epoch": 0.05, + "learning_rate": 4.975035343597602e-05, + "loss": 0.7613, + "step": 45117 + }, + { + "epoch": 0.05, + "learning_rate": 4.975034236313721e-05, + "loss": 1.2456, + "step": 45118 + }, + { + "epoch": 0.05, + "learning_rate": 4.975033129005409e-05, + "loss": 1.1556, + "step": 45119 + }, + { + "epoch": 0.05, + "learning_rate": 4.975032021672664e-05, + "loss": 1.1195, + "step": 45120 + }, + { + "epoch": 0.05, + "learning_rate": 4.975030914315486e-05, + "loss": 0.9922, + "step": 45121 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750298069338766e-05, + "loss": 1.1315, + "step": 45122 + }, + { + "epoch": 0.05, + "learning_rate": 4.975028699527834e-05, + "loss": 1.2117, + "step": 45123 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750275920973586e-05, + "loss": 1.0989, + "step": 45124 + }, + { + "epoch": 0.05, + "learning_rate": 4.975026484642451e-05, + "loss": 0.8537, + "step": 45125 + }, + { + "epoch": 0.05, + "learning_rate": 4.975025377163112e-05, + "loss": 0.9661, + "step": 45126 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750242696593396e-05, + "loss": 0.9866, + "step": 45127 + }, + { + "epoch": 0.05, + "learning_rate": 4.975023162131135e-05, + "loss": 0.8868, + "step": 45128 + }, + { + "epoch": 0.05, + "learning_rate": 4.975022054578497e-05, + "loss": 0.7089, + "step": 45129 + }, + { + "epoch": 0.05, + "learning_rate": 4.975020947001429e-05, + "loss": 1.0286, + "step": 45130 + }, + { + "epoch": 0.05, + "learning_rate": 4.975019839399927e-05, + "loss": 1.2002, + "step": 45131 + }, + { + "epoch": 0.05, + "learning_rate": 4.975018731773993e-05, + "loss": 1.2059, + "step": 45132 + }, + { + "epoch": 0.05, + "learning_rate": 4.975017624123626e-05, + "loss": 1.1255, + "step": 45133 + }, + { + "epoch": 0.05, + "learning_rate": 4.975016516448827e-05, + "loss": 1.1136, + "step": 45134 + }, + { + "epoch": 0.05, + "learning_rate": 4.975015408749597e-05, + "loss": 0.7031, + "step": 45135 + }, + { + "epoch": 0.05, + "learning_rate": 4.975014301025933e-05, + "loss": 1.0829, + "step": 45136 + }, + { + "epoch": 0.05, + "learning_rate": 4.975013193277838e-05, + "loss": 0.9216, + "step": 45137 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750120855053094e-05, + "loss": 1.1699, + "step": 45138 + }, + { + "epoch": 0.05, + "learning_rate": 4.975010977708349e-05, + "loss": 1.1037, + "step": 45139 + }, + { + "epoch": 0.05, + "learning_rate": 4.975009869886957e-05, + "loss": 1.0175, + "step": 45140 + }, + { + "epoch": 0.05, + "learning_rate": 4.975008762041132e-05, + "loss": 1.1403, + "step": 45141 + }, + { + "epoch": 0.05, + "learning_rate": 4.975007654170875e-05, + "loss": 1.0154, + "step": 45142 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750065462761856e-05, + "loss": 0.9215, + "step": 45143 + }, + { + "epoch": 0.05, + "learning_rate": 4.975005438357064e-05, + "loss": 1.1156, + "step": 45144 + }, + { + "epoch": 0.05, + "learning_rate": 4.975004330413511e-05, + "loss": 0.9766, + "step": 45145 + }, + { + "epoch": 0.05, + "learning_rate": 4.9750032224455244e-05, + "loss": 0.9748, + "step": 45146 + }, + { + "epoch": 0.05, + "learning_rate": 4.975002114453107e-05, + "loss": 0.9899, + "step": 45147 + }, + { + "epoch": 0.05, + "learning_rate": 4.975001006436256e-05, + "loss": 0.7922, + "step": 45148 + }, + { + "epoch": 0.05, + "learning_rate": 4.974999898394974e-05, + "loss": 0.853, + "step": 45149 + }, + { + "epoch": 0.05, + "learning_rate": 4.974998790329259e-05, + "loss": 1.2391, + "step": 45150 + }, + { + "epoch": 0.05, + "learning_rate": 4.974997682239113e-05, + "loss": 0.9765, + "step": 45151 + }, + { + "epoch": 0.05, + "learning_rate": 4.974996574124534e-05, + "loss": 0.8738, + "step": 45152 + }, + { + "epoch": 0.05, + "learning_rate": 4.974995465985523e-05, + "loss": 1.0319, + "step": 45153 + }, + { + "epoch": 0.05, + "learning_rate": 4.97499435782208e-05, + "loss": 0.9573, + "step": 45154 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749932496342046e-05, + "loss": 0.7157, + "step": 45155 + }, + { + "epoch": 0.05, + "learning_rate": 4.974992141421897e-05, + "loss": 0.9446, + "step": 45156 + }, + { + "epoch": 0.05, + "learning_rate": 4.974991033185158e-05, + "loss": 0.7535, + "step": 45157 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749899249239865e-05, + "loss": 1.3936, + "step": 45158 + }, + { + "epoch": 0.05, + "learning_rate": 4.974988816638384e-05, + "loss": 1.1573, + "step": 45159 + }, + { + "epoch": 0.05, + "learning_rate": 4.974987708328348e-05, + "loss": 1.1638, + "step": 45160 + }, + { + "epoch": 0.05, + "learning_rate": 4.974986599993881e-05, + "loss": 0.904, + "step": 45161 + }, + { + "epoch": 0.05, + "learning_rate": 4.974985491634981e-05, + "loss": 0.8174, + "step": 45162 + }, + { + "epoch": 0.05, + "learning_rate": 4.974984383251649e-05, + "loss": 0.9463, + "step": 45163 + }, + { + "epoch": 0.05, + "learning_rate": 4.974983274843885e-05, + "loss": 0.9484, + "step": 45164 + }, + { + "epoch": 0.05, + "learning_rate": 4.97498216641169e-05, + "loss": 1.1732, + "step": 45165 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749810579550625e-05, + "loss": 1.0148, + "step": 45166 + }, + { + "epoch": 0.05, + "learning_rate": 4.974979949474003e-05, + "loss": 0.8493, + "step": 45167 + }, + { + "epoch": 0.05, + "learning_rate": 4.974978840968511e-05, + "loss": 0.7276, + "step": 45168 + }, + { + "epoch": 0.05, + "learning_rate": 4.974977732438588e-05, + "loss": 1.064, + "step": 45169 + }, + { + "epoch": 0.05, + "learning_rate": 4.974976623884233e-05, + "loss": 0.899, + "step": 45170 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749755153054455e-05, + "loss": 0.8799, + "step": 45171 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749744067022264e-05, + "loss": 1.1309, + "step": 45172 + }, + { + "epoch": 0.05, + "learning_rate": 4.974973298074575e-05, + "loss": 0.9082, + "step": 45173 + }, + { + "epoch": 0.05, + "learning_rate": 4.974972189422492e-05, + "loss": 0.9244, + "step": 45174 + }, + { + "epoch": 0.05, + "learning_rate": 4.974971080745977e-05, + "loss": 1.0575, + "step": 45175 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749699720450304e-05, + "loss": 1.1233, + "step": 45176 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749688633196515e-05, + "loss": 1.0123, + "step": 45177 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749677545698414e-05, + "loss": 1.1396, + "step": 45178 + }, + { + "epoch": 0.05, + "learning_rate": 4.974966645795599e-05, + "loss": 1.1146, + "step": 45179 + }, + { + "epoch": 0.05, + "learning_rate": 4.974965536996925e-05, + "loss": 0.9914, + "step": 45180 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749644281738184e-05, + "loss": 0.8558, + "step": 45181 + }, + { + "epoch": 0.05, + "learning_rate": 4.974963319326281e-05, + "loss": 1.0081, + "step": 45182 + }, + { + "epoch": 0.05, + "learning_rate": 4.974962210454311e-05, + "loss": 0.7723, + "step": 45183 + }, + { + "epoch": 0.05, + "learning_rate": 4.97496110155791e-05, + "loss": 0.9731, + "step": 45184 + }, + { + "epoch": 0.05, + "learning_rate": 4.974959992637077e-05, + "loss": 0.9544, + "step": 45185 + }, + { + "epoch": 0.05, + "learning_rate": 4.974958883691812e-05, + "loss": 1.1308, + "step": 45186 + }, + { + "epoch": 0.05, + "learning_rate": 4.974957774722115e-05, + "loss": 1.2003, + "step": 45187 + }, + { + "epoch": 0.05, + "learning_rate": 4.974956665727987e-05, + "loss": 1.31, + "step": 45188 + }, + { + "epoch": 0.05, + "learning_rate": 4.974955556709427e-05, + "loss": 0.9887, + "step": 45189 + }, + { + "epoch": 0.05, + "learning_rate": 4.974954447666435e-05, + "loss": 1.0726, + "step": 45190 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749533385990114e-05, + "loss": 0.9515, + "step": 45191 + }, + { + "epoch": 0.05, + "learning_rate": 4.974952229507156e-05, + "loss": 0.8425, + "step": 45192 + }, + { + "epoch": 0.05, + "learning_rate": 4.974951120390869e-05, + "loss": 0.616, + "step": 45193 + }, + { + "epoch": 0.05, + "learning_rate": 4.974950011250151e-05, + "loss": 1.0505, + "step": 45194 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749489020850006e-05, + "loss": 0.974, + "step": 45195 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749477928954185e-05, + "loss": 0.684, + "step": 45196 + }, + { + "epoch": 0.05, + "learning_rate": 4.974946683681405e-05, + "loss": 1.146, + "step": 45197 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749455744429595e-05, + "loss": 0.7992, + "step": 45198 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749444651800834e-05, + "loss": 1.2101, + "step": 45199 + }, + { + "epoch": 0.05, + "learning_rate": 4.974943355892775e-05, + "loss": 0.7671, + "step": 45200 + }, + { + "epoch": 0.05, + "learning_rate": 4.974942246581035e-05, + "loss": 0.5096, + "step": 45201 + }, + { + "epoch": 0.05, + "learning_rate": 4.974941137244863e-05, + "loss": 1.0877, + "step": 45202 + }, + { + "epoch": 0.05, + "learning_rate": 4.97494002788426e-05, + "loss": 1.2123, + "step": 45203 + }, + { + "epoch": 0.05, + "learning_rate": 4.974938918499226e-05, + "loss": 0.868, + "step": 45204 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749378090897594e-05, + "loss": 1.2787, + "step": 45205 + }, + { + "epoch": 0.05, + "learning_rate": 4.974936699655861e-05, + "loss": 1.2031, + "step": 45206 + }, + { + "epoch": 0.05, + "learning_rate": 4.974935590197531e-05, + "loss": 0.9604, + "step": 45207 + }, + { + "epoch": 0.05, + "learning_rate": 4.974934480714771e-05, + "loss": 1.0682, + "step": 45208 + }, + { + "epoch": 0.05, + "learning_rate": 4.974933371207578e-05, + "loss": 1.0775, + "step": 45209 + }, + { + "epoch": 0.05, + "learning_rate": 4.974932261675955e-05, + "loss": 0.9724, + "step": 45210 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749311521198995e-05, + "loss": 1.2723, + "step": 45211 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749300425394124e-05, + "loss": 1.0278, + "step": 45212 + }, + { + "epoch": 0.05, + "learning_rate": 4.974928932934494e-05, + "loss": 1.361, + "step": 45213 + }, + { + "epoch": 0.05, + "learning_rate": 4.974927823305145e-05, + "loss": 1.0255, + "step": 45214 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749267136513635e-05, + "loss": 0.9509, + "step": 45215 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749256039731505e-05, + "loss": 1.0657, + "step": 45216 + }, + { + "epoch": 0.05, + "learning_rate": 4.974924494270506e-05, + "loss": 1.1995, + "step": 45217 + }, + { + "epoch": 0.05, + "learning_rate": 4.974923384543431e-05, + "loss": 0.9356, + "step": 45218 + }, + { + "epoch": 0.05, + "learning_rate": 4.974922274791924e-05, + "loss": 1.0813, + "step": 45219 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749211650159856e-05, + "loss": 1.1885, + "step": 45220 + }, + { + "epoch": 0.05, + "learning_rate": 4.974920055215616e-05, + "loss": 1.0586, + "step": 45221 + }, + { + "epoch": 0.05, + "learning_rate": 4.974918945390815e-05, + "loss": 0.4824, + "step": 45222 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749178355415825e-05, + "loss": 0.5888, + "step": 45223 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749167256679184e-05, + "loss": 0.9159, + "step": 45224 + }, + { + "epoch": 0.05, + "learning_rate": 4.974915615769824e-05, + "loss": 1.1589, + "step": 45225 + }, + { + "epoch": 0.05, + "learning_rate": 4.974914505847297e-05, + "loss": 0.8721, + "step": 45226 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749133959003396e-05, + "loss": 1.0649, + "step": 45227 + }, + { + "epoch": 0.05, + "learning_rate": 4.974912285928951e-05, + "loss": 0.9134, + "step": 45228 + }, + { + "epoch": 0.05, + "learning_rate": 4.97491117593313e-05, + "loss": 0.704, + "step": 45229 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749100659128786e-05, + "loss": 1.0633, + "step": 45230 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749089558681964e-05, + "loss": 1.1241, + "step": 45231 + }, + { + "epoch": 0.05, + "learning_rate": 4.974907845799082e-05, + "loss": 0.8353, + "step": 45232 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749067357055366e-05, + "loss": 0.2652, + "step": 45233 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749056255875596e-05, + "loss": 1.0149, + "step": 45234 + }, + { + "epoch": 0.05, + "learning_rate": 4.974904515445152e-05, + "loss": 1.1209, + "step": 45235 + }, + { + "epoch": 0.05, + "learning_rate": 4.974903405278313e-05, + "loss": 1.1441, + "step": 45236 + }, + { + "epoch": 0.05, + "learning_rate": 4.9749022950870426e-05, + "loss": 0.4583, + "step": 45237 + }, + { + "epoch": 0.05, + "learning_rate": 4.974901184871341e-05, + "loss": 0.6922, + "step": 45238 + }, + { + "epoch": 0.05, + "learning_rate": 4.974900074631209e-05, + "loss": 1.0469, + "step": 45239 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748989643666446e-05, + "loss": 0.7679, + "step": 45240 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748978540776504e-05, + "loss": 1.441, + "step": 45241 + }, + { + "epoch": 0.05, + "learning_rate": 4.974896743764224e-05, + "loss": 1.257, + "step": 45242 + }, + { + "epoch": 0.05, + "learning_rate": 4.974895633426367e-05, + "loss": 0.773, + "step": 45243 + }, + { + "epoch": 0.05, + "learning_rate": 4.974894523064079e-05, + "loss": 0.1957, + "step": 45244 + }, + { + "epoch": 0.05, + "learning_rate": 4.974893412677359e-05, + "loss": 0.1528, + "step": 45245 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748923022662086e-05, + "loss": 1.0194, + "step": 45246 + }, + { + "epoch": 0.05, + "learning_rate": 4.974891191830627e-05, + "loss": 0.981, + "step": 45247 + }, + { + "epoch": 0.05, + "learning_rate": 4.974890081370614e-05, + "loss": 1.0224, + "step": 45248 + }, + { + "epoch": 0.05, + "learning_rate": 4.97488897088617e-05, + "loss": 1.1468, + "step": 45249 + }, + { + "epoch": 0.05, + "learning_rate": 4.974887860377295e-05, + "loss": 1.1514, + "step": 45250 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748867498439896e-05, + "loss": 1.3772, + "step": 45251 + }, + { + "epoch": 0.05, + "learning_rate": 4.974885639286252e-05, + "loss": 1.2337, + "step": 45252 + }, + { + "epoch": 0.05, + "learning_rate": 4.974884528704085e-05, + "loss": 0.9497, + "step": 45253 + }, + { + "epoch": 0.05, + "learning_rate": 4.974883418097486e-05, + "loss": 1.3047, + "step": 45254 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748823074664555e-05, + "loss": 1.1624, + "step": 45255 + }, + { + "epoch": 0.05, + "learning_rate": 4.974881196810994e-05, + "loss": 0.575, + "step": 45256 + }, + { + "epoch": 0.05, + "learning_rate": 4.974880086131102e-05, + "loss": 0.8556, + "step": 45257 + }, + { + "epoch": 0.05, + "learning_rate": 4.974878975426779e-05, + "loss": 1.1081, + "step": 45258 + }, + { + "epoch": 0.05, + "learning_rate": 4.974877864698025e-05, + "loss": 0.8327, + "step": 45259 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748767539448406e-05, + "loss": 0.9423, + "step": 45260 + }, + { + "epoch": 0.05, + "learning_rate": 4.974875643167225e-05, + "loss": 1.0396, + "step": 45261 + }, + { + "epoch": 0.05, + "learning_rate": 4.974874532365178e-05, + "loss": 1.0444, + "step": 45262 + }, + { + "epoch": 0.05, + "learning_rate": 4.974873421538701e-05, + "loss": 0.8424, + "step": 45263 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748723106877916e-05, + "loss": 1.2464, + "step": 45264 + }, + { + "epoch": 0.05, + "learning_rate": 4.974871199812452e-05, + "loss": 1.1604, + "step": 45265 + }, + { + "epoch": 0.05, + "learning_rate": 4.974870088912682e-05, + "loss": 1.3916, + "step": 45266 + }, + { + "epoch": 0.05, + "learning_rate": 4.974868977988481e-05, + "loss": 1.2731, + "step": 45267 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748678670398483e-05, + "loss": 0.9024, + "step": 45268 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748667560667855e-05, + "loss": 1.079, + "step": 45269 + }, + { + "epoch": 0.05, + "learning_rate": 4.974865645069292e-05, + "loss": 1.0964, + "step": 45270 + }, + { + "epoch": 0.05, + "learning_rate": 4.974864534047367e-05, + "loss": 1.3282, + "step": 45271 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748634230010116e-05, + "loss": 1.1647, + "step": 45272 + }, + { + "epoch": 0.05, + "learning_rate": 4.974862311930225e-05, + "loss": 1.1202, + "step": 45273 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748612008350084e-05, + "loss": 1.1994, + "step": 45274 + }, + { + "epoch": 0.05, + "learning_rate": 4.974860089715361e-05, + "loss": 1.4489, + "step": 45275 + }, + { + "epoch": 0.05, + "learning_rate": 4.974858978571282e-05, + "loss": 1.2949, + "step": 45276 + }, + { + "epoch": 0.05, + "learning_rate": 4.974857867402772e-05, + "loss": 1.0715, + "step": 45277 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748567562098324e-05, + "loss": 1.1513, + "step": 45278 + }, + { + "epoch": 0.05, + "learning_rate": 4.974855644992461e-05, + "loss": 1.2285, + "step": 45279 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748545337506596e-05, + "loss": 0.8852, + "step": 45280 + }, + { + "epoch": 0.05, + "learning_rate": 4.974853422484428e-05, + "loss": 1.0448, + "step": 45281 + }, + { + "epoch": 0.05, + "learning_rate": 4.974852311193764e-05, + "loss": 1.0282, + "step": 45282 + }, + { + "epoch": 0.05, + "learning_rate": 4.974851199878671e-05, + "loss": 0.5898, + "step": 45283 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748500885391464e-05, + "loss": 0.5369, + "step": 45284 + }, + { + "epoch": 0.05, + "learning_rate": 4.974848977175191e-05, + "loss": 1.1653, + "step": 45285 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748478657868054e-05, + "loss": 1.161, + "step": 45286 + }, + { + "epoch": 0.05, + "learning_rate": 4.974846754373989e-05, + "loss": 0.997, + "step": 45287 + }, + { + "epoch": 0.05, + "learning_rate": 4.974845642936742e-05, + "loss": 0.6296, + "step": 45288 + }, + { + "epoch": 0.05, + "learning_rate": 4.974844531475064e-05, + "loss": 1.22, + "step": 45289 + }, + { + "epoch": 0.05, + "learning_rate": 4.974843419988956e-05, + "loss": 0.9667, + "step": 45290 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748423084784165e-05, + "loss": 1.1858, + "step": 45291 + }, + { + "epoch": 0.05, + "learning_rate": 4.974841196943447e-05, + "loss": 1.0047, + "step": 45292 + }, + { + "epoch": 0.05, + "learning_rate": 4.974840085384047e-05, + "loss": 0.9149, + "step": 45293 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748389738002164e-05, + "loss": 0.6327, + "step": 45294 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748378621919555e-05, + "loss": 0.8862, + "step": 45295 + }, + { + "epoch": 0.05, + "learning_rate": 4.974836750559263e-05, + "loss": 0.7488, + "step": 45296 + }, + { + "epoch": 0.05, + "learning_rate": 4.974835638902141e-05, + "loss": 0.8968, + "step": 45297 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748345272205885e-05, + "loss": 1.0556, + "step": 45298 + }, + { + "epoch": 0.05, + "learning_rate": 4.974833415514605e-05, + "loss": 1.1312, + "step": 45299 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748323037841904e-05, + "loss": 1.132, + "step": 45300 + }, + { + "epoch": 0.05, + "learning_rate": 4.974831192029347e-05, + "loss": 1.5206, + "step": 45301 + }, + { + "epoch": 0.05, + "learning_rate": 4.974830080250071e-05, + "loss": 0.9617, + "step": 45302 + }, + { + "epoch": 0.05, + "learning_rate": 4.974828968446366e-05, + "loss": 1.1518, + "step": 45303 + }, + { + "epoch": 0.05, + "learning_rate": 4.97482785661823e-05, + "loss": 1.0042, + "step": 45304 + }, + { + "epoch": 0.05, + "learning_rate": 4.974826744765664e-05, + "loss": 0.8067, + "step": 45305 + }, + { + "epoch": 0.05, + "learning_rate": 4.974825632888666e-05, + "loss": 1.226, + "step": 45306 + }, + { + "epoch": 0.05, + "learning_rate": 4.97482452098724e-05, + "loss": 1.175, + "step": 45307 + }, + { + "epoch": 0.05, + "learning_rate": 4.974823409061382e-05, + "loss": 1.1052, + "step": 45308 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748222971110934e-05, + "loss": 1.1019, + "step": 45309 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748211851363755e-05, + "loss": 1.3041, + "step": 45310 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748200731372264e-05, + "loss": 1.0257, + "step": 45311 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748189611136476e-05, + "loss": 1.0521, + "step": 45312 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748178490656377e-05, + "loss": 1.6129, + "step": 45313 + }, + { + "epoch": 0.05, + "learning_rate": 4.974816736993197e-05, + "loss": 0.9352, + "step": 45314 + }, + { + "epoch": 0.05, + "learning_rate": 4.974815624896328e-05, + "loss": 1.1099, + "step": 45315 + }, + { + "epoch": 0.05, + "learning_rate": 4.974814512775027e-05, + "loss": 1.8138, + "step": 45316 + }, + { + "epoch": 0.05, + "learning_rate": 4.974813400629296e-05, + "loss": 1.0721, + "step": 45317 + }, + { + "epoch": 0.05, + "learning_rate": 4.974812288459134e-05, + "loss": 1.1049, + "step": 45318 + }, + { + "epoch": 0.05, + "learning_rate": 4.974811176264543e-05, + "loss": 1.0869, + "step": 45319 + }, + { + "epoch": 0.05, + "learning_rate": 4.974810064045521e-05, + "loss": 1.1086, + "step": 45320 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748089518020694e-05, + "loss": 0.4041, + "step": 45321 + }, + { + "epoch": 0.05, + "learning_rate": 4.974807839534186e-05, + "loss": 0.8972, + "step": 45322 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748067272418734e-05, + "loss": 1.1584, + "step": 45323 + }, + { + "epoch": 0.05, + "learning_rate": 4.974805614925131e-05, + "loss": 1.1507, + "step": 45324 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748045025839575e-05, + "loss": 1.2661, + "step": 45325 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748033902183536e-05, + "loss": 1.1833, + "step": 45326 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748022778283205e-05, + "loss": 1.1638, + "step": 45327 + }, + { + "epoch": 0.05, + "learning_rate": 4.974801165413856e-05, + "loss": 1.156, + "step": 45328 + }, + { + "epoch": 0.05, + "learning_rate": 4.974800052974963e-05, + "loss": 1.2618, + "step": 45329 + }, + { + "epoch": 0.05, + "learning_rate": 4.974798940511638e-05, + "loss": 1.0741, + "step": 45330 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747978280238837e-05, + "loss": 0.9962, + "step": 45331 + }, + { + "epoch": 0.05, + "learning_rate": 4.974796715511699e-05, + "loss": 0.9079, + "step": 45332 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747956029750845e-05, + "loss": 1.0099, + "step": 45333 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747944904140395e-05, + "loss": 1.0306, + "step": 45334 + }, + { + "epoch": 0.05, + "learning_rate": 4.974793377828565e-05, + "loss": 1.0914, + "step": 45335 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747922652186596e-05, + "loss": 0.9007, + "step": 45336 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747911525843246e-05, + "loss": 0.696, + "step": 45337 + }, + { + "epoch": 0.05, + "learning_rate": 4.974790039925559e-05, + "loss": 1.2572, + "step": 45338 + }, + { + "epoch": 0.05, + "learning_rate": 4.974788927242364e-05, + "loss": 0.9738, + "step": 45339 + }, + { + "epoch": 0.05, + "learning_rate": 4.974787814534738e-05, + "loss": 1.035, + "step": 45340 + }, + { + "epoch": 0.05, + "learning_rate": 4.974786701802683e-05, + "loss": 0.8461, + "step": 45341 + }, + { + "epoch": 0.05, + "learning_rate": 4.974785589046197e-05, + "loss": 0.9708, + "step": 45342 + }, + { + "epoch": 0.05, + "learning_rate": 4.974784476265282e-05, + "loss": 1.2521, + "step": 45343 + }, + { + "epoch": 0.05, + "learning_rate": 4.974783363459936e-05, + "loss": 0.9828, + "step": 45344 + }, + { + "epoch": 0.05, + "learning_rate": 4.97478225063016e-05, + "loss": 0.7231, + "step": 45345 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747811377759546e-05, + "loss": 1.0131, + "step": 45346 + }, + { + "epoch": 0.05, + "learning_rate": 4.974780024897318e-05, + "loss": 1.538, + "step": 45347 + }, + { + "epoch": 0.05, + "learning_rate": 4.974778911994254e-05, + "loss": 1.4834, + "step": 45348 + }, + { + "epoch": 0.05, + "learning_rate": 4.974777799066758e-05, + "loss": 1.0353, + "step": 45349 + }, + { + "epoch": 0.05, + "learning_rate": 4.974776686114832e-05, + "loss": 1.0018, + "step": 45350 + }, + { + "epoch": 0.05, + "learning_rate": 4.974775573138476e-05, + "loss": 1.051, + "step": 45351 + }, + { + "epoch": 0.05, + "learning_rate": 4.974774460137691e-05, + "loss": 1.2842, + "step": 45352 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747733471124755e-05, + "loss": 0.6176, + "step": 45353 + }, + { + "epoch": 0.05, + "learning_rate": 4.97477223406283e-05, + "loss": 0.8435, + "step": 45354 + }, + { + "epoch": 0.05, + "learning_rate": 4.974771120988755e-05, + "loss": 1.1477, + "step": 45355 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747700078902506e-05, + "loss": 0.9518, + "step": 45356 + }, + { + "epoch": 0.05, + "learning_rate": 4.974768894767315e-05, + "loss": 1.2132, + "step": 45357 + }, + { + "epoch": 0.05, + "learning_rate": 4.97476778161995e-05, + "loss": 0.9391, + "step": 45358 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747666684481556e-05, + "loss": 1.1341, + "step": 45359 + }, + { + "epoch": 0.05, + "learning_rate": 4.974765555251931e-05, + "loss": 0.8503, + "step": 45360 + }, + { + "epoch": 0.05, + "learning_rate": 4.974764442031276e-05, + "loss": 0.8064, + "step": 45361 + }, + { + "epoch": 0.05, + "learning_rate": 4.974763328786192e-05, + "loss": 0.8399, + "step": 45362 + }, + { + "epoch": 0.05, + "learning_rate": 4.974762215516678e-05, + "loss": 0.9692, + "step": 45363 + }, + { + "epoch": 0.05, + "learning_rate": 4.974761102222734e-05, + "loss": 1.0445, + "step": 45364 + }, + { + "epoch": 0.05, + "learning_rate": 4.974759988904361e-05, + "loss": 1.3997, + "step": 45365 + }, + { + "epoch": 0.05, + "learning_rate": 4.974758875561557e-05, + "loss": 1.1154, + "step": 45366 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747577621943233e-05, + "loss": 1.3388, + "step": 45367 + }, + { + "epoch": 0.05, + "learning_rate": 4.97475664880266e-05, + "loss": 0.9506, + "step": 45368 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747555353865674e-05, + "loss": 0.6001, + "step": 45369 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747544219460454e-05, + "loss": 0.9743, + "step": 45370 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747533084810923e-05, + "loss": 0.9414, + "step": 45371 + }, + { + "epoch": 0.05, + "learning_rate": 4.974752194991711e-05, + "loss": 1.1604, + "step": 45372 + }, + { + "epoch": 0.05, + "learning_rate": 4.974751081477899e-05, + "loss": 1.0663, + "step": 45373 + }, + { + "epoch": 0.05, + "learning_rate": 4.974749967939658e-05, + "loss": 0.7129, + "step": 45374 + }, + { + "epoch": 0.05, + "learning_rate": 4.974748854376987e-05, + "loss": 1.0883, + "step": 45375 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747477407898854e-05, + "loss": 0.5705, + "step": 45376 + }, + { + "epoch": 0.05, + "learning_rate": 4.974746627178355e-05, + "loss": 1.2442, + "step": 45377 + }, + { + "epoch": 0.05, + "learning_rate": 4.974745513542395e-05, + "loss": 1.5215, + "step": 45378 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747443998820054e-05, + "loss": 1.3067, + "step": 45379 + }, + { + "epoch": 0.05, + "learning_rate": 4.974743286197186e-05, + "loss": 1.1963, + "step": 45380 + }, + { + "epoch": 0.05, + "learning_rate": 4.974742172487937e-05, + "loss": 1.1076, + "step": 45381 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747410587542586e-05, + "loss": 1.143, + "step": 45382 + }, + { + "epoch": 0.05, + "learning_rate": 4.97473994499615e-05, + "loss": 1.0724, + "step": 45383 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747388312136125e-05, + "loss": 0.8348, + "step": 45384 + }, + { + "epoch": 0.05, + "learning_rate": 4.974737717406645e-05, + "loss": 0.9344, + "step": 45385 + }, + { + "epoch": 0.05, + "learning_rate": 4.974736603575248e-05, + "loss": 1.1494, + "step": 45386 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747354897194216e-05, + "loss": 0.9777, + "step": 45387 + }, + { + "epoch": 0.05, + "learning_rate": 4.974734375839165e-05, + "loss": 1.0619, + "step": 45388 + }, + { + "epoch": 0.05, + "learning_rate": 4.97473326193448e-05, + "loss": 0.9086, + "step": 45389 + }, + { + "epoch": 0.05, + "learning_rate": 4.974732148005364e-05, + "loss": 0.9629, + "step": 45390 + }, + { + "epoch": 0.05, + "learning_rate": 4.97473103405182e-05, + "loss": 1.158, + "step": 45391 + }, + { + "epoch": 0.05, + "learning_rate": 4.974729920073846e-05, + "loss": 1.0624, + "step": 45392 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747288060714425e-05, + "loss": 0.8178, + "step": 45393 + }, + { + "epoch": 0.05, + "learning_rate": 4.97472769204461e-05, + "loss": 0.8235, + "step": 45394 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747265779933465e-05, + "loss": 1.2565, + "step": 45395 + }, + { + "epoch": 0.05, + "learning_rate": 4.974725463917654e-05, + "loss": 1.0901, + "step": 45396 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747243498175334e-05, + "loss": 0.9782, + "step": 45397 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747232356929816e-05, + "loss": 0.975, + "step": 45398 + }, + { + "epoch": 0.05, + "learning_rate": 4.974722121544002e-05, + "loss": 1.2183, + "step": 45399 + }, + { + "epoch": 0.05, + "learning_rate": 4.974721007370592e-05, + "loss": 1.0256, + "step": 45400 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747198931727526e-05, + "loss": 0.9213, + "step": 45401 + }, + { + "epoch": 0.05, + "learning_rate": 4.974718778950484e-05, + "loss": 0.5183, + "step": 45402 + }, + { + "epoch": 0.05, + "learning_rate": 4.974717664703786e-05, + "loss": 0.5569, + "step": 45403 + }, + { + "epoch": 0.05, + "learning_rate": 4.974716550432659e-05, + "loss": 0.4812, + "step": 45404 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747154361371026e-05, + "loss": 0.4368, + "step": 45405 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747143218171165e-05, + "loss": 1.1171, + "step": 45406 + }, + { + "epoch": 0.05, + "learning_rate": 4.974713207472701e-05, + "loss": 1.2322, + "step": 45407 + }, + { + "epoch": 0.05, + "learning_rate": 4.974712093103856e-05, + "loss": 0.7163, + "step": 45408 + }, + { + "epoch": 0.05, + "learning_rate": 4.974710978710583e-05, + "loss": 1.0936, + "step": 45409 + }, + { + "epoch": 0.05, + "learning_rate": 4.974709864292879e-05, + "loss": 1.438, + "step": 45410 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747087498507466e-05, + "loss": 1.1511, + "step": 45411 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747076353841845e-05, + "loss": 1.3857, + "step": 45412 + }, + { + "epoch": 0.05, + "learning_rate": 4.974706520893194e-05, + "loss": 1.241, + "step": 45413 + }, + { + "epoch": 0.05, + "learning_rate": 4.974705406377773e-05, + "loss": 1.1779, + "step": 45414 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747042918379235e-05, + "loss": 0.9653, + "step": 45415 + }, + { + "epoch": 0.05, + "learning_rate": 4.974703177273645e-05, + "loss": 1.1707, + "step": 45416 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747020626849366e-05, + "loss": 1.0022, + "step": 45417 + }, + { + "epoch": 0.05, + "learning_rate": 4.974700948071799e-05, + "loss": 0.8391, + "step": 45418 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746998334342333e-05, + "loss": 0.6924, + "step": 45419 + }, + { + "epoch": 0.05, + "learning_rate": 4.974698718772237e-05, + "loss": 0.9913, + "step": 45420 + }, + { + "epoch": 0.05, + "learning_rate": 4.974697604085812e-05, + "loss": 1.0964, + "step": 45421 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746964893749584e-05, + "loss": 0.8411, + "step": 45422 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746953746396754e-05, + "loss": 0.8076, + "step": 45423 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746942598799626e-05, + "loss": 1.1273, + "step": 45424 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746931450958215e-05, + "loss": 1.0856, + "step": 45425 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746920302872505e-05, + "loss": 1.2764, + "step": 45426 + }, + { + "epoch": 0.05, + "learning_rate": 4.974690915454251e-05, + "loss": 1.4223, + "step": 45427 + }, + { + "epoch": 0.05, + "learning_rate": 4.974689800596822e-05, + "loss": 1.3855, + "step": 45428 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746886857149636e-05, + "loss": 1.0838, + "step": 45429 + }, + { + "epoch": 0.05, + "learning_rate": 4.974687570808677e-05, + "loss": 0.8186, + "step": 45430 + }, + { + "epoch": 0.05, + "learning_rate": 4.97468645587796e-05, + "loss": 0.9951, + "step": 45431 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746853409228154e-05, + "loss": 1.2055, + "step": 45432 + }, + { + "epoch": 0.05, + "learning_rate": 4.974684225943241e-05, + "loss": 1.0613, + "step": 45433 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746831109392375e-05, + "loss": 1.0256, + "step": 45434 + }, + { + "epoch": 0.05, + "learning_rate": 4.974681995910805e-05, + "loss": 1.0921, + "step": 45435 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746808808579445e-05, + "loss": 1.1969, + "step": 45436 + }, + { + "epoch": 0.05, + "learning_rate": 4.974679765780654e-05, + "loss": 0.9368, + "step": 45437 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746786506789344e-05, + "loss": 0.881, + "step": 45438 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746775355527864e-05, + "loss": 1.2086, + "step": 45439 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746764204022086e-05, + "loss": 0.9978, + "step": 45440 + }, + { + "epoch": 0.05, + "learning_rate": 4.974675305227203e-05, + "loss": 0.7796, + "step": 45441 + }, + { + "epoch": 0.05, + "learning_rate": 4.974674190027767e-05, + "loss": 1.2899, + "step": 45442 + }, + { + "epoch": 0.05, + "learning_rate": 4.974673074803903e-05, + "loss": 1.0924, + "step": 45443 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746719595556104e-05, + "loss": 1.132, + "step": 45444 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746708442828884e-05, + "loss": 0.815, + "step": 45445 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746697289857366e-05, + "loss": 1.2364, + "step": 45446 + }, + { + "epoch": 0.05, + "learning_rate": 4.974668613664157e-05, + "loss": 1.0115, + "step": 45447 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746674983181484e-05, + "loss": 1.1063, + "step": 45448 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746663829477113e-05, + "loss": 1.071, + "step": 45449 + }, + { + "epoch": 0.05, + "learning_rate": 4.974665267552845e-05, + "loss": 2.2968, + "step": 45450 + }, + { + "epoch": 0.05, + "learning_rate": 4.97466415213355e-05, + "loss": 1.4211, + "step": 45451 + }, + { + "epoch": 0.05, + "learning_rate": 4.974663036689825e-05, + "loss": 0.9691, + "step": 45452 + }, + { + "epoch": 0.05, + "learning_rate": 4.974661921221673e-05, + "loss": 1.0364, + "step": 45453 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746608057290914e-05, + "loss": 1.0775, + "step": 45454 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746596902120804e-05, + "loss": 1.0891, + "step": 45455 + }, + { + "epoch": 0.05, + "learning_rate": 4.974658574670641e-05, + "loss": 1.1017, + "step": 45456 + }, + { + "epoch": 0.05, + "learning_rate": 4.974657459104772e-05, + "loss": 0.9819, + "step": 45457 + }, + { + "epoch": 0.05, + "learning_rate": 4.974656343514475e-05, + "loss": 1.0283, + "step": 45458 + }, + { + "epoch": 0.05, + "learning_rate": 4.97465522789975e-05, + "loss": 1.2659, + "step": 45459 + }, + { + "epoch": 0.05, + "learning_rate": 4.974654112260595e-05, + "loss": 1.8127, + "step": 45460 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746529965970124e-05, + "loss": 1.1422, + "step": 45461 + }, + { + "epoch": 0.05, + "learning_rate": 4.974651880909e-05, + "loss": 1.1374, + "step": 45462 + }, + { + "epoch": 0.05, + "learning_rate": 4.97465076519656e-05, + "loss": 0.5853, + "step": 45463 + }, + { + "epoch": 0.05, + "learning_rate": 4.974649649459691e-05, + "loss": 1.1149, + "step": 45464 + }, + { + "epoch": 0.05, + "learning_rate": 4.974648533698392e-05, + "loss": 0.8973, + "step": 45465 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746474179126656e-05, + "loss": 0.6577, + "step": 45466 + }, + { + "epoch": 0.05, + "learning_rate": 4.97464630210251e-05, + "loss": 0.7221, + "step": 45467 + }, + { + "epoch": 0.05, + "learning_rate": 4.974645186267926e-05, + "loss": 0.5604, + "step": 45468 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746440704089136e-05, + "loss": 0.9888, + "step": 45469 + }, + { + "epoch": 0.05, + "learning_rate": 4.974642954525472e-05, + "loss": 1.0218, + "step": 45470 + }, + { + "epoch": 0.05, + "learning_rate": 4.974641838617602e-05, + "loss": 1.2538, + "step": 45471 + }, + { + "epoch": 0.05, + "learning_rate": 4.974640722685303e-05, + "loss": 1.086, + "step": 45472 + }, + { + "epoch": 0.05, + "learning_rate": 4.974639606728576e-05, + "loss": 0.8737, + "step": 45473 + }, + { + "epoch": 0.05, + "learning_rate": 4.97463849074742e-05, + "loss": 1.0922, + "step": 45474 + }, + { + "epoch": 0.05, + "learning_rate": 4.974637374741836e-05, + "loss": 0.7719, + "step": 45475 + }, + { + "epoch": 0.05, + "learning_rate": 4.974636258711823e-05, + "loss": 1.131, + "step": 45476 + }, + { + "epoch": 0.05, + "learning_rate": 4.974635142657381e-05, + "loss": 0.8914, + "step": 45477 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746340265785106e-05, + "loss": 1.2749, + "step": 45478 + }, + { + "epoch": 0.05, + "learning_rate": 4.974632910475212e-05, + "loss": 1.6377, + "step": 45479 + }, + { + "epoch": 0.05, + "learning_rate": 4.974631794347485e-05, + "loss": 1.2082, + "step": 45480 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746306781953297e-05, + "loss": 1.3005, + "step": 45481 + }, + { + "epoch": 0.05, + "learning_rate": 4.974629562018745e-05, + "loss": 1.0055, + "step": 45482 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746284458177324e-05, + "loss": 1.1625, + "step": 45483 + }, + { + "epoch": 0.05, + "learning_rate": 4.974627329592291e-05, + "loss": 0.9574, + "step": 45484 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746262133424214e-05, + "loss": 1.1081, + "step": 45485 + }, + { + "epoch": 0.05, + "learning_rate": 4.974625097068123e-05, + "loss": 0.9516, + "step": 45486 + }, + { + "epoch": 0.05, + "learning_rate": 4.974623980769396e-05, + "loss": 1.1537, + "step": 45487 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746228644462414e-05, + "loss": 0.9005, + "step": 45488 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746217480986576e-05, + "loss": 0.7145, + "step": 45489 + }, + { + "epoch": 0.05, + "learning_rate": 4.974620631726646e-05, + "loss": 0.5629, + "step": 45490 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746195153302055e-05, + "loss": 1.2446, + "step": 45491 + }, + { + "epoch": 0.05, + "learning_rate": 4.974618398909337e-05, + "loss": 2.3399, + "step": 45492 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746172824640396e-05, + "loss": 1.0465, + "step": 45493 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746161659943144e-05, + "loss": 0.9775, + "step": 45494 + }, + { + "epoch": 0.05, + "learning_rate": 4.97461504950016e-05, + "loss": 1.1528, + "step": 45495 + }, + { + "epoch": 0.05, + "learning_rate": 4.974613932981578e-05, + "loss": 1.2708, + "step": 45496 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746128164385674e-05, + "loss": 0.932, + "step": 45497 + }, + { + "epoch": 0.05, + "learning_rate": 4.974611699871129e-05, + "loss": 0.9144, + "step": 45498 + }, + { + "epoch": 0.05, + "learning_rate": 4.974610583279261e-05, + "loss": 1.1509, + "step": 45499 + }, + { + "epoch": 0.05, + "learning_rate": 4.974609466662966e-05, + "loss": 0.7826, + "step": 45500 + }, + { + "epoch": 0.05, + "eval_loss": 1.0797648429870605, + "eval_runtime": 125.7599, + "eval_samples_per_second": 11.013, + "eval_steps_per_second": 5.511, + "step": 45500 + }, + { + "epoch": 0.05, + "learning_rate": 4.974608350022242e-05, + "loss": 1.0085, + "step": 45501 + }, + { + "epoch": 0.05, + "learning_rate": 4.97460723335709e-05, + "loss": 1.4251, + "step": 45502 + }, + { + "epoch": 0.05, + "learning_rate": 4.974606116667509e-05, + "loss": 0.9914, + "step": 45503 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746049999535004e-05, + "loss": 0.9361, + "step": 45504 + }, + { + "epoch": 0.05, + "learning_rate": 4.974603883215063e-05, + "loss": 1.0632, + "step": 45505 + }, + { + "epoch": 0.05, + "learning_rate": 4.974602766452198e-05, + "loss": 1.332, + "step": 45506 + }, + { + "epoch": 0.05, + "learning_rate": 4.974601649664905e-05, + "loss": 1.2818, + "step": 45507 + }, + { + "epoch": 0.05, + "learning_rate": 4.9746005328531833e-05, + "loss": 1.3326, + "step": 45508 + }, + { + "epoch": 0.05, + "learning_rate": 4.974599416017034e-05, + "loss": 0.7155, + "step": 45509 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745982991564556e-05, + "loss": 0.469, + "step": 45510 + }, + { + "epoch": 0.05, + "learning_rate": 4.974597182271449e-05, + "loss": 0.7329, + "step": 45511 + }, + { + "epoch": 0.05, + "learning_rate": 4.974596065362015e-05, + "loss": 0.4191, + "step": 45512 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745949484281514e-05, + "loss": 0.6019, + "step": 45513 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745938314698617e-05, + "loss": 0.7599, + "step": 45514 + }, + { + "epoch": 0.05, + "learning_rate": 4.974592714487142e-05, + "loss": 0.3362, + "step": 45515 + }, + { + "epoch": 0.05, + "learning_rate": 4.974591597479995e-05, + "loss": 0.3221, + "step": 45516 + }, + { + "epoch": 0.05, + "learning_rate": 4.97459048044842e-05, + "loss": 0.415, + "step": 45517 + }, + { + "epoch": 0.05, + "learning_rate": 4.974589363392417e-05, + "loss": 0.469, + "step": 45518 + }, + { + "epoch": 0.05, + "learning_rate": 4.974588246311985e-05, + "loss": 0.2685, + "step": 45519 + }, + { + "epoch": 0.05, + "learning_rate": 4.974587129207125e-05, + "loss": 0.6892, + "step": 45520 + }, + { + "epoch": 0.05, + "learning_rate": 4.974586012077838e-05, + "loss": 0.5863, + "step": 45521 + }, + { + "epoch": 0.05, + "learning_rate": 4.974584894924123e-05, + "loss": 0.5748, + "step": 45522 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745837777459784e-05, + "loss": 0.6229, + "step": 45523 + }, + { + "epoch": 0.05, + "learning_rate": 4.974582660543407e-05, + "loss": 0.6044, + "step": 45524 + }, + { + "epoch": 0.05, + "learning_rate": 4.974581543316407e-05, + "loss": 1.0355, + "step": 45525 + }, + { + "epoch": 0.05, + "learning_rate": 4.974580426064979e-05, + "loss": 0.5054, + "step": 45526 + }, + { + "epoch": 0.05, + "learning_rate": 4.974579308789124e-05, + "loss": 0.8264, + "step": 45527 + }, + { + "epoch": 0.05, + "learning_rate": 4.974578191488839e-05, + "loss": 0.9898, + "step": 45528 + }, + { + "epoch": 0.05, + "learning_rate": 4.974577074164127e-05, + "loss": 1.4393, + "step": 45529 + }, + { + "epoch": 0.05, + "learning_rate": 4.974575956814988e-05, + "loss": 1.0134, + "step": 45530 + }, + { + "epoch": 0.05, + "learning_rate": 4.97457483944142e-05, + "loss": 0.8625, + "step": 45531 + }, + { + "epoch": 0.05, + "learning_rate": 4.974573722043424e-05, + "loss": 0.9337, + "step": 45532 + }, + { + "epoch": 0.05, + "learning_rate": 4.974572604621001e-05, + "loss": 1.1906, + "step": 45533 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745714871741487e-05, + "loss": 0.8693, + "step": 45534 + }, + { + "epoch": 0.05, + "learning_rate": 4.974570369702869e-05, + "loss": 0.8957, + "step": 45535 + }, + { + "epoch": 0.05, + "learning_rate": 4.974569252207162e-05, + "loss": 0.745, + "step": 45536 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745681346870267e-05, + "loss": 0.9364, + "step": 45537 + }, + { + "epoch": 0.05, + "learning_rate": 4.974567017142463e-05, + "loss": 1.059, + "step": 45538 + }, + { + "epoch": 0.05, + "learning_rate": 4.974565899573472e-05, + "loss": 0.6451, + "step": 45539 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745647819800535e-05, + "loss": 0.7955, + "step": 45540 + }, + { + "epoch": 0.05, + "learning_rate": 4.974563664362206e-05, + "loss": 1.2032, + "step": 45541 + }, + { + "epoch": 0.05, + "learning_rate": 4.974562546719931e-05, + "loss": 1.0628, + "step": 45542 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745614290532286e-05, + "loss": 1.0687, + "step": 45543 + }, + { + "epoch": 0.05, + "learning_rate": 4.974560311362099e-05, + "loss": 1.0418, + "step": 45544 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745591936465405e-05, + "loss": 1.0584, + "step": 45545 + }, + { + "epoch": 0.05, + "learning_rate": 4.974558075906554e-05, + "loss": 0.7742, + "step": 45546 + }, + { + "epoch": 0.05, + "learning_rate": 4.97455695814214e-05, + "loss": 1.0509, + "step": 45547 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745558403532986e-05, + "loss": 0.4639, + "step": 45548 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745547225400294e-05, + "loss": 0.3128, + "step": 45549 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745536047023324e-05, + "loss": 0.4887, + "step": 45550 + }, + { + "epoch": 0.05, + "learning_rate": 4.974552486840207e-05, + "loss": 0.4655, + "step": 45551 + }, + { + "epoch": 0.05, + "learning_rate": 4.974551368953655e-05, + "loss": 0.4807, + "step": 45552 + }, + { + "epoch": 0.05, + "learning_rate": 4.974550251042674e-05, + "loss": 0.4912, + "step": 45553 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745491331072655e-05, + "loss": 0.4908, + "step": 45554 + }, + { + "epoch": 0.05, + "learning_rate": 4.97454801514743e-05, + "loss": 0.7878, + "step": 45555 + }, + { + "epoch": 0.05, + "learning_rate": 4.974546897163167e-05, + "loss": 1.1363, + "step": 45556 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745457791544756e-05, + "loss": 1.1923, + "step": 45557 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745446611213566e-05, + "loss": 1.2722, + "step": 45558 + }, + { + "epoch": 0.05, + "learning_rate": 4.97454354306381e-05, + "loss": 1.1871, + "step": 45559 + }, + { + "epoch": 0.05, + "learning_rate": 4.974542424981836e-05, + "loss": 1.3742, + "step": 45560 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745413068754344e-05, + "loss": 0.8521, + "step": 45561 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745401887446044e-05, + "loss": 1.0748, + "step": 45562 + }, + { + "epoch": 0.05, + "learning_rate": 4.974539070589348e-05, + "loss": 0.7302, + "step": 45563 + }, + { + "epoch": 0.05, + "learning_rate": 4.974537952409663e-05, + "loss": 0.4895, + "step": 45564 + }, + { + "epoch": 0.05, + "learning_rate": 4.974536834205551e-05, + "loss": 0.7053, + "step": 45565 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745357159770115e-05, + "loss": 0.8277, + "step": 45566 + }, + { + "epoch": 0.05, + "learning_rate": 4.974534597724044e-05, + "loss": 1.0419, + "step": 45567 + }, + { + "epoch": 0.05, + "learning_rate": 4.974533479446649e-05, + "loss": 1.1161, + "step": 45568 + }, + { + "epoch": 0.05, + "learning_rate": 4.974532361144826e-05, + "loss": 0.4913, + "step": 45569 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745312428185766e-05, + "loss": 0.8352, + "step": 45570 + }, + { + "epoch": 0.05, + "learning_rate": 4.974530124467899e-05, + "loss": 0.85, + "step": 45571 + }, + { + "epoch": 0.05, + "learning_rate": 4.974529006092793e-05, + "loss": 1.054, + "step": 45572 + }, + { + "epoch": 0.05, + "learning_rate": 4.974527887693261e-05, + "loss": 1.0815, + "step": 45573 + }, + { + "epoch": 0.05, + "learning_rate": 4.974526769269301e-05, + "loss": 0.8664, + "step": 45574 + }, + { + "epoch": 0.05, + "learning_rate": 4.974525650820913e-05, + "loss": 0.8523, + "step": 45575 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745245323480984e-05, + "loss": 0.9492, + "step": 45576 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745234138508554e-05, + "loss": 1.022, + "step": 45577 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745222953291854e-05, + "loss": 1.1837, + "step": 45578 + }, + { + "epoch": 0.05, + "learning_rate": 4.974521176783088e-05, + "loss": 1.0363, + "step": 45579 + }, + { + "epoch": 0.05, + "learning_rate": 4.974520058212563e-05, + "loss": 1.2925, + "step": 45580 + }, + { + "epoch": 0.05, + "learning_rate": 4.974518939617611e-05, + "loss": 0.9728, + "step": 45581 + }, + { + "epoch": 0.05, + "learning_rate": 4.974517820998231e-05, + "loss": 1.1876, + "step": 45582 + }, + { + "epoch": 0.05, + "learning_rate": 4.974516702354424e-05, + "loss": 0.936, + "step": 45583 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745155836861894e-05, + "loss": 1.1342, + "step": 45584 + }, + { + "epoch": 0.05, + "learning_rate": 4.974514464993528e-05, + "loss": 1.2303, + "step": 45585 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745133462764385e-05, + "loss": 0.8657, + "step": 45586 + }, + { + "epoch": 0.05, + "learning_rate": 4.974512227534922e-05, + "loss": 1.1593, + "step": 45587 + }, + { + "epoch": 0.05, + "learning_rate": 4.974511108768978e-05, + "loss": 1.0674, + "step": 45588 + }, + { + "epoch": 0.05, + "learning_rate": 4.974509989978607e-05, + "loss": 0.9926, + "step": 45589 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745088711638085e-05, + "loss": 1.0086, + "step": 45590 + }, + { + "epoch": 0.05, + "learning_rate": 4.974507752324582e-05, + "loss": 1.1706, + "step": 45591 + }, + { + "epoch": 0.05, + "learning_rate": 4.974506633460929e-05, + "loss": 1.0246, + "step": 45592 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745055145728484e-05, + "loss": 1.2272, + "step": 45593 + }, + { + "epoch": 0.05, + "learning_rate": 4.974504395660341e-05, + "loss": 1.1084, + "step": 45594 + }, + { + "epoch": 0.05, + "learning_rate": 4.9745032767234054e-05, + "loss": 1.2015, + "step": 45595 + }, + { + "epoch": 0.05, + "learning_rate": 4.974502157762044e-05, + "loss": 1.1233, + "step": 45596 + }, + { + "epoch": 0.05, + "learning_rate": 4.974501038776254e-05, + "loss": 0.6892, + "step": 45597 + }, + { + "epoch": 0.05, + "learning_rate": 4.974499919766037e-05, + "loss": 1.2991, + "step": 45598 + }, + { + "epoch": 0.05, + "learning_rate": 4.974498800731394e-05, + "loss": 1.1437, + "step": 45599 + }, + { + "epoch": 0.05, + "learning_rate": 4.974497681672322e-05, + "loss": 1.2367, + "step": 45600 + }, + { + "epoch": 0.05, + "learning_rate": 4.974496562588824e-05, + "loss": 1.0962, + "step": 45601 + }, + { + "epoch": 0.05, + "learning_rate": 4.974495443480899e-05, + "loss": 1.117, + "step": 45602 + }, + { + "epoch": 0.05, + "learning_rate": 4.974494324348546e-05, + "loss": 0.994, + "step": 45603 + }, + { + "epoch": 0.05, + "learning_rate": 4.974493205191766e-05, + "loss": 0.9934, + "step": 45604 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744920860105586e-05, + "loss": 1.1071, + "step": 45605 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744909668049244e-05, + "loss": 0.8971, + "step": 45606 + }, + { + "epoch": 0.05, + "learning_rate": 4.974489847574864e-05, + "loss": 1.0586, + "step": 45607 + }, + { + "epoch": 0.05, + "learning_rate": 4.974488728320375e-05, + "loss": 1.007, + "step": 45608 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744876090414594e-05, + "loss": 0.9642, + "step": 45609 + }, + { + "epoch": 0.05, + "learning_rate": 4.974486489738117e-05, + "loss": 1.1053, + "step": 45610 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744853704103466e-05, + "loss": 1.4651, + "step": 45611 + }, + { + "epoch": 0.05, + "learning_rate": 4.97448425105815e-05, + "loss": 1.2437, + "step": 45612 + }, + { + "epoch": 0.05, + "learning_rate": 4.974483131681527e-05, + "loss": 0.9803, + "step": 45613 + }, + { + "epoch": 0.05, + "learning_rate": 4.974482012280476e-05, + "loss": 0.9789, + "step": 45614 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744808928549975e-05, + "loss": 0.6698, + "step": 45615 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744797734050926e-05, + "loss": 0.907, + "step": 45616 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744786539307606e-05, + "loss": 0.8371, + "step": 45617 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744775344320015e-05, + "loss": 1.2622, + "step": 45618 + }, + { + "epoch": 0.05, + "learning_rate": 4.974476414908815e-05, + "loss": 1.0646, + "step": 45619 + }, + { + "epoch": 0.05, + "learning_rate": 4.974475295361202e-05, + "loss": 1.1624, + "step": 45620 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744741757891624e-05, + "loss": 1.0207, + "step": 45621 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744730561926957e-05, + "loss": 1.0858, + "step": 45622 + }, + { + "epoch": 0.05, + "learning_rate": 4.974471936571802e-05, + "loss": 1.0029, + "step": 45623 + }, + { + "epoch": 0.05, + "learning_rate": 4.974470816926481e-05, + "loss": 1.2861, + "step": 45624 + }, + { + "epoch": 0.05, + "learning_rate": 4.974469697256733e-05, + "loss": 1.1925, + "step": 45625 + }, + { + "epoch": 0.05, + "learning_rate": 4.974468577562558e-05, + "loss": 0.9977, + "step": 45626 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744674578439566e-05, + "loss": 1.1006, + "step": 45627 + }, + { + "epoch": 0.05, + "learning_rate": 4.974466338100928e-05, + "loss": 1.2871, + "step": 45628 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744652183334725e-05, + "loss": 1.0092, + "step": 45629 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744640985415906e-05, + "loss": 1.5077, + "step": 45630 + }, + { + "epoch": 0.05, + "learning_rate": 4.974462978725281e-05, + "loss": 1.2329, + "step": 45631 + }, + { + "epoch": 0.05, + "learning_rate": 4.974461858884545e-05, + "loss": 0.6615, + "step": 45632 + }, + { + "epoch": 0.05, + "learning_rate": 4.974460739019382e-05, + "loss": 0.5246, + "step": 45633 + }, + { + "epoch": 0.05, + "learning_rate": 4.974459619129792e-05, + "loss": 0.8487, + "step": 45634 + }, + { + "epoch": 0.05, + "learning_rate": 4.974458499215776e-05, + "loss": 1.0592, + "step": 45635 + }, + { + "epoch": 0.05, + "learning_rate": 4.974457379277332e-05, + "loss": 1.0532, + "step": 45636 + }, + { + "epoch": 0.05, + "learning_rate": 4.974456259314463e-05, + "loss": 0.8353, + "step": 45637 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744551393271655e-05, + "loss": 1.12, + "step": 45638 + }, + { + "epoch": 0.05, + "learning_rate": 4.974454019315442e-05, + "loss": 1.2937, + "step": 45639 + }, + { + "epoch": 0.05, + "learning_rate": 4.974452899279292e-05, + "loss": 1.1838, + "step": 45640 + }, + { + "epoch": 0.05, + "learning_rate": 4.974451779218714e-05, + "loss": 0.7565, + "step": 45641 + }, + { + "epoch": 0.05, + "learning_rate": 4.974450659133711e-05, + "loss": 0.9144, + "step": 45642 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744495390242794e-05, + "loss": 0.8899, + "step": 45643 + }, + { + "epoch": 0.05, + "learning_rate": 4.974448418890422e-05, + "loss": 1.1573, + "step": 45644 + }, + { + "epoch": 0.05, + "learning_rate": 4.974447298732138e-05, + "loss": 1.2918, + "step": 45645 + }, + { + "epoch": 0.05, + "learning_rate": 4.974446178549427e-05, + "loss": 1.0351, + "step": 45646 + }, + { + "epoch": 0.05, + "learning_rate": 4.97444505834229e-05, + "loss": 1.2496, + "step": 45647 + }, + { + "epoch": 0.05, + "learning_rate": 4.974443938110725e-05, + "loss": 1.53, + "step": 45648 + }, + { + "epoch": 0.05, + "learning_rate": 4.974442817854734e-05, + "loss": 1.176, + "step": 45649 + }, + { + "epoch": 0.05, + "learning_rate": 4.974441697574317e-05, + "loss": 1.1114, + "step": 45650 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744405772694725e-05, + "loss": 0.9394, + "step": 45651 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744394569402016e-05, + "loss": 1.0122, + "step": 45652 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744383365865044e-05, + "loss": 1.4037, + "step": 45653 + }, + { + "epoch": 0.05, + "learning_rate": 4.97443721620838e-05, + "loss": 1.4254, + "step": 45654 + }, + { + "epoch": 0.05, + "learning_rate": 4.97443609580583e-05, + "loss": 1.6696, + "step": 45655 + }, + { + "epoch": 0.05, + "learning_rate": 4.974434975378852e-05, + "loss": 0.8121, + "step": 45656 + }, + { + "epoch": 0.05, + "learning_rate": 4.974433854927448e-05, + "loss": 0.3647, + "step": 45657 + }, + { + "epoch": 0.05, + "learning_rate": 4.974432734451618e-05, + "loss": 0.8786, + "step": 45658 + }, + { + "epoch": 0.05, + "learning_rate": 4.974431613951361e-05, + "loss": 1.0896, + "step": 45659 + }, + { + "epoch": 0.05, + "learning_rate": 4.974430493426678e-05, + "loss": 1.1519, + "step": 45660 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744293728775674e-05, + "loss": 0.756, + "step": 45661 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744282523040305e-05, + "loss": 0.5171, + "step": 45662 + }, + { + "epoch": 0.05, + "learning_rate": 4.974427131706068e-05, + "loss": 0.8075, + "step": 45663 + }, + { + "epoch": 0.05, + "learning_rate": 4.974426011083678e-05, + "loss": 0.9396, + "step": 45664 + }, + { + "epoch": 0.05, + "learning_rate": 4.974424890436862e-05, + "loss": 1.2193, + "step": 45665 + }, + { + "epoch": 0.05, + "learning_rate": 4.97442376976562e-05, + "loss": 0.8489, + "step": 45666 + }, + { + "epoch": 0.05, + "learning_rate": 4.974422649069951e-05, + "loss": 0.8109, + "step": 45667 + }, + { + "epoch": 0.05, + "learning_rate": 4.974421528349855e-05, + "loss": 0.864, + "step": 45668 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744204076053335e-05, + "loss": 0.9593, + "step": 45669 + }, + { + "epoch": 0.05, + "learning_rate": 4.974419286836385e-05, + "loss": 0.9096, + "step": 45670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744181660430104e-05, + "loss": 1.0249, + "step": 45671 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744170452252095e-05, + "loss": 1.2112, + "step": 45672 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744159243829816e-05, + "loss": 0.9519, + "step": 45673 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744148035163273e-05, + "loss": 1.105, + "step": 45674 + }, + { + "epoch": 0.05, + "learning_rate": 4.974413682625247e-05, + "loss": 1.1696, + "step": 45675 + }, + { + "epoch": 0.05, + "learning_rate": 4.97441256170974e-05, + "loss": 0.9728, + "step": 45676 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744114407698074e-05, + "loss": 0.693, + "step": 45677 + }, + { + "epoch": 0.05, + "learning_rate": 4.974410319805448e-05, + "loss": 1.1799, + "step": 45678 + }, + { + "epoch": 0.05, + "learning_rate": 4.974409198816662e-05, + "loss": 1.1697, + "step": 45679 + }, + { + "epoch": 0.05, + "learning_rate": 4.97440807780345e-05, + "loss": 0.8715, + "step": 45680 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744069567658116e-05, + "loss": 1.1345, + "step": 45681 + }, + { + "epoch": 0.05, + "learning_rate": 4.974405835703747e-05, + "loss": 1.12, + "step": 45682 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744047146172556e-05, + "loss": 0.9811, + "step": 45683 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744035935063387e-05, + "loss": 0.9339, + "step": 45684 + }, + { + "epoch": 0.05, + "learning_rate": 4.974402472370995e-05, + "loss": 0.7979, + "step": 45685 + }, + { + "epoch": 0.05, + "learning_rate": 4.974401351211225e-05, + "loss": 0.9271, + "step": 45686 + }, + { + "epoch": 0.05, + "learning_rate": 4.9744002300270295e-05, + "loss": 0.8029, + "step": 45687 + }, + { + "epoch": 0.05, + "learning_rate": 4.974399108818407e-05, + "loss": 1.3723, + "step": 45688 + }, + { + "epoch": 0.05, + "learning_rate": 4.974397987585359e-05, + "loss": 0.9677, + "step": 45689 + }, + { + "epoch": 0.05, + "learning_rate": 4.974396866327884e-05, + "loss": 0.9619, + "step": 45690 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743957450459825e-05, + "loss": 1.1322, + "step": 45691 + }, + { + "epoch": 0.05, + "learning_rate": 4.974394623739656e-05, + "loss": 1.3438, + "step": 45692 + }, + { + "epoch": 0.05, + "learning_rate": 4.974393502408902e-05, + "loss": 1.3055, + "step": 45693 + }, + { + "epoch": 0.05, + "learning_rate": 4.974392381053723e-05, + "loss": 0.9067, + "step": 45694 + }, + { + "epoch": 0.05, + "learning_rate": 4.974391259674117e-05, + "loss": 0.6146, + "step": 45695 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743901382700855e-05, + "loss": 0.8327, + "step": 45696 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743890168416275e-05, + "loss": 0.9726, + "step": 45697 + }, + { + "epoch": 0.05, + "learning_rate": 4.974387895388744e-05, + "loss": 1.381, + "step": 45698 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743867739114336e-05, + "loss": 0.8859, + "step": 45699 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743856524096963e-05, + "loss": 0.8337, + "step": 45700 + }, + { + "epoch": 0.05, + "learning_rate": 4.974384530883535e-05, + "loss": 1.0273, + "step": 45701 + }, + { + "epoch": 0.05, + "learning_rate": 4.974383409332946e-05, + "loss": 1.1322, + "step": 45702 + }, + { + "epoch": 0.05, + "learning_rate": 4.974382287757932e-05, + "loss": 1.0251, + "step": 45703 + }, + { + "epoch": 0.05, + "learning_rate": 4.974381166158491e-05, + "loss": 1.1836, + "step": 45704 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743800445346244e-05, + "loss": 0.9265, + "step": 45705 + }, + { + "epoch": 0.05, + "learning_rate": 4.974378922886332e-05, + "loss": 1.336, + "step": 45706 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743778012136135e-05, + "loss": 1.3028, + "step": 45707 + }, + { + "epoch": 0.05, + "learning_rate": 4.974376679516468e-05, + "loss": 1.7094, + "step": 45708 + }, + { + "epoch": 0.05, + "learning_rate": 4.974375557794898e-05, + "loss": 1.4221, + "step": 45709 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743744360489006e-05, + "loss": 0.9569, + "step": 45710 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743733142784784e-05, + "loss": 1.1971, + "step": 45711 + }, + { + "epoch": 0.05, + "learning_rate": 4.97437219248363e-05, + "loss": 1.272, + "step": 45712 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743710706643555e-05, + "loss": 1.03, + "step": 45713 + }, + { + "epoch": 0.05, + "learning_rate": 4.974369948820654e-05, + "loss": 0.9234, + "step": 45714 + }, + { + "epoch": 0.05, + "learning_rate": 4.974368826952528e-05, + "loss": 0.6427, + "step": 45715 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743677050599756e-05, + "loss": 0.7138, + "step": 45716 + }, + { + "epoch": 0.05, + "learning_rate": 4.974366583142998e-05, + "loss": 0.7718, + "step": 45717 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743654612015935e-05, + "loss": 1.0889, + "step": 45718 + }, + { + "epoch": 0.05, + "learning_rate": 4.974364339235763e-05, + "loss": 1.1808, + "step": 45719 + }, + { + "epoch": 0.05, + "learning_rate": 4.974363217245507e-05, + "loss": 1.1002, + "step": 45720 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743620952308244e-05, + "loss": 1.1042, + "step": 45721 + }, + { + "epoch": 0.05, + "learning_rate": 4.974360973191717e-05, + "loss": 0.9851, + "step": 45722 + }, + { + "epoch": 0.05, + "learning_rate": 4.974359851128184e-05, + "loss": 1.0143, + "step": 45723 + }, + { + "epoch": 0.05, + "learning_rate": 4.974358729040224e-05, + "loss": 1.089, + "step": 45724 + }, + { + "epoch": 0.05, + "learning_rate": 4.974357606927839e-05, + "loss": 0.8771, + "step": 45725 + }, + { + "epoch": 0.05, + "learning_rate": 4.974356484791028e-05, + "loss": 1.1704, + "step": 45726 + }, + { + "epoch": 0.05, + "learning_rate": 4.974355362629791e-05, + "loss": 0.9515, + "step": 45727 + }, + { + "epoch": 0.05, + "learning_rate": 4.974354240444128e-05, + "loss": 1.2547, + "step": 45728 + }, + { + "epoch": 0.05, + "learning_rate": 4.97435311823404e-05, + "loss": 0.9652, + "step": 45729 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743519959995254e-05, + "loss": 0.7073, + "step": 45730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743508737405855e-05, + "loss": 1.2819, + "step": 45731 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743497514572205e-05, + "loss": 1.0575, + "step": 45732 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743486291494284e-05, + "loss": 1.2363, + "step": 45733 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743475068172106e-05, + "loss": 0.9336, + "step": 45734 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743463844605684e-05, + "loss": 1.1846, + "step": 45735 + }, + { + "epoch": 0.05, + "learning_rate": 4.974345262079499e-05, + "loss": 1.0551, + "step": 45736 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743441396740056e-05, + "loss": 1.1375, + "step": 45737 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743430172440855e-05, + "loss": 1.0266, + "step": 45738 + }, + { + "epoch": 0.05, + "learning_rate": 4.97434189478974e-05, + "loss": 0.9828, + "step": 45739 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743407723109684e-05, + "loss": 1.1194, + "step": 45740 + }, + { + "epoch": 0.05, + "learning_rate": 4.974339649807771e-05, + "loss": 0.9022, + "step": 45741 + }, + { + "epoch": 0.05, + "learning_rate": 4.974338527280149e-05, + "loss": 0.7958, + "step": 45742 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743374047281004e-05, + "loss": 1.1339, + "step": 45743 + }, + { + "epoch": 0.05, + "learning_rate": 4.974336282151627e-05, + "loss": 1.4615, + "step": 45744 + }, + { + "epoch": 0.05, + "learning_rate": 4.974335159550727e-05, + "loss": 1.382, + "step": 45745 + }, + { + "epoch": 0.05, + "learning_rate": 4.974334036925402e-05, + "loss": 1.0907, + "step": 45746 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743329142756515e-05, + "loss": 1.1728, + "step": 45747 + }, + { + "epoch": 0.05, + "learning_rate": 4.974331791601476e-05, + "loss": 0.973, + "step": 45748 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743306689028734e-05, + "loss": 0.1478, + "step": 45749 + }, + { + "epoch": 0.05, + "learning_rate": 4.974329546179846e-05, + "loss": 0.1046, + "step": 45750 + }, + { + "epoch": 0.05, + "learning_rate": 4.974328423432393e-05, + "loss": 0.0943, + "step": 45751 + }, + { + "epoch": 0.05, + "learning_rate": 4.974327300660515e-05, + "loss": 0.0735, + "step": 45752 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743261778642106e-05, + "loss": 0.0745, + "step": 45753 + }, + { + "epoch": 0.05, + "learning_rate": 4.974325055043482e-05, + "loss": 0.0798, + "step": 45754 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743239321983266e-05, + "loss": 0.0653, + "step": 45755 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743228093287464e-05, + "loss": 0.072, + "step": 45756 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743216864347404e-05, + "loss": 0.1125, + "step": 45757 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743205635163094e-05, + "loss": 0.0526, + "step": 45758 + }, + { + "epoch": 0.05, + "learning_rate": 4.974319440573452e-05, + "loss": 0.0356, + "step": 45759 + }, + { + "epoch": 0.05, + "learning_rate": 4.97431831760617e-05, + "loss": 0.0502, + "step": 45760 + }, + { + "epoch": 0.05, + "learning_rate": 4.974317194614462e-05, + "loss": 0.0739, + "step": 45761 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743160715983295e-05, + "loss": 0.1416, + "step": 45762 + }, + { + "epoch": 0.05, + "learning_rate": 4.974314948557771e-05, + "loss": 0.9306, + "step": 45763 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743138254927866e-05, + "loss": 0.9571, + "step": 45764 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743127024033776e-05, + "loss": 0.7926, + "step": 45765 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743115792895435e-05, + "loss": 1.0578, + "step": 45766 + }, + { + "epoch": 0.05, + "learning_rate": 4.974310456151283e-05, + "loss": 1.1989, + "step": 45767 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743093329885976e-05, + "loss": 1.0844, + "step": 45768 + }, + { + "epoch": 0.05, + "learning_rate": 4.974308209801487e-05, + "loss": 0.8869, + "step": 45769 + }, + { + "epoch": 0.05, + "learning_rate": 4.974307086589951e-05, + "loss": 1.0819, + "step": 45770 + }, + { + "epoch": 0.05, + "learning_rate": 4.97430596335399e-05, + "loss": 0.8452, + "step": 45771 + }, + { + "epoch": 0.05, + "learning_rate": 4.974304840093603e-05, + "loss": 0.7517, + "step": 45772 + }, + { + "epoch": 0.05, + "learning_rate": 4.974303716808791e-05, + "loss": 0.986, + "step": 45773 + }, + { + "epoch": 0.05, + "learning_rate": 4.974302593499554e-05, + "loss": 0.8584, + "step": 45774 + }, + { + "epoch": 0.05, + "learning_rate": 4.974301470165892e-05, + "loss": 1.2329, + "step": 45775 + }, + { + "epoch": 0.05, + "learning_rate": 4.974300346807804e-05, + "loss": 1.4424, + "step": 45776 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742992234252904e-05, + "loss": 1.5056, + "step": 45777 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742981000183524e-05, + "loss": 1.3299, + "step": 45778 + }, + { + "epoch": 0.05, + "learning_rate": 4.974296976586989e-05, + "loss": 0.7363, + "step": 45779 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742958531312e-05, + "loss": 1.261, + "step": 45780 + }, + { + "epoch": 0.05, + "learning_rate": 4.974294729650987e-05, + "loss": 1.5741, + "step": 45781 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742936061463475e-05, + "loss": 1.2628, + "step": 45782 + }, + { + "epoch": 0.05, + "learning_rate": 4.974292482617283e-05, + "loss": 0.8167, + "step": 45783 + }, + { + "epoch": 0.05, + "learning_rate": 4.974291359063794e-05, + "loss": 0.7425, + "step": 45784 + }, + { + "epoch": 0.05, + "learning_rate": 4.974290235485879e-05, + "loss": 1.3187, + "step": 45785 + }, + { + "epoch": 0.05, + "learning_rate": 4.974289111883539e-05, + "loss": 1.02, + "step": 45786 + }, + { + "epoch": 0.05, + "learning_rate": 4.974287988256775e-05, + "loss": 1.0194, + "step": 45787 + }, + { + "epoch": 0.05, + "learning_rate": 4.974286864605585e-05, + "loss": 1.492, + "step": 45788 + }, + { + "epoch": 0.05, + "learning_rate": 4.974285740929969e-05, + "loss": 0.9408, + "step": 45789 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742846172299295e-05, + "loss": 0.6201, + "step": 45790 + }, + { + "epoch": 0.05, + "learning_rate": 4.974283493505464e-05, + "loss": 0.9884, + "step": 45791 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742823697565734e-05, + "loss": 1.1657, + "step": 45792 + }, + { + "epoch": 0.05, + "learning_rate": 4.974281245983258e-05, + "loss": 1.0155, + "step": 45793 + }, + { + "epoch": 0.05, + "learning_rate": 4.974280122185517e-05, + "loss": 1.1231, + "step": 45794 + }, + { + "epoch": 0.05, + "learning_rate": 4.974278998363352e-05, + "loss": 1.0422, + "step": 45795 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742778745167614e-05, + "loss": 1.3371, + "step": 45796 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742767506457456e-05, + "loss": 1.1535, + "step": 45797 + }, + { + "epoch": 0.05, + "learning_rate": 4.974275626750305e-05, + "loss": 0.9949, + "step": 45798 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742745028304396e-05, + "loss": 1.1172, + "step": 45799 + }, + { + "epoch": 0.05, + "learning_rate": 4.974273378886149e-05, + "loss": 0.8733, + "step": 45800 + }, + { + "epoch": 0.05, + "learning_rate": 4.974272254917433e-05, + "loss": 1.3712, + "step": 45801 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742711309242924e-05, + "loss": 1.2341, + "step": 45802 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742700069067264e-05, + "loss": 1.2873, + "step": 45803 + }, + { + "epoch": 0.05, + "learning_rate": 4.974268882864736e-05, + "loss": 1.0733, + "step": 45804 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742677587983205e-05, + "loss": 0.8646, + "step": 45805 + }, + { + "epoch": 0.05, + "learning_rate": 4.97426663470748e-05, + "loss": 0.8734, + "step": 45806 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742655105922145e-05, + "loss": 1.0928, + "step": 45807 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742643864525246e-05, + "loss": 1.2, + "step": 45808 + }, + { + "epoch": 0.05, + "learning_rate": 4.97426326228841e-05, + "loss": 0.4103, + "step": 45809 + }, + { + "epoch": 0.05, + "learning_rate": 4.97426213809987e-05, + "loss": 0.8974, + "step": 45810 + }, + { + "epoch": 0.05, + "learning_rate": 4.974261013886905e-05, + "loss": 0.5257, + "step": 45811 + }, + { + "epoch": 0.05, + "learning_rate": 4.974259889649515e-05, + "loss": 0.6782, + "step": 45812 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742587653877e-05, + "loss": 0.8126, + "step": 45813 + }, + { + "epoch": 0.05, + "learning_rate": 4.974257641101461e-05, + "loss": 0.9808, + "step": 45814 + }, + { + "epoch": 0.05, + "learning_rate": 4.974256516790796e-05, + "loss": 1.3454, + "step": 45815 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742553924557074e-05, + "loss": 1.0497, + "step": 45816 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742542680961935e-05, + "loss": 1.1296, + "step": 45817 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742531437122545e-05, + "loss": 1.9507, + "step": 45818 + }, + { + "epoch": 0.05, + "learning_rate": 4.974252019303891e-05, + "loss": 1.121, + "step": 45819 + }, + { + "epoch": 0.05, + "learning_rate": 4.974250894871103e-05, + "loss": 1.0829, + "step": 45820 + }, + { + "epoch": 0.05, + "learning_rate": 4.97424977041389e-05, + "loss": 0.9769, + "step": 45821 + }, + { + "epoch": 0.05, + "learning_rate": 4.974248645932252e-05, + "loss": 1.2224, + "step": 45822 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742475214261895e-05, + "loss": 1.0138, + "step": 45823 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742463968957024e-05, + "loss": 1.1502, + "step": 45824 + }, + { + "epoch": 0.05, + "learning_rate": 4.974245272340791e-05, + "loss": 0.9323, + "step": 45825 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742441477614536e-05, + "loss": 0.9567, + "step": 45826 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742430231576926e-05, + "loss": 1.0271, + "step": 45827 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742418985295066e-05, + "loss": 0.9642, + "step": 45828 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742407738768956e-05, + "loss": 2.118, + "step": 45829 + }, + { + "epoch": 0.05, + "learning_rate": 4.97423964919986e-05, + "loss": 1.3345, + "step": 45830 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742385244984004e-05, + "loss": 0.8406, + "step": 45831 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742373997725156e-05, + "loss": 1.1666, + "step": 45832 + }, + { + "epoch": 0.05, + "learning_rate": 4.974236275022206e-05, + "loss": 0.8133, + "step": 45833 + }, + { + "epoch": 0.05, + "learning_rate": 4.974235150247473e-05, + "loss": 0.9438, + "step": 45834 + }, + { + "epoch": 0.05, + "learning_rate": 4.974234025448314e-05, + "loss": 0.8918, + "step": 45835 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742329006247314e-05, + "loss": 0.9676, + "step": 45836 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742317757767235e-05, + "loss": 1.1376, + "step": 45837 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742306509042904e-05, + "loss": 1.034, + "step": 45838 + }, + { + "epoch": 0.05, + "learning_rate": 4.974229526007434e-05, + "loss": 0.9938, + "step": 45839 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742284010861527e-05, + "loss": 0.9795, + "step": 45840 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742272761404465e-05, + "loss": 0.8263, + "step": 45841 + }, + { + "epoch": 0.05, + "learning_rate": 4.974226151170316e-05, + "loss": 1.0088, + "step": 45842 + }, + { + "epoch": 0.05, + "learning_rate": 4.974225026175761e-05, + "loss": 0.8927, + "step": 45843 + }, + { + "epoch": 0.05, + "learning_rate": 4.974223901156781e-05, + "loss": 0.9106, + "step": 45844 + }, + { + "epoch": 0.05, + "learning_rate": 4.974222776113378e-05, + "loss": 0.9508, + "step": 45845 + }, + { + "epoch": 0.05, + "learning_rate": 4.97422165104555e-05, + "loss": 0.908, + "step": 45846 + }, + { + "epoch": 0.05, + "learning_rate": 4.974220525953296e-05, + "loss": 1.1274, + "step": 45847 + }, + { + "epoch": 0.05, + "learning_rate": 4.974219400836619e-05, + "loss": 1.0245, + "step": 45848 + }, + { + "epoch": 0.05, + "learning_rate": 4.974218275695517e-05, + "loss": 1.3667, + "step": 45849 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742171505299904e-05, + "loss": 0.874, + "step": 45850 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742160253400406e-05, + "loss": 0.5435, + "step": 45851 + }, + { + "epoch": 0.05, + "learning_rate": 4.974214900125665e-05, + "loss": 0.9578, + "step": 45852 + }, + { + "epoch": 0.05, + "learning_rate": 4.974213774886865e-05, + "loss": 1.045, + "step": 45853 + }, + { + "epoch": 0.05, + "learning_rate": 4.974212649623642e-05, + "loss": 1.081, + "step": 45854 + }, + { + "epoch": 0.05, + "learning_rate": 4.974211524335993e-05, + "loss": 0.9487, + "step": 45855 + }, + { + "epoch": 0.05, + "learning_rate": 4.974210399023921e-05, + "loss": 0.9308, + "step": 45856 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742092736874236e-05, + "loss": 0.6461, + "step": 45857 + }, + { + "epoch": 0.05, + "learning_rate": 4.974208148326502e-05, + "loss": 0.9577, + "step": 45858 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742070229411566e-05, + "loss": 1.1438, + "step": 45859 + }, + { + "epoch": 0.05, + "learning_rate": 4.974205897531386e-05, + "loss": 1.1011, + "step": 45860 + }, + { + "epoch": 0.05, + "learning_rate": 4.974204772097192e-05, + "loss": 1.0869, + "step": 45861 + }, + { + "epoch": 0.05, + "learning_rate": 4.974203646638573e-05, + "loss": 0.9677, + "step": 45862 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742025211555295e-05, + "loss": 1.0718, + "step": 45863 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742013956480624e-05, + "loss": 0.9878, + "step": 45864 + }, + { + "epoch": 0.05, + "learning_rate": 4.974200270116171e-05, + "loss": 1.0713, + "step": 45865 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741991445598556e-05, + "loss": 1.0904, + "step": 45866 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741980189791147e-05, + "loss": 1.0928, + "step": 45867 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741968933739514e-05, + "loss": 1.1967, + "step": 45868 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741957677443624e-05, + "loss": 1.0833, + "step": 45869 + }, + { + "epoch": 0.05, + "learning_rate": 4.97419464209035e-05, + "loss": 1.035, + "step": 45870 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741935164119127e-05, + "loss": 1.2216, + "step": 45871 + }, + { + "epoch": 0.05, + "learning_rate": 4.974192390709051e-05, + "loss": 0.7986, + "step": 45872 + }, + { + "epoch": 0.05, + "learning_rate": 4.974191264981766e-05, + "loss": 1.1399, + "step": 45873 + }, + { + "epoch": 0.05, + "learning_rate": 4.974190139230057e-05, + "loss": 1.1012, + "step": 45874 + }, + { + "epoch": 0.05, + "learning_rate": 4.974189013453923e-05, + "loss": 0.863, + "step": 45875 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741878876533653e-05, + "loss": 0.8511, + "step": 45876 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741867618283835e-05, + "loss": 0.9568, + "step": 45877 + }, + { + "epoch": 0.05, + "learning_rate": 4.974185635978977e-05, + "loss": 0.7713, + "step": 45878 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741845101051466e-05, + "loss": 1.1167, + "step": 45879 + }, + { + "epoch": 0.05, + "learning_rate": 4.974183384206892e-05, + "loss": 0.7915, + "step": 45880 + }, + { + "epoch": 0.05, + "learning_rate": 4.974182258284214e-05, + "loss": 1.1323, + "step": 45881 + }, + { + "epoch": 0.05, + "learning_rate": 4.974181132337111e-05, + "loss": 1.3012, + "step": 45882 + }, + { + "epoch": 0.05, + "learning_rate": 4.974180006365585e-05, + "loss": 0.9903, + "step": 45883 + }, + { + "epoch": 0.05, + "learning_rate": 4.974178880369634e-05, + "loss": 0.7792, + "step": 45884 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741777543492594e-05, + "loss": 0.7715, + "step": 45885 + }, + { + "epoch": 0.05, + "learning_rate": 4.974176628304461e-05, + "loss": 0.8317, + "step": 45886 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741755022352375e-05, + "loss": 0.978, + "step": 45887 + }, + { + "epoch": 0.05, + "learning_rate": 4.974174376141592e-05, + "loss": 0.4814, + "step": 45888 + }, + { + "epoch": 0.05, + "learning_rate": 4.974173250023521e-05, + "loss": 0.6874, + "step": 45889 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741721238810256e-05, + "loss": 1.0783, + "step": 45890 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741709977141074e-05, + "loss": 1.0776, + "step": 45891 + }, + { + "epoch": 0.05, + "learning_rate": 4.974169871522764e-05, + "loss": 0.5224, + "step": 45892 + }, + { + "epoch": 0.05, + "learning_rate": 4.974168745306997e-05, + "loss": 1.3025, + "step": 45893 + }, + { + "epoch": 0.05, + "learning_rate": 4.974167619066807e-05, + "loss": 0.9844, + "step": 45894 + }, + { + "epoch": 0.05, + "learning_rate": 4.974166492802193e-05, + "loss": 1.1642, + "step": 45895 + }, + { + "epoch": 0.05, + "learning_rate": 4.974165366513154e-05, + "loss": 1.0715, + "step": 45896 + }, + { + "epoch": 0.05, + "learning_rate": 4.974164240199691e-05, + "loss": 1.086, + "step": 45897 + }, + { + "epoch": 0.05, + "learning_rate": 4.974163113861805e-05, + "loss": 1.0996, + "step": 45898 + }, + { + "epoch": 0.05, + "learning_rate": 4.974161987499495e-05, + "loss": 1.1369, + "step": 45899 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741608611127604e-05, + "loss": 0.9683, + "step": 45900 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741597347016026e-05, + "loss": 0.876, + "step": 45901 + }, + { + "epoch": 0.05, + "learning_rate": 4.974158608266021e-05, + "loss": 1.069, + "step": 45902 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741574818060145e-05, + "loss": 0.703, + "step": 45903 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741563553215856e-05, + "loss": 1.0914, + "step": 45904 + }, + { + "epoch": 0.05, + "learning_rate": 4.974155228812732e-05, + "loss": 1.0039, + "step": 45905 + }, + { + "epoch": 0.05, + "learning_rate": 4.974154102279455e-05, + "loss": 0.9149, + "step": 45906 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741529757217535e-05, + "loss": 0.7779, + "step": 45907 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741518491396285e-05, + "loss": 1.0896, + "step": 45908 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741507225330805e-05, + "loss": 1.4308, + "step": 45909 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741495959021075e-05, + "loss": 1.372, + "step": 45910 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741484692467115e-05, + "loss": 0.2683, + "step": 45911 + }, + { + "epoch": 0.05, + "learning_rate": 4.974147342566892e-05, + "loss": 0.2476, + "step": 45912 + }, + { + "epoch": 0.05, + "learning_rate": 4.974146215862648e-05, + "loss": 0.1859, + "step": 45913 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741450891339805e-05, + "loss": 0.2365, + "step": 45914 + }, + { + "epoch": 0.05, + "learning_rate": 4.97414396238089e-05, + "loss": 0.3636, + "step": 45915 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741428356033746e-05, + "loss": 1.1237, + "step": 45916 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741417088014364e-05, + "loss": 1.258, + "step": 45917 + }, + { + "epoch": 0.05, + "learning_rate": 4.974140581975074e-05, + "loss": 1.3424, + "step": 45918 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741394551242884e-05, + "loss": 1.3031, + "step": 45919 + }, + { + "epoch": 0.05, + "learning_rate": 4.974138328249079e-05, + "loss": 0.3535, + "step": 45920 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741372013494456e-05, + "loss": 1.01, + "step": 45921 + }, + { + "epoch": 0.05, + "learning_rate": 4.974136074425389e-05, + "loss": 1.2133, + "step": 45922 + }, + { + "epoch": 0.05, + "learning_rate": 4.974134947476908e-05, + "loss": 1.0329, + "step": 45923 + }, + { + "epoch": 0.05, + "learning_rate": 4.974133820504005e-05, + "loss": 0.8845, + "step": 45924 + }, + { + "epoch": 0.05, + "learning_rate": 4.974132693506677e-05, + "loss": 0.9524, + "step": 45925 + }, + { + "epoch": 0.05, + "learning_rate": 4.974131566484926e-05, + "loss": 1.1506, + "step": 45926 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741304394387514e-05, + "loss": 1.2181, + "step": 45927 + }, + { + "epoch": 0.05, + "learning_rate": 4.974129312368153e-05, + "loss": 1.0454, + "step": 45928 + }, + { + "epoch": 0.05, + "learning_rate": 4.97412818527313e-05, + "loss": 1.1344, + "step": 45929 + }, + { + "epoch": 0.05, + "learning_rate": 4.974127058153685e-05, + "loss": 0.8235, + "step": 45930 + }, + { + "epoch": 0.05, + "learning_rate": 4.974125931009816e-05, + "loss": 0.6094, + "step": 45931 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741248038415237e-05, + "loss": 0.8797, + "step": 45932 + }, + { + "epoch": 0.05, + "learning_rate": 4.974123676648807e-05, + "loss": 0.938, + "step": 45933 + }, + { + "epoch": 0.05, + "learning_rate": 4.974122549431668e-05, + "loss": 1.3387, + "step": 45934 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741214221901045e-05, + "loss": 1.095, + "step": 45935 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741202949241184e-05, + "loss": 0.8754, + "step": 45936 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741191676337085e-05, + "loss": 1.1047, + "step": 45937 + }, + { + "epoch": 0.05, + "learning_rate": 4.974118040318875e-05, + "loss": 1.0831, + "step": 45938 + }, + { + "epoch": 0.05, + "learning_rate": 4.974116912979617e-05, + "loss": 1.0271, + "step": 45939 + }, + { + "epoch": 0.05, + "learning_rate": 4.974115785615937e-05, + "loss": 0.7376, + "step": 45940 + }, + { + "epoch": 0.05, + "learning_rate": 4.974114658227833e-05, + "loss": 0.9695, + "step": 45941 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741135308153066e-05, + "loss": 1.0142, + "step": 45942 + }, + { + "epoch": 0.05, + "learning_rate": 4.974112403378355e-05, + "loss": 1.0095, + "step": 45943 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741112759169816e-05, + "loss": 0.8569, + "step": 45944 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741101484311836e-05, + "loss": 0.6981, + "step": 45945 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741090209209626e-05, + "loss": 0.8095, + "step": 45946 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741078933863186e-05, + "loss": 0.9455, + "step": 45947 + }, + { + "epoch": 0.05, + "learning_rate": 4.974106765827251e-05, + "loss": 1.0195, + "step": 45948 + }, + { + "epoch": 0.05, + "learning_rate": 4.974105638243761e-05, + "loss": 0.8667, + "step": 45949 + }, + { + "epoch": 0.05, + "learning_rate": 4.9741045106358465e-05, + "loss": 0.8944, + "step": 45950 + }, + { + "epoch": 0.05, + "learning_rate": 4.974103383003509e-05, + "loss": 0.4769, + "step": 45951 + }, + { + "epoch": 0.05, + "learning_rate": 4.974102255346748e-05, + "loss": 0.31, + "step": 45952 + }, + { + "epoch": 0.05, + "learning_rate": 4.974101127665564e-05, + "loss": 0.3219, + "step": 45953 + }, + { + "epoch": 0.05, + "learning_rate": 4.974099999959957e-05, + "loss": 0.4292, + "step": 45954 + }, + { + "epoch": 0.05, + "learning_rate": 4.974098872229926e-05, + "loss": 0.2631, + "step": 45955 + }, + { + "epoch": 0.05, + "learning_rate": 4.974097744475472e-05, + "loss": 0.2322, + "step": 45956 + }, + { + "epoch": 0.05, + "learning_rate": 4.974096616696595e-05, + "loss": 0.3061, + "step": 45957 + }, + { + "epoch": 0.05, + "learning_rate": 4.974095488893295e-05, + "loss": 0.5083, + "step": 45958 + }, + { + "epoch": 0.05, + "learning_rate": 4.974094361065571e-05, + "loss": 1.1491, + "step": 45959 + }, + { + "epoch": 0.05, + "learning_rate": 4.974093233213424e-05, + "loss": 1.4093, + "step": 45960 + }, + { + "epoch": 0.05, + "learning_rate": 4.974092105336854e-05, + "loss": 0.9001, + "step": 45961 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740909774358606e-05, + "loss": 1.1521, + "step": 45962 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740898495104444e-05, + "loss": 1.1171, + "step": 45963 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740887215606044e-05, + "loss": 1.1675, + "step": 45964 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740875935863415e-05, + "loss": 0.813, + "step": 45965 + }, + { + "epoch": 0.05, + "learning_rate": 4.974086465587656e-05, + "loss": 0.9882, + "step": 45966 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740853375645466e-05, + "loss": 1.0423, + "step": 45967 + }, + { + "epoch": 0.05, + "learning_rate": 4.974084209517015e-05, + "loss": 0.783, + "step": 45968 + }, + { + "epoch": 0.05, + "learning_rate": 4.974083081445059e-05, + "loss": 1.1214, + "step": 45969 + }, + { + "epoch": 0.05, + "learning_rate": 4.974081953348681e-05, + "loss": 1.2906, + "step": 45970 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740808252278794e-05, + "loss": 0.9702, + "step": 45971 + }, + { + "epoch": 0.05, + "learning_rate": 4.974079697082655e-05, + "loss": 0.8121, + "step": 45972 + }, + { + "epoch": 0.05, + "learning_rate": 4.974078568913007e-05, + "loss": 0.96, + "step": 45973 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740774407189363e-05, + "loss": 1.1197, + "step": 45974 + }, + { + "epoch": 0.05, + "learning_rate": 4.974076312500443e-05, + "loss": 0.6518, + "step": 45975 + }, + { + "epoch": 0.05, + "learning_rate": 4.974075184257526e-05, + "loss": 0.8561, + "step": 45976 + }, + { + "epoch": 0.05, + "learning_rate": 4.974074055990186e-05, + "loss": 1.384, + "step": 45977 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740729276984235e-05, + "loss": 1.6848, + "step": 45978 + }, + { + "epoch": 0.05, + "learning_rate": 4.974071799382237e-05, + "loss": 1.0627, + "step": 45979 + }, + { + "epoch": 0.05, + "learning_rate": 4.974070671041628e-05, + "loss": 0.6708, + "step": 45980 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740695426765966e-05, + "loss": 0.4451, + "step": 45981 + }, + { + "epoch": 0.05, + "learning_rate": 4.974068414287142e-05, + "loss": 0.4959, + "step": 45982 + }, + { + "epoch": 0.05, + "learning_rate": 4.974067285873264e-05, + "loss": 0.5437, + "step": 45983 + }, + { + "epoch": 0.05, + "learning_rate": 4.974066157434963e-05, + "loss": 1.3283, + "step": 45984 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740650289722395e-05, + "loss": 0.9597, + "step": 45985 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740639004850933e-05, + "loss": 1.1042, + "step": 45986 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740627719735235e-05, + "loss": 0.9494, + "step": 45987 + }, + { + "epoch": 0.05, + "learning_rate": 4.974061643437531e-05, + "loss": 0.976, + "step": 45988 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740605148771156e-05, + "loss": 1.0132, + "step": 45989 + }, + { + "epoch": 0.05, + "learning_rate": 4.974059386292278e-05, + "loss": 0.7278, + "step": 45990 + }, + { + "epoch": 0.05, + "learning_rate": 4.974058257683016e-05, + "loss": 1.103, + "step": 45991 + }, + { + "epoch": 0.05, + "learning_rate": 4.974057129049333e-05, + "loss": 1.5215, + "step": 45992 + }, + { + "epoch": 0.05, + "learning_rate": 4.974056000391226e-05, + "loss": 1.048, + "step": 45993 + }, + { + "epoch": 0.05, + "learning_rate": 4.974054871708697e-05, + "loss": 0.8238, + "step": 45994 + }, + { + "epoch": 0.05, + "learning_rate": 4.974053743001744e-05, + "loss": 1.0089, + "step": 45995 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740526142703686e-05, + "loss": 1.2374, + "step": 45996 + }, + { + "epoch": 0.05, + "learning_rate": 4.97405148551457e-05, + "loss": 1.025, + "step": 45997 + }, + { + "epoch": 0.05, + "learning_rate": 4.97405035673435e-05, + "loss": 1.1101, + "step": 45998 + }, + { + "epoch": 0.05, + "learning_rate": 4.974049227929706e-05, + "loss": 1.1553, + "step": 45999 + }, + { + "epoch": 0.05, + "learning_rate": 4.97404809910064e-05, + "loss": 1.0135, + "step": 46000 + }, + { + "epoch": 0.05, + "eval_loss": 1.088128924369812, + "eval_runtime": 128.4713, + "eval_samples_per_second": 10.781, + "eval_steps_per_second": 5.394, + "step": 46000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740469702471506e-05, + "loss": 1.1221, + "step": 46001 + }, + { + "epoch": 0.05, + "learning_rate": 4.974045841369239e-05, + "loss": 1.6817, + "step": 46002 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740447124669044e-05, + "loss": 1.159, + "step": 46003 + }, + { + "epoch": 0.05, + "learning_rate": 4.974043583540147e-05, + "loss": 1.0178, + "step": 46004 + }, + { + "epoch": 0.05, + "learning_rate": 4.974042454588966e-05, + "loss": 0.9866, + "step": 46005 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740413256133633e-05, + "loss": 0.9903, + "step": 46006 + }, + { + "epoch": 0.05, + "learning_rate": 4.974040196613338e-05, + "loss": 1.1353, + "step": 46007 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740390675888905e-05, + "loss": 1.9398, + "step": 46008 + }, + { + "epoch": 0.05, + "learning_rate": 4.974037938540019e-05, + "loss": 1.5592, + "step": 46009 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740368094667256e-05, + "loss": 0.697, + "step": 46010 + }, + { + "epoch": 0.05, + "learning_rate": 4.974035680369009e-05, + "loss": 0.4358, + "step": 46011 + }, + { + "epoch": 0.05, + "learning_rate": 4.974034551246871e-05, + "loss": 0.8685, + "step": 46012 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740334221003095e-05, + "loss": 0.7438, + "step": 46013 + }, + { + "epoch": 0.05, + "learning_rate": 4.974032292929325e-05, + "loss": 0.4028, + "step": 46014 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740311637339186e-05, + "loss": 0.3808, + "step": 46015 + }, + { + "epoch": 0.05, + "learning_rate": 4.97403003451409e-05, + "loss": 0.5626, + "step": 46016 + }, + { + "epoch": 0.05, + "learning_rate": 4.974028905269838e-05, + "loss": 0.406, + "step": 46017 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740277760011634e-05, + "loss": 0.3933, + "step": 46018 + }, + { + "epoch": 0.05, + "learning_rate": 4.974026646708067e-05, + "loss": 0.3818, + "step": 46019 + }, + { + "epoch": 0.05, + "learning_rate": 4.974025517390547e-05, + "loss": 0.3951, + "step": 46020 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740243880486045e-05, + "loss": 0.8342, + "step": 46021 + }, + { + "epoch": 0.05, + "learning_rate": 4.97402325868224e-05, + "loss": 0.8004, + "step": 46022 + }, + { + "epoch": 0.05, + "learning_rate": 4.974022129291453e-05, + "loss": 0.8296, + "step": 46023 + }, + { + "epoch": 0.05, + "learning_rate": 4.974020999876243e-05, + "loss": 0.9706, + "step": 46024 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740198704366114e-05, + "loss": 1.1784, + "step": 46025 + }, + { + "epoch": 0.05, + "learning_rate": 4.974018740972557e-05, + "loss": 0.7079, + "step": 46026 + }, + { + "epoch": 0.05, + "learning_rate": 4.97401761148408e-05, + "loss": 0.7575, + "step": 46027 + }, + { + "epoch": 0.05, + "learning_rate": 4.97401648197118e-05, + "loss": 0.6604, + "step": 46028 + }, + { + "epoch": 0.05, + "learning_rate": 4.974015352433858e-05, + "loss": 0.6784, + "step": 46029 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740142228721146e-05, + "loss": 1.0898, + "step": 46030 + }, + { + "epoch": 0.05, + "learning_rate": 4.974013093285947e-05, + "loss": 0.9284, + "step": 46031 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740119636753576e-05, + "loss": 0.9147, + "step": 46032 + }, + { + "epoch": 0.05, + "learning_rate": 4.974010834040347e-05, + "loss": 0.7191, + "step": 46033 + }, + { + "epoch": 0.05, + "learning_rate": 4.974009704380913e-05, + "loss": 0.8236, + "step": 46034 + }, + { + "epoch": 0.05, + "learning_rate": 4.974008574697056e-05, + "loss": 1.0677, + "step": 46035 + }, + { + "epoch": 0.05, + "learning_rate": 4.974007444988778e-05, + "loss": 1.1961, + "step": 46036 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740063152560765e-05, + "loss": 1.2584, + "step": 46037 + }, + { + "epoch": 0.05, + "learning_rate": 4.974005185498953e-05, + "loss": 1.3924, + "step": 46038 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740040557174076e-05, + "loss": 0.5072, + "step": 46039 + }, + { + "epoch": 0.05, + "learning_rate": 4.974002925911439e-05, + "loss": 0.548, + "step": 46040 + }, + { + "epoch": 0.05, + "learning_rate": 4.974001796081048e-05, + "loss": 0.8184, + "step": 46041 + }, + { + "epoch": 0.05, + "learning_rate": 4.974000666226236e-05, + "loss": 0.9166, + "step": 46042 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739995363470004e-05, + "loss": 0.6774, + "step": 46043 + }, + { + "epoch": 0.05, + "learning_rate": 4.973998406443343e-05, + "loss": 0.8406, + "step": 46044 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739972765152635e-05, + "loss": 1.1376, + "step": 46045 + }, + { + "epoch": 0.05, + "learning_rate": 4.973996146562761e-05, + "loss": 1.1313, + "step": 46046 + }, + { + "epoch": 0.05, + "learning_rate": 4.973995016585838e-05, + "loss": 0.8024, + "step": 46047 + }, + { + "epoch": 0.05, + "learning_rate": 4.973993886584492e-05, + "loss": 0.7777, + "step": 46048 + }, + { + "epoch": 0.05, + "learning_rate": 4.973992756558723e-05, + "loss": 0.9044, + "step": 46049 + }, + { + "epoch": 0.05, + "learning_rate": 4.973991626508532e-05, + "loss": 1.1868, + "step": 46050 + }, + { + "epoch": 0.05, + "learning_rate": 4.973990496433919e-05, + "loss": 1.2981, + "step": 46051 + }, + { + "epoch": 0.05, + "learning_rate": 4.973989366334884e-05, + "loss": 1.3062, + "step": 46052 + }, + { + "epoch": 0.05, + "learning_rate": 4.973988236211427e-05, + "loss": 2.3369, + "step": 46053 + }, + { + "epoch": 0.05, + "learning_rate": 4.973987106063547e-05, + "loss": 1.7122, + "step": 46054 + }, + { + "epoch": 0.05, + "learning_rate": 4.973985975891245e-05, + "loss": 1.0447, + "step": 46055 + }, + { + "epoch": 0.05, + "learning_rate": 4.973984845694522e-05, + "loss": 1.0017, + "step": 46056 + }, + { + "epoch": 0.05, + "learning_rate": 4.973983715473376e-05, + "loss": 1.1155, + "step": 46057 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739825852278076e-05, + "loss": 0.8497, + "step": 46058 + }, + { + "epoch": 0.05, + "learning_rate": 4.973981454957818e-05, + "loss": 1.1196, + "step": 46059 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739803246634055e-05, + "loss": 0.9252, + "step": 46060 + }, + { + "epoch": 0.05, + "learning_rate": 4.973979194344571e-05, + "loss": 0.9025, + "step": 46061 + }, + { + "epoch": 0.05, + "learning_rate": 4.973978064001315e-05, + "loss": 0.8338, + "step": 46062 + }, + { + "epoch": 0.05, + "learning_rate": 4.973976933633636e-05, + "loss": 0.8667, + "step": 46063 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739758032415354e-05, + "loss": 1.0324, + "step": 46064 + }, + { + "epoch": 0.05, + "learning_rate": 4.973974672825012e-05, + "loss": 1.0069, + "step": 46065 + }, + { + "epoch": 0.05, + "learning_rate": 4.973973542384068e-05, + "loss": 0.8318, + "step": 46066 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739724119187015e-05, + "loss": 1.0496, + "step": 46067 + }, + { + "epoch": 0.05, + "learning_rate": 4.973971281428913e-05, + "loss": 1.0526, + "step": 46068 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739701509147016e-05, + "loss": 1.122, + "step": 46069 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739690203760694e-05, + "loss": 0.5792, + "step": 46070 + }, + { + "epoch": 0.05, + "learning_rate": 4.973967889813015e-05, + "loss": 1.052, + "step": 46071 + }, + { + "epoch": 0.05, + "learning_rate": 4.973966759225538e-05, + "loss": 1.3143, + "step": 46072 + }, + { + "epoch": 0.05, + "learning_rate": 4.973965628613639e-05, + "loss": 1.3642, + "step": 46073 + }, + { + "epoch": 0.05, + "learning_rate": 4.973964497977319e-05, + "loss": 0.845, + "step": 46074 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739633673165765e-05, + "loss": 0.9264, + "step": 46075 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739622366314124e-05, + "loss": 1.0567, + "step": 46076 + }, + { + "epoch": 0.05, + "learning_rate": 4.973961105921825e-05, + "loss": 1.0427, + "step": 46077 + }, + { + "epoch": 0.05, + "learning_rate": 4.973959975187817e-05, + "loss": 1.0494, + "step": 46078 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739588444293874e-05, + "loss": 0.9011, + "step": 46079 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739577136465354e-05, + "loss": 0.9637, + "step": 46080 + }, + { + "epoch": 0.05, + "learning_rate": 4.973956582839261e-05, + "loss": 1.1452, + "step": 46081 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739554520075656e-05, + "loss": 0.7763, + "step": 46082 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739543211514486e-05, + "loss": 0.5964, + "step": 46083 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739531902709085e-05, + "loss": 1.0182, + "step": 46084 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739520593659475e-05, + "loss": 1.0009, + "step": 46085 + }, + { + "epoch": 0.05, + "learning_rate": 4.973950928436564e-05, + "loss": 1.3538, + "step": 46086 + }, + { + "epoch": 0.05, + "learning_rate": 4.97394979748276e-05, + "loss": 0.8107, + "step": 46087 + }, + { + "epoch": 0.05, + "learning_rate": 4.973948666504533e-05, + "loss": 0.9013, + "step": 46088 + }, + { + "epoch": 0.05, + "learning_rate": 4.973947535501884e-05, + "loss": 1.0986, + "step": 46089 + }, + { + "epoch": 0.05, + "learning_rate": 4.973946404474814e-05, + "loss": 0.8554, + "step": 46090 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739452734233227e-05, + "loss": 1.1617, + "step": 46091 + }, + { + "epoch": 0.05, + "learning_rate": 4.973944142347409e-05, + "loss": 1.2396, + "step": 46092 + }, + { + "epoch": 0.05, + "learning_rate": 4.973943011247073e-05, + "loss": 1.0736, + "step": 46093 + }, + { + "epoch": 0.05, + "learning_rate": 4.973941880122316e-05, + "loss": 0.7158, + "step": 46094 + }, + { + "epoch": 0.05, + "learning_rate": 4.973940748973137e-05, + "loss": 0.997, + "step": 46095 + }, + { + "epoch": 0.05, + "learning_rate": 4.973939617799537e-05, + "loss": 1.0141, + "step": 46096 + }, + { + "epoch": 0.05, + "learning_rate": 4.973938486601515e-05, + "loss": 0.8931, + "step": 46097 + }, + { + "epoch": 0.05, + "learning_rate": 4.97393735537907e-05, + "loss": 0.8572, + "step": 46098 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739362241322055e-05, + "loss": 0.8827, + "step": 46099 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739350928609176e-05, + "loss": 0.9366, + "step": 46100 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739339615652094e-05, + "loss": 0.8838, + "step": 46101 + }, + { + "epoch": 0.05, + "learning_rate": 4.973932830245078e-05, + "loss": 0.9295, + "step": 46102 + }, + { + "epoch": 0.05, + "learning_rate": 4.973931698900526e-05, + "loss": 0.9193, + "step": 46103 + }, + { + "epoch": 0.05, + "learning_rate": 4.973930567531553e-05, + "loss": 0.8719, + "step": 46104 + }, + { + "epoch": 0.05, + "learning_rate": 4.973929436138157e-05, + "loss": 0.8795, + "step": 46105 + }, + { + "epoch": 0.05, + "learning_rate": 4.97392830472034e-05, + "loss": 0.9739, + "step": 46106 + }, + { + "epoch": 0.05, + "learning_rate": 4.973927173278102e-05, + "loss": 0.7565, + "step": 46107 + }, + { + "epoch": 0.05, + "learning_rate": 4.973926041811442e-05, + "loss": 0.937, + "step": 46108 + }, + { + "epoch": 0.05, + "learning_rate": 4.97392491032036e-05, + "loss": 0.836, + "step": 46109 + }, + { + "epoch": 0.05, + "learning_rate": 4.973923778804856e-05, + "loss": 0.8792, + "step": 46110 + }, + { + "epoch": 0.05, + "learning_rate": 4.973922647264932e-05, + "loss": 0.9263, + "step": 46111 + }, + { + "epoch": 0.05, + "learning_rate": 4.973921515700586e-05, + "loss": 0.858, + "step": 46112 + }, + { + "epoch": 0.05, + "learning_rate": 4.973920384111818e-05, + "loss": 0.9985, + "step": 46113 + }, + { + "epoch": 0.05, + "learning_rate": 4.973919252498629e-05, + "loss": 0.9714, + "step": 46114 + }, + { + "epoch": 0.05, + "learning_rate": 4.973918120861018e-05, + "loss": 0.9285, + "step": 46115 + }, + { + "epoch": 0.05, + "learning_rate": 4.973916989198986e-05, + "loss": 1.0976, + "step": 46116 + }, + { + "epoch": 0.05, + "learning_rate": 4.973915857512532e-05, + "loss": 1.0602, + "step": 46117 + }, + { + "epoch": 0.05, + "learning_rate": 4.973914725801657e-05, + "loss": 1.0151, + "step": 46118 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739135940663605e-05, + "loss": 1.1747, + "step": 46119 + }, + { + "epoch": 0.05, + "learning_rate": 4.973912462306642e-05, + "loss": 0.7409, + "step": 46120 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739113305225024e-05, + "loss": 0.7317, + "step": 46121 + }, + { + "epoch": 0.05, + "learning_rate": 4.973910198713942e-05, + "loss": 0.9588, + "step": 46122 + }, + { + "epoch": 0.05, + "learning_rate": 4.9739090668809604e-05, + "loss": 0.8149, + "step": 46123 + }, + { + "epoch": 0.05, + "learning_rate": 4.973907935023556e-05, + "loss": 1.1251, + "step": 46124 + }, + { + "epoch": 0.05, + "learning_rate": 4.973906803141731e-05, + "loss": 1.0758, + "step": 46125 + }, + { + "epoch": 0.05, + "learning_rate": 4.973905671235485e-05, + "loss": 1.2384, + "step": 46126 + }, + { + "epoch": 0.05, + "learning_rate": 4.973904539304817e-05, + "loss": 1.0873, + "step": 46127 + }, + { + "epoch": 0.05, + "learning_rate": 4.973903407349728e-05, + "loss": 1.67, + "step": 46128 + }, + { + "epoch": 0.05, + "learning_rate": 4.973902275370218e-05, + "loss": 1.0478, + "step": 46129 + }, + { + "epoch": 0.05, + "learning_rate": 4.973901143366286e-05, + "loss": 0.9046, + "step": 46130 + }, + { + "epoch": 0.05, + "learning_rate": 4.973900011337933e-05, + "loss": 0.7203, + "step": 46131 + }, + { + "epoch": 0.05, + "learning_rate": 4.973898879285159e-05, + "loss": 0.1906, + "step": 46132 + }, + { + "epoch": 0.05, + "learning_rate": 4.973897747207963e-05, + "loss": 0.1606, + "step": 46133 + }, + { + "epoch": 0.05, + "learning_rate": 4.973896615106346e-05, + "loss": 0.2426, + "step": 46134 + }, + { + "epoch": 0.05, + "learning_rate": 4.973895482980308e-05, + "loss": 0.1797, + "step": 46135 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738943508298485e-05, + "loss": 0.1832, + "step": 46136 + }, + { + "epoch": 0.05, + "learning_rate": 4.973893218654968e-05, + "loss": 0.2536, + "step": 46137 + }, + { + "epoch": 0.05, + "learning_rate": 4.973892086455666e-05, + "loss": 0.2213, + "step": 46138 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738909542319435e-05, + "loss": 0.0821, + "step": 46139 + }, + { + "epoch": 0.05, + "learning_rate": 4.973889821983799e-05, + "loss": 0.1015, + "step": 46140 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738886897112335e-05, + "loss": 0.0643, + "step": 46141 + }, + { + "epoch": 0.05, + "learning_rate": 4.973887557414246e-05, + "loss": 0.0834, + "step": 46142 + }, + { + "epoch": 0.05, + "learning_rate": 4.973886425092839e-05, + "loss": 0.0991, + "step": 46143 + }, + { + "epoch": 0.05, + "learning_rate": 4.973885292747009e-05, + "loss": 0.0719, + "step": 46144 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738841603767596e-05, + "loss": 0.3461, + "step": 46145 + }, + { + "epoch": 0.05, + "learning_rate": 4.973883027982088e-05, + "loss": 0.0725, + "step": 46146 + }, + { + "epoch": 0.05, + "learning_rate": 4.973881895562996e-05, + "loss": 0.0637, + "step": 46147 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738807631194815e-05, + "loss": 0.7106, + "step": 46148 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738796306515475e-05, + "loss": 1.0168, + "step": 46149 + }, + { + "epoch": 0.05, + "learning_rate": 4.973878498159191e-05, + "loss": 0.7833, + "step": 46150 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738773656424146e-05, + "loss": 1.6028, + "step": 46151 + }, + { + "epoch": 0.05, + "learning_rate": 4.973876233101217e-05, + "loss": 0.8583, + "step": 46152 + }, + { + "epoch": 0.05, + "learning_rate": 4.973875100535598e-05, + "loss": 1.1071, + "step": 46153 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738739679455575e-05, + "loss": 1.1527, + "step": 46154 + }, + { + "epoch": 0.05, + "learning_rate": 4.973872835331096e-05, + "loss": 0.9847, + "step": 46155 + }, + { + "epoch": 0.05, + "learning_rate": 4.973871702692214e-05, + "loss": 1.0121, + "step": 46156 + }, + { + "epoch": 0.05, + "learning_rate": 4.973870570028911e-05, + "loss": 0.9653, + "step": 46157 + }, + { + "epoch": 0.05, + "learning_rate": 4.973869437341187e-05, + "loss": 1.184, + "step": 46158 + }, + { + "epoch": 0.05, + "learning_rate": 4.973868304629042e-05, + "loss": 0.874, + "step": 46159 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738671718924756e-05, + "loss": 0.9355, + "step": 46160 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738660391314886e-05, + "loss": 0.9987, + "step": 46161 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738649063460805e-05, + "loss": 0.9877, + "step": 46162 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738637735362515e-05, + "loss": 1.0705, + "step": 46163 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738626407020015e-05, + "loss": 1.2803, + "step": 46164 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738615078433306e-05, + "loss": 1.2143, + "step": 46165 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738603749602386e-05, + "loss": 1.017, + "step": 46166 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738592420527264e-05, + "loss": 1.0055, + "step": 46167 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738581091207925e-05, + "loss": 1.5356, + "step": 46168 + }, + { + "epoch": 0.05, + "learning_rate": 4.973856976164438e-05, + "loss": 0.9599, + "step": 46169 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738558431836624e-05, + "loss": 1.2514, + "step": 46170 + }, + { + "epoch": 0.05, + "learning_rate": 4.973854710178466e-05, + "loss": 1.3564, + "step": 46171 + }, + { + "epoch": 0.05, + "learning_rate": 4.973853577148849e-05, + "loss": 1.0899, + "step": 46172 + }, + { + "epoch": 0.05, + "learning_rate": 4.973852444094811e-05, + "loss": 0.9837, + "step": 46173 + }, + { + "epoch": 0.05, + "learning_rate": 4.973851311016353e-05, + "loss": 2.2271, + "step": 46174 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738501779134726e-05, + "loss": 2.2199, + "step": 46175 + }, + { + "epoch": 0.05, + "learning_rate": 4.973849044786173e-05, + "loss": 1.0326, + "step": 46176 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738479116344516e-05, + "loss": 0.6776, + "step": 46177 + }, + { + "epoch": 0.05, + "learning_rate": 4.973846778458309e-05, + "loss": 1.3234, + "step": 46178 + }, + { + "epoch": 0.05, + "learning_rate": 4.973845645257747e-05, + "loss": 0.8785, + "step": 46179 + }, + { + "epoch": 0.05, + "learning_rate": 4.973844512032763e-05, + "loss": 1.1466, + "step": 46180 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738433787833586e-05, + "loss": 1.0675, + "step": 46181 + }, + { + "epoch": 0.05, + "learning_rate": 4.973842245509534e-05, + "loss": 0.8971, + "step": 46182 + }, + { + "epoch": 0.05, + "learning_rate": 4.973841112211288e-05, + "loss": 0.9322, + "step": 46183 + }, + { + "epoch": 0.05, + "learning_rate": 4.973839978888621e-05, + "loss": 0.9082, + "step": 46184 + }, + { + "epoch": 0.05, + "learning_rate": 4.973838845541535e-05, + "loss": 1.0472, + "step": 46185 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738377121700266e-05, + "loss": 1.0173, + "step": 46186 + }, + { + "epoch": 0.05, + "learning_rate": 4.973836578774098e-05, + "loss": 0.8577, + "step": 46187 + }, + { + "epoch": 0.05, + "learning_rate": 4.973835445353749e-05, + "loss": 1.0348, + "step": 46188 + }, + { + "epoch": 0.05, + "learning_rate": 4.973834311908979e-05, + "loss": 0.9792, + "step": 46189 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738331784397886e-05, + "loss": 1.1632, + "step": 46190 + }, + { + "epoch": 0.05, + "learning_rate": 4.973832044946177e-05, + "loss": 1.4496, + "step": 46191 + }, + { + "epoch": 0.05, + "learning_rate": 4.973830911428146e-05, + "loss": 2.0339, + "step": 46192 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738297778856935e-05, + "loss": 1.5185, + "step": 46193 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738286443188203e-05, + "loss": 0.8782, + "step": 46194 + }, + { + "epoch": 0.05, + "learning_rate": 4.973827510727527e-05, + "loss": 1.2152, + "step": 46195 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738263771118124e-05, + "loss": 1.2991, + "step": 46196 + }, + { + "epoch": 0.05, + "learning_rate": 4.973825243471678e-05, + "loss": 0.5049, + "step": 46197 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738241098071226e-05, + "loss": 0.904, + "step": 46198 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738229761181466e-05, + "loss": 1.1702, + "step": 46199 + }, + { + "epoch": 0.05, + "learning_rate": 4.97382184240475e-05, + "loss": 1.0171, + "step": 46200 + }, + { + "epoch": 0.05, + "learning_rate": 4.973820708666933e-05, + "loss": 1.8488, + "step": 46201 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738195749046953e-05, + "loss": 1.3397, + "step": 46202 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738184411180374e-05, + "loss": 0.9884, + "step": 46203 + }, + { + "epoch": 0.05, + "learning_rate": 4.973817307306959e-05, + "loss": 1.0864, + "step": 46204 + }, + { + "epoch": 0.05, + "learning_rate": 4.973816173471461e-05, + "loss": 0.9165, + "step": 46205 + }, + { + "epoch": 0.05, + "learning_rate": 4.973815039611541e-05, + "loss": 1.1529, + "step": 46206 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738139057272015e-05, + "loss": 0.4871, + "step": 46207 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738127718184414e-05, + "loss": 0.7614, + "step": 46208 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738116378852604e-05, + "loss": 1.0647, + "step": 46209 + }, + { + "epoch": 0.05, + "learning_rate": 4.97381050392766e-05, + "loss": 1.1256, + "step": 46210 + }, + { + "epoch": 0.05, + "learning_rate": 4.973809369945638e-05, + "loss": 1.0561, + "step": 46211 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738082359391954e-05, + "loss": 0.5093, + "step": 46212 + }, + { + "epoch": 0.05, + "learning_rate": 4.973807101908333e-05, + "loss": 0.552, + "step": 46213 + }, + { + "epoch": 0.05, + "learning_rate": 4.97380596785305e-05, + "loss": 0.9909, + "step": 46214 + }, + { + "epoch": 0.05, + "learning_rate": 4.973804833773348e-05, + "loss": 1.1633, + "step": 46215 + }, + { + "epoch": 0.05, + "learning_rate": 4.973803699669224e-05, + "loss": 0.9237, + "step": 46216 + }, + { + "epoch": 0.05, + "learning_rate": 4.97380256554068e-05, + "loss": 0.9836, + "step": 46217 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738014313877155e-05, + "loss": 0.6749, + "step": 46218 + }, + { + "epoch": 0.05, + "learning_rate": 4.9738002972103314e-05, + "loss": 1.1439, + "step": 46219 + }, + { + "epoch": 0.05, + "learning_rate": 4.973799163008527e-05, + "loss": 0.9177, + "step": 46220 + }, + { + "epoch": 0.05, + "learning_rate": 4.973798028782302e-05, + "loss": 0.6738, + "step": 46221 + }, + { + "epoch": 0.05, + "learning_rate": 4.973796894531656e-05, + "loss": 1.135, + "step": 46222 + }, + { + "epoch": 0.05, + "learning_rate": 4.97379576025659e-05, + "loss": 1.1731, + "step": 46223 + }, + { + "epoch": 0.05, + "learning_rate": 4.973794625957105e-05, + "loss": 1.0592, + "step": 46224 + }, + { + "epoch": 0.05, + "learning_rate": 4.973793491633198e-05, + "loss": 0.6199, + "step": 46225 + }, + { + "epoch": 0.05, + "learning_rate": 4.973792357284872e-05, + "loss": 0.4928, + "step": 46226 + }, + { + "epoch": 0.05, + "learning_rate": 4.973791222912125e-05, + "loss": 0.95, + "step": 46227 + }, + { + "epoch": 0.05, + "learning_rate": 4.973790088514958e-05, + "loss": 0.7266, + "step": 46228 + }, + { + "epoch": 0.05, + "learning_rate": 4.973788954093371e-05, + "loss": 1.0482, + "step": 46229 + }, + { + "epoch": 0.05, + "learning_rate": 4.973787819647364e-05, + "loss": 0.8721, + "step": 46230 + }, + { + "epoch": 0.05, + "learning_rate": 4.973786685176936e-05, + "loss": 0.846, + "step": 46231 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737855506820884e-05, + "loss": 0.9536, + "step": 46232 + }, + { + "epoch": 0.05, + "learning_rate": 4.973784416162821e-05, + "loss": 0.9815, + "step": 46233 + }, + { + "epoch": 0.05, + "learning_rate": 4.973783281619132e-05, + "loss": 0.9185, + "step": 46234 + }, + { + "epoch": 0.05, + "learning_rate": 4.973782147051025e-05, + "loss": 0.8916, + "step": 46235 + }, + { + "epoch": 0.05, + "learning_rate": 4.973781012458496e-05, + "loss": 0.9762, + "step": 46236 + }, + { + "epoch": 0.05, + "learning_rate": 4.973779877841548e-05, + "loss": 1.2532, + "step": 46237 + }, + { + "epoch": 0.05, + "learning_rate": 4.973778743200179e-05, + "loss": 1.0191, + "step": 46238 + }, + { + "epoch": 0.05, + "learning_rate": 4.973777608534391e-05, + "loss": 1.1298, + "step": 46239 + }, + { + "epoch": 0.05, + "learning_rate": 4.973776473844182e-05, + "loss": 0.983, + "step": 46240 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737753391295525e-05, + "loss": 1.0815, + "step": 46241 + }, + { + "epoch": 0.05, + "learning_rate": 4.973774204390504e-05, + "loss": 0.9752, + "step": 46242 + }, + { + "epoch": 0.05, + "learning_rate": 4.973773069627035e-05, + "loss": 0.9854, + "step": 46243 + }, + { + "epoch": 0.05, + "learning_rate": 4.973771934839146e-05, + "loss": 1.0567, + "step": 46244 + }, + { + "epoch": 0.05, + "learning_rate": 4.973770800026837e-05, + "loss": 0.7954, + "step": 46245 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737696651901085e-05, + "loss": 0.7213, + "step": 46246 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737685303289595e-05, + "loss": 0.8084, + "step": 46247 + }, + { + "epoch": 0.05, + "learning_rate": 4.97376739544339e-05, + "loss": 1.0332, + "step": 46248 + }, + { + "epoch": 0.05, + "learning_rate": 4.973766260533401e-05, + "loss": 0.9661, + "step": 46249 + }, + { + "epoch": 0.05, + "learning_rate": 4.973765125598992e-05, + "loss": 0.8562, + "step": 46250 + }, + { + "epoch": 0.05, + "learning_rate": 4.973763990640163e-05, + "loss": 1.0692, + "step": 46251 + }, + { + "epoch": 0.05, + "learning_rate": 4.973762855656914e-05, + "loss": 0.7944, + "step": 46252 + }, + { + "epoch": 0.05, + "learning_rate": 4.973761720649245e-05, + "loss": 0.5033, + "step": 46253 + }, + { + "epoch": 0.05, + "learning_rate": 4.973760585617156e-05, + "loss": 0.3683, + "step": 46254 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737594505606475e-05, + "loss": 0.4505, + "step": 46255 + }, + { + "epoch": 0.05, + "learning_rate": 4.973758315479719e-05, + "loss": 0.3684, + "step": 46256 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737571803743706e-05, + "loss": 0.2726, + "step": 46257 + }, + { + "epoch": 0.05, + "learning_rate": 4.973756045244602e-05, + "loss": 0.4562, + "step": 46258 + }, + { + "epoch": 0.05, + "learning_rate": 4.973754910090414e-05, + "loss": 0.5241, + "step": 46259 + }, + { + "epoch": 0.05, + "learning_rate": 4.973753774911806e-05, + "loss": 1.0508, + "step": 46260 + }, + { + "epoch": 0.05, + "learning_rate": 4.973752639708777e-05, + "loss": 0.7512, + "step": 46261 + }, + { + "epoch": 0.05, + "learning_rate": 4.97375150448133e-05, + "loss": 0.5845, + "step": 46262 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737503692294615e-05, + "loss": 0.4788, + "step": 46263 + }, + { + "epoch": 0.05, + "learning_rate": 4.973749233953174e-05, + "loss": 1.0201, + "step": 46264 + }, + { + "epoch": 0.05, + "learning_rate": 4.973748098652467e-05, + "loss": 1.0509, + "step": 46265 + }, + { + "epoch": 0.05, + "learning_rate": 4.97374696332734e-05, + "loss": 0.8961, + "step": 46266 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737458279777925e-05, + "loss": 0.3393, + "step": 46267 + }, + { + "epoch": 0.05, + "learning_rate": 4.973744692603826e-05, + "loss": 1.062, + "step": 46268 + }, + { + "epoch": 0.05, + "learning_rate": 4.97374355720544e-05, + "loss": 0.8744, + "step": 46269 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737424217826335e-05, + "loss": 1.1657, + "step": 46270 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737412863354075e-05, + "loss": 0.97, + "step": 46271 + }, + { + "epoch": 0.05, + "learning_rate": 4.973740150863762e-05, + "loss": 1.0532, + "step": 46272 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737390153676966e-05, + "loss": 1.345, + "step": 46273 + }, + { + "epoch": 0.05, + "learning_rate": 4.973737879847211e-05, + "loss": 0.7529, + "step": 46274 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737367443023065e-05, + "loss": 0.9781, + "step": 46275 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737356087329824e-05, + "loss": 1.1741, + "step": 46276 + }, + { + "epoch": 0.05, + "learning_rate": 4.973734473139238e-05, + "loss": 1.1168, + "step": 46277 + }, + { + "epoch": 0.05, + "learning_rate": 4.973733337521074e-05, + "loss": 1.1063, + "step": 46278 + }, + { + "epoch": 0.05, + "learning_rate": 4.97373220187849e-05, + "loss": 0.7657, + "step": 46279 + }, + { + "epoch": 0.05, + "learning_rate": 4.973731066211487e-05, + "loss": 0.9569, + "step": 46280 + }, + { + "epoch": 0.05, + "learning_rate": 4.973729930520065e-05, + "loss": 1.0302, + "step": 46281 + }, + { + "epoch": 0.05, + "learning_rate": 4.973728794804222e-05, + "loss": 1.3288, + "step": 46282 + }, + { + "epoch": 0.05, + "learning_rate": 4.97372765906396e-05, + "loss": 1.1471, + "step": 46283 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737265232992783e-05, + "loss": 0.7637, + "step": 46284 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737253875101776e-05, + "loss": 1.2097, + "step": 46285 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737242516966566e-05, + "loss": 1.0065, + "step": 46286 + }, + { + "epoch": 0.05, + "learning_rate": 4.973723115858716e-05, + "loss": 1.0142, + "step": 46287 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737219799963564e-05, + "loss": 1.1505, + "step": 46288 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737208441095766e-05, + "loss": 0.881, + "step": 46289 + }, + { + "epoch": 0.05, + "learning_rate": 4.973719708198378e-05, + "loss": 1.092, + "step": 46290 + }, + { + "epoch": 0.05, + "learning_rate": 4.973718572262759e-05, + "loss": 0.9132, + "step": 46291 + }, + { + "epoch": 0.05, + "learning_rate": 4.973717436302722e-05, + "loss": 0.9412, + "step": 46292 + }, + { + "epoch": 0.05, + "learning_rate": 4.973716300318264e-05, + "loss": 1.2363, + "step": 46293 + }, + { + "epoch": 0.05, + "learning_rate": 4.973715164309387e-05, + "loss": 1.0782, + "step": 46294 + }, + { + "epoch": 0.05, + "learning_rate": 4.973714028276091e-05, + "loss": 0.7016, + "step": 46295 + }, + { + "epoch": 0.05, + "learning_rate": 4.973712892218375e-05, + "loss": 0.983, + "step": 46296 + }, + { + "epoch": 0.05, + "learning_rate": 4.973711756136239e-05, + "loss": 0.8563, + "step": 46297 + }, + { + "epoch": 0.05, + "learning_rate": 4.973710620029684e-05, + "loss": 0.9371, + "step": 46298 + }, + { + "epoch": 0.05, + "learning_rate": 4.97370948389871e-05, + "loss": 1.0689, + "step": 46299 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737083477433165e-05, + "loss": 1.0107, + "step": 46300 + }, + { + "epoch": 0.05, + "learning_rate": 4.973707211563504e-05, + "loss": 1.0353, + "step": 46301 + }, + { + "epoch": 0.05, + "learning_rate": 4.973706075359271e-05, + "loss": 1.3347, + "step": 46302 + }, + { + "epoch": 0.05, + "learning_rate": 4.97370493913062e-05, + "loss": 1.3477, + "step": 46303 + }, + { + "epoch": 0.05, + "learning_rate": 4.9737038028775484e-05, + "loss": 1.3379, + "step": 46304 + }, + { + "epoch": 0.05, + "learning_rate": 4.973702666600057e-05, + "loss": 1.6801, + "step": 46305 + }, + { + "epoch": 0.05, + "learning_rate": 4.973701530298148e-05, + "loss": 1.257, + "step": 46306 + }, + { + "epoch": 0.05, + "learning_rate": 4.973700393971818e-05, + "loss": 1.0325, + "step": 46307 + }, + { + "epoch": 0.05, + "learning_rate": 4.97369925762107e-05, + "loss": 1.1076, + "step": 46308 + }, + { + "epoch": 0.05, + "learning_rate": 4.973698121245902e-05, + "loss": 1.0162, + "step": 46309 + }, + { + "epoch": 0.05, + "learning_rate": 4.973696984846315e-05, + "loss": 1.2093, + "step": 46310 + }, + { + "epoch": 0.05, + "learning_rate": 4.973695848422308e-05, + "loss": 0.8726, + "step": 46311 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736947119738824e-05, + "loss": 0.7606, + "step": 46312 + }, + { + "epoch": 0.05, + "learning_rate": 4.973693575501037e-05, + "loss": 1.1793, + "step": 46313 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736924390037734e-05, + "loss": 1.8193, + "step": 46314 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736913024820895e-05, + "loss": 1.9494, + "step": 46315 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736901659359866e-05, + "loss": 1.6954, + "step": 46316 + }, + { + "epoch": 0.05, + "learning_rate": 4.973689029365465e-05, + "loss": 1.9333, + "step": 46317 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736878927705234e-05, + "loss": 1.006, + "step": 46318 + }, + { + "epoch": 0.05, + "learning_rate": 4.973686756151164e-05, + "loss": 0.447, + "step": 46319 + }, + { + "epoch": 0.05, + "learning_rate": 4.973685619507383e-05, + "loss": 1.1141, + "step": 46320 + }, + { + "epoch": 0.05, + "learning_rate": 4.973684482839185e-05, + "loss": 1.2987, + "step": 46321 + }, + { + "epoch": 0.05, + "learning_rate": 4.973683346146567e-05, + "loss": 1.0013, + "step": 46322 + }, + { + "epoch": 0.05, + "learning_rate": 4.97368220942953e-05, + "loss": 1.117, + "step": 46323 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736810726880736e-05, + "loss": 1.4363, + "step": 46324 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736799359221975e-05, + "loss": 0.9702, + "step": 46325 + }, + { + "epoch": 0.05, + "learning_rate": 4.973678799131903e-05, + "loss": 1.25, + "step": 46326 + }, + { + "epoch": 0.05, + "learning_rate": 4.97367766231719e-05, + "loss": 0.7162, + "step": 46327 + }, + { + "epoch": 0.05, + "learning_rate": 4.973676525478057e-05, + "loss": 1.3407, + "step": 46328 + }, + { + "epoch": 0.05, + "learning_rate": 4.973675388614505e-05, + "loss": 0.6739, + "step": 46329 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736742517265344e-05, + "loss": 0.747, + "step": 46330 + }, + { + "epoch": 0.05, + "learning_rate": 4.973673114814144e-05, + "loss": 0.8427, + "step": 46331 + }, + { + "epoch": 0.05, + "learning_rate": 4.973671977877335e-05, + "loss": 0.795, + "step": 46332 + }, + { + "epoch": 0.05, + "learning_rate": 4.973670840916107e-05, + "loss": 0.8365, + "step": 46333 + }, + { + "epoch": 0.05, + "learning_rate": 4.97366970393046e-05, + "loss": 1.9248, + "step": 46334 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736685669203937e-05, + "loss": 0.734, + "step": 46335 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736674298859085e-05, + "loss": 1.0033, + "step": 46336 + }, + { + "epoch": 0.05, + "learning_rate": 4.973666292827004e-05, + "loss": 0.9307, + "step": 46337 + }, + { + "epoch": 0.05, + "learning_rate": 4.973665155743681e-05, + "loss": 0.9305, + "step": 46338 + }, + { + "epoch": 0.05, + "learning_rate": 4.973664018635938e-05, + "loss": 0.7755, + "step": 46339 + }, + { + "epoch": 0.05, + "learning_rate": 4.973662881503778e-05, + "loss": 0.9885, + "step": 46340 + }, + { + "epoch": 0.05, + "learning_rate": 4.973661744347198e-05, + "loss": 0.9172, + "step": 46341 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736606071661986e-05, + "loss": 0.8834, + "step": 46342 + }, + { + "epoch": 0.05, + "learning_rate": 4.973659469960781e-05, + "loss": 0.3965, + "step": 46343 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736583327309436e-05, + "loss": 0.8758, + "step": 46344 + }, + { + "epoch": 0.05, + "learning_rate": 4.973657195476687e-05, + "loss": 1.0212, + "step": 46345 + }, + { + "epoch": 0.05, + "learning_rate": 4.973656058198013e-05, + "loss": 0.774, + "step": 46346 + }, + { + "epoch": 0.05, + "learning_rate": 4.973654920894919e-05, + "loss": 0.8512, + "step": 46347 + }, + { + "epoch": 0.05, + "learning_rate": 4.973653783567407e-05, + "loss": 0.6885, + "step": 46348 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736526462154754e-05, + "loss": 1.3706, + "step": 46349 + }, + { + "epoch": 0.05, + "learning_rate": 4.973651508839126e-05, + "loss": 1.3365, + "step": 46350 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736503714383565e-05, + "loss": 1.0506, + "step": 46351 + }, + { + "epoch": 0.05, + "learning_rate": 4.973649234013168e-05, + "loss": 0.5063, + "step": 46352 + }, + { + "epoch": 0.05, + "learning_rate": 4.973648096563562e-05, + "loss": 1.0994, + "step": 46353 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736469590895364e-05, + "loss": 0.9101, + "step": 46354 + }, + { + "epoch": 0.05, + "learning_rate": 4.973645821591092e-05, + "loss": 1.7018, + "step": 46355 + }, + { + "epoch": 0.05, + "learning_rate": 4.973644684068229e-05, + "loss": 1.5184, + "step": 46356 + }, + { + "epoch": 0.05, + "learning_rate": 4.973643546520947e-05, + "loss": 1.226, + "step": 46357 + }, + { + "epoch": 0.05, + "learning_rate": 4.973642408949246e-05, + "loss": 0.904, + "step": 46358 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736412713531264e-05, + "loss": 1.1249, + "step": 46359 + }, + { + "epoch": 0.05, + "learning_rate": 4.973640133732589e-05, + "loss": 1.0737, + "step": 46360 + }, + { + "epoch": 0.05, + "learning_rate": 4.973638996087632e-05, + "loss": 0.9519, + "step": 46361 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736378584182564e-05, + "loss": 1.1093, + "step": 46362 + }, + { + "epoch": 0.05, + "learning_rate": 4.973636720724462e-05, + "loss": 1.0679, + "step": 46363 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736355830062484e-05, + "loss": 1.0799, + "step": 46364 + }, + { + "epoch": 0.05, + "learning_rate": 4.973634445263617e-05, + "loss": 0.7797, + "step": 46365 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736333074965665e-05, + "loss": 1.0284, + "step": 46366 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736321697050976e-05, + "loss": 1.3851, + "step": 46367 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736310318892096e-05, + "loss": 0.7082, + "step": 46368 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736298940489035e-05, + "loss": 0.8834, + "step": 46369 + }, + { + "epoch": 0.05, + "learning_rate": 4.973628756184179e-05, + "loss": 1.0504, + "step": 46370 + }, + { + "epoch": 0.05, + "learning_rate": 4.973627618295035e-05, + "loss": 1.1537, + "step": 46371 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736264803814726e-05, + "loss": 1.0044, + "step": 46372 + }, + { + "epoch": 0.05, + "learning_rate": 4.973625342443492e-05, + "loss": 0.9307, + "step": 46373 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736242044810924e-05, + "loss": 1.5932, + "step": 46374 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736230664942746e-05, + "loss": 1.1222, + "step": 46375 + }, + { + "epoch": 0.05, + "learning_rate": 4.973621928483038e-05, + "loss": 0.8802, + "step": 46376 + }, + { + "epoch": 0.05, + "learning_rate": 4.973620790447383e-05, + "loss": 1.1988, + "step": 46377 + }, + { + "epoch": 0.05, + "learning_rate": 4.973619652387309e-05, + "loss": 1.2271, + "step": 46378 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736185143028166e-05, + "loss": 1.3287, + "step": 46379 + }, + { + "epoch": 0.05, + "learning_rate": 4.973617376193906e-05, + "loss": 1.2064, + "step": 46380 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736162380605774e-05, + "loss": 1.5395, + "step": 46381 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736150999028297e-05, + "loss": 1.2781, + "step": 46382 + }, + { + "epoch": 0.05, + "learning_rate": 4.973613961720664e-05, + "loss": 0.9619, + "step": 46383 + }, + { + "epoch": 0.05, + "learning_rate": 4.973612823514079e-05, + "loss": 0.8049, + "step": 46384 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736116852830756e-05, + "loss": 1.5093, + "step": 46385 + }, + { + "epoch": 0.05, + "learning_rate": 4.973610547027654e-05, + "loss": 1.132, + "step": 46386 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736094087478144e-05, + "loss": 1.0031, + "step": 46387 + }, + { + "epoch": 0.05, + "learning_rate": 4.973608270443556e-05, + "loss": 1.2378, + "step": 46388 + }, + { + "epoch": 0.05, + "learning_rate": 4.973607132114879e-05, + "loss": 1.2285, + "step": 46389 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736059937617836e-05, + "loss": 1.2417, + "step": 46390 + }, + { + "epoch": 0.05, + "learning_rate": 4.9736048553842695e-05, + "loss": 0.6836, + "step": 46391 + }, + { + "epoch": 0.05, + "learning_rate": 4.973603716982338e-05, + "loss": 0.9024, + "step": 46392 + }, + { + "epoch": 0.05, + "learning_rate": 4.973602578555987e-05, + "loss": 0.9566, + "step": 46393 + }, + { + "epoch": 0.05, + "learning_rate": 4.973601440105219e-05, + "loss": 1.1139, + "step": 46394 + }, + { + "epoch": 0.05, + "learning_rate": 4.973600301630032e-05, + "loss": 0.9578, + "step": 46395 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735991631304255e-05, + "loss": 1.0779, + "step": 46396 + }, + { + "epoch": 0.05, + "learning_rate": 4.973598024606402e-05, + "loss": 1.0705, + "step": 46397 + }, + { + "epoch": 0.05, + "learning_rate": 4.97359688605796e-05, + "loss": 0.9484, + "step": 46398 + }, + { + "epoch": 0.05, + "learning_rate": 4.973595747485099e-05, + "loss": 0.8725, + "step": 46399 + }, + { + "epoch": 0.05, + "learning_rate": 4.97359460888782e-05, + "loss": 1.1308, + "step": 46400 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735934702661235e-05, + "loss": 0.9828, + "step": 46401 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735923316200085e-05, + "loss": 0.6468, + "step": 46402 + }, + { + "epoch": 0.05, + "learning_rate": 4.973591192949475e-05, + "loss": 1.278, + "step": 46403 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735900542545236e-05, + "loss": 1.1535, + "step": 46404 + }, + { + "epoch": 0.05, + "learning_rate": 4.973588915535154e-05, + "loss": 0.7204, + "step": 46405 + }, + { + "epoch": 0.05, + "learning_rate": 4.973587776791365e-05, + "loss": 1.0619, + "step": 46406 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735866380231585e-05, + "loss": 0.9273, + "step": 46407 + }, + { + "epoch": 0.05, + "learning_rate": 4.973585499230534e-05, + "loss": 0.8803, + "step": 46408 + }, + { + "epoch": 0.05, + "learning_rate": 4.973584360413491e-05, + "loss": 0.9216, + "step": 46409 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735832215720305e-05, + "loss": 0.5891, + "step": 46410 + }, + { + "epoch": 0.05, + "learning_rate": 4.973582082706151e-05, + "loss": 1.0461, + "step": 46411 + }, + { + "epoch": 0.05, + "learning_rate": 4.973580943815853e-05, + "loss": 1.1986, + "step": 46412 + }, + { + "epoch": 0.05, + "learning_rate": 4.973579804901138e-05, + "loss": 1.3352, + "step": 46413 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735786659620044e-05, + "loss": 0.8653, + "step": 46414 + }, + { + "epoch": 0.05, + "learning_rate": 4.973577526998452e-05, + "loss": 1.279, + "step": 46415 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735763880104825e-05, + "loss": 0.9058, + "step": 46416 + }, + { + "epoch": 0.05, + "learning_rate": 4.973575248998095e-05, + "loss": 1.1943, + "step": 46417 + }, + { + "epoch": 0.05, + "learning_rate": 4.973574109961288e-05, + "loss": 1.0147, + "step": 46418 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735729709000646e-05, + "loss": 0.9596, + "step": 46419 + }, + { + "epoch": 0.05, + "learning_rate": 4.973571831814422e-05, + "loss": 1.1248, + "step": 46420 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735706927043614e-05, + "loss": 0.9395, + "step": 46421 + }, + { + "epoch": 0.05, + "learning_rate": 4.973569553569884e-05, + "loss": 1.512, + "step": 46422 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735684144109864e-05, + "loss": 1.0922, + "step": 46423 + }, + { + "epoch": 0.05, + "learning_rate": 4.973567275227672e-05, + "loss": 0.9132, + "step": 46424 + }, + { + "epoch": 0.05, + "learning_rate": 4.97356613601994e-05, + "loss": 1.1931, + "step": 46425 + }, + { + "epoch": 0.05, + "learning_rate": 4.97356499678779e-05, + "loss": 0.9477, + "step": 46426 + }, + { + "epoch": 0.05, + "learning_rate": 4.973563857531221e-05, + "loss": 1.3849, + "step": 46427 + }, + { + "epoch": 0.05, + "learning_rate": 4.973562718250234e-05, + "loss": 1.1385, + "step": 46428 + }, + { + "epoch": 0.05, + "learning_rate": 4.97356157894483e-05, + "loss": 0.9256, + "step": 46429 + }, + { + "epoch": 0.05, + "learning_rate": 4.973560439615008e-05, + "loss": 1.1821, + "step": 46430 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735593002607675e-05, + "loss": 0.9724, + "step": 46431 + }, + { + "epoch": 0.05, + "learning_rate": 4.973558160882109e-05, + "loss": 0.7902, + "step": 46432 + }, + { + "epoch": 0.05, + "learning_rate": 4.973557021479033e-05, + "loss": 1.0813, + "step": 46433 + }, + { + "epoch": 0.05, + "learning_rate": 4.973555882051539e-05, + "loss": 1.1857, + "step": 46434 + }, + { + "epoch": 0.05, + "learning_rate": 4.973554742599627e-05, + "loss": 1.2496, + "step": 46435 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735536031232975e-05, + "loss": 0.9935, + "step": 46436 + }, + { + "epoch": 0.05, + "learning_rate": 4.973552463622549e-05, + "loss": 0.853, + "step": 46437 + }, + { + "epoch": 0.05, + "learning_rate": 4.973551324097384e-05, + "loss": 1.1495, + "step": 46438 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735501845478006e-05, + "loss": 1.0196, + "step": 46439 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735490449737996e-05, + "loss": 0.962, + "step": 46440 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735479053753796e-05, + "loss": 1.2145, + "step": 46441 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735467657525434e-05, + "loss": 0.9584, + "step": 46442 + }, + { + "epoch": 0.05, + "learning_rate": 4.973545626105288e-05, + "loss": 1.0392, + "step": 46443 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735444864336155e-05, + "loss": 0.876, + "step": 46444 + }, + { + "epoch": 0.05, + "learning_rate": 4.973543346737526e-05, + "loss": 1.1968, + "step": 46445 + }, + { + "epoch": 0.05, + "learning_rate": 4.973542207017017e-05, + "loss": 1.2865, + "step": 46446 + }, + { + "epoch": 0.05, + "learning_rate": 4.973541067272092e-05, + "loss": 1.1246, + "step": 46447 + }, + { + "epoch": 0.05, + "learning_rate": 4.973539927502748e-05, + "loss": 0.9601, + "step": 46448 + }, + { + "epoch": 0.05, + "learning_rate": 4.973538787708987e-05, + "loss": 0.2531, + "step": 46449 + }, + { + "epoch": 0.05, + "learning_rate": 4.973537647890807e-05, + "loss": 0.5886, + "step": 46450 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735365080482105e-05, + "loss": 1.1662, + "step": 46451 + }, + { + "epoch": 0.05, + "learning_rate": 4.973535368181196e-05, + "loss": 0.8357, + "step": 46452 + }, + { + "epoch": 0.05, + "learning_rate": 4.973534228289763e-05, + "loss": 1.3111, + "step": 46453 + }, + { + "epoch": 0.05, + "learning_rate": 4.973533088373914e-05, + "loss": 1.1791, + "step": 46454 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735319484336465e-05, + "loss": 0.9882, + "step": 46455 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735308084689607e-05, + "loss": 0.9216, + "step": 46456 + }, + { + "epoch": 0.05, + "learning_rate": 4.973529668479858e-05, + "loss": 0.9443, + "step": 46457 + }, + { + "epoch": 0.05, + "learning_rate": 4.973528528466337e-05, + "loss": 1.1031, + "step": 46458 + }, + { + "epoch": 0.05, + "learning_rate": 4.973527388428399e-05, + "loss": 0.8972, + "step": 46459 + }, + { + "epoch": 0.05, + "learning_rate": 4.973526248366043e-05, + "loss": 1.3304, + "step": 46460 + }, + { + "epoch": 0.05, + "learning_rate": 4.97352510827927e-05, + "loss": 1.0836, + "step": 46461 + }, + { + "epoch": 0.05, + "learning_rate": 4.973523968168079e-05, + "loss": 1.0194, + "step": 46462 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735228280324694e-05, + "loss": 0.8921, + "step": 46463 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735216878724436e-05, + "loss": 1.0685, + "step": 46464 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735205476879995e-05, + "loss": 1.0872, + "step": 46465 + }, + { + "epoch": 0.05, + "learning_rate": 4.973519407479139e-05, + "loss": 1.08, + "step": 46466 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735182672458605e-05, + "loss": 1.1265, + "step": 46467 + }, + { + "epoch": 0.05, + "learning_rate": 4.973517126988163e-05, + "loss": 1.2189, + "step": 46468 + }, + { + "epoch": 0.05, + "learning_rate": 4.97351598670605e-05, + "loss": 0.5707, + "step": 46469 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735148463995184e-05, + "loss": 0.7611, + "step": 46470 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735137060685695e-05, + "loss": 2.3374, + "step": 46471 + }, + { + "epoch": 0.05, + "learning_rate": 4.973512565713203e-05, + "loss": 5.8022, + "step": 46472 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735114253334194e-05, + "loss": 5.7224, + "step": 46473 + }, + { + "epoch": 0.05, + "learning_rate": 4.973510284929218e-05, + "loss": 5.7382, + "step": 46474 + }, + { + "epoch": 0.05, + "learning_rate": 4.973509144500599e-05, + "loss": 5.6999, + "step": 46475 + }, + { + "epoch": 0.05, + "learning_rate": 4.973508004047562e-05, + "loss": 5.7315, + "step": 46476 + }, + { + "epoch": 0.05, + "learning_rate": 4.973506863570109e-05, + "loss": 5.6639, + "step": 46477 + }, + { + "epoch": 0.05, + "learning_rate": 4.973505723068238e-05, + "loss": 5.7273, + "step": 46478 + }, + { + "epoch": 0.05, + "learning_rate": 4.973504582541949e-05, + "loss": 5.5088, + "step": 46479 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735034419912434e-05, + "loss": 4.8965, + "step": 46480 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735023014161205e-05, + "loss": 2.1468, + "step": 46481 + }, + { + "epoch": 0.05, + "learning_rate": 4.9735011608165794e-05, + "loss": 1.1111, + "step": 46482 + }, + { + "epoch": 0.05, + "learning_rate": 4.973500020192622e-05, + "loss": 1.0256, + "step": 46483 + }, + { + "epoch": 0.05, + "learning_rate": 4.973498879544246e-05, + "loss": 1.0077, + "step": 46484 + }, + { + "epoch": 0.05, + "learning_rate": 4.973497738871454e-05, + "loss": 1.1965, + "step": 46485 + }, + { + "epoch": 0.05, + "learning_rate": 4.973496598174244e-05, + "loss": 1.1001, + "step": 46486 + }, + { + "epoch": 0.05, + "learning_rate": 4.973495457452616e-05, + "loss": 1.3605, + "step": 46487 + }, + { + "epoch": 0.05, + "learning_rate": 4.973494316706572e-05, + "loss": 1.4458, + "step": 46488 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734931759361095e-05, + "loss": 1.4362, + "step": 46489 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734920351412305e-05, + "loss": 1.1518, + "step": 46490 + }, + { + "epoch": 0.05, + "learning_rate": 4.973490894321934e-05, + "loss": 0.9296, + "step": 46491 + }, + { + "epoch": 0.05, + "learning_rate": 4.97348975347822e-05, + "loss": 0.9329, + "step": 46492 + }, + { + "epoch": 0.05, + "learning_rate": 4.973488612610089e-05, + "loss": 1.0036, + "step": 46493 + }, + { + "epoch": 0.05, + "learning_rate": 4.973487471717541e-05, + "loss": 0.6148, + "step": 46494 + }, + { + "epoch": 0.05, + "learning_rate": 4.973486330800576e-05, + "loss": 1.0302, + "step": 46495 + }, + { + "epoch": 0.05, + "learning_rate": 4.973485189859193e-05, + "loss": 0.752, + "step": 46496 + }, + { + "epoch": 0.05, + "learning_rate": 4.973484048893393e-05, + "loss": 1.0577, + "step": 46497 + }, + { + "epoch": 0.05, + "learning_rate": 4.973482907903176e-05, + "loss": 1.2112, + "step": 46498 + }, + { + "epoch": 0.05, + "learning_rate": 4.973481766888542e-05, + "loss": 1.6273, + "step": 46499 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734806258494906e-05, + "loss": 1.4102, + "step": 46500 + }, + { + "epoch": 0.05, + "eval_loss": 1.0785138607025146, + "eval_runtime": 129.0337, + "eval_samples_per_second": 10.734, + "eval_steps_per_second": 5.371, + "step": 46500 + }, + { + "epoch": 0.05, + "learning_rate": 4.973479484786022e-05, + "loss": 1.0806, + "step": 46501 + }, + { + "epoch": 0.05, + "learning_rate": 4.973478343698136e-05, + "loss": 0.8284, + "step": 46502 + }, + { + "epoch": 0.05, + "learning_rate": 4.973477202585833e-05, + "loss": 1.0333, + "step": 46503 + }, + { + "epoch": 0.05, + "learning_rate": 4.973476061449113e-05, + "loss": 1.0965, + "step": 46504 + }, + { + "epoch": 0.05, + "learning_rate": 4.973474920287976e-05, + "loss": 1.0654, + "step": 46505 + }, + { + "epoch": 0.05, + "learning_rate": 4.973473779102422e-05, + "loss": 1.0715, + "step": 46506 + }, + { + "epoch": 0.05, + "learning_rate": 4.973472637892451e-05, + "loss": 0.9679, + "step": 46507 + }, + { + "epoch": 0.05, + "learning_rate": 4.973471496658062e-05, + "loss": 1.3209, + "step": 46508 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734703553992566e-05, + "loss": 0.7037, + "step": 46509 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734692141160346e-05, + "loss": 0.913, + "step": 46510 + }, + { + "epoch": 0.05, + "learning_rate": 4.973468072808395e-05, + "loss": 1.0524, + "step": 46511 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734669314763385e-05, + "loss": 0.9175, + "step": 46512 + }, + { + "epoch": 0.05, + "learning_rate": 4.973465790119864e-05, + "loss": 1.3814, + "step": 46513 + }, + { + "epoch": 0.05, + "learning_rate": 4.973464648738974e-05, + "loss": 0.942, + "step": 46514 + }, + { + "epoch": 0.05, + "learning_rate": 4.973463507333667e-05, + "loss": 0.7225, + "step": 46515 + }, + { + "epoch": 0.05, + "learning_rate": 4.973462365903942e-05, + "loss": 0.9198, + "step": 46516 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734612244498e-05, + "loss": 0.8454, + "step": 46517 + }, + { + "epoch": 0.05, + "learning_rate": 4.973460082971242e-05, + "loss": 1.4438, + "step": 46518 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734589414682665e-05, + "loss": 1.0214, + "step": 46519 + }, + { + "epoch": 0.05, + "learning_rate": 4.973457799940874e-05, + "loss": 0.9507, + "step": 46520 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734566583890646e-05, + "loss": 1.1482, + "step": 46521 + }, + { + "epoch": 0.05, + "learning_rate": 4.973455516812838e-05, + "loss": 0.9526, + "step": 46522 + }, + { + "epoch": 0.05, + "learning_rate": 4.973454375212195e-05, + "loss": 1.0082, + "step": 46523 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734532335871356e-05, + "loss": 0.7951, + "step": 46524 + }, + { + "epoch": 0.05, + "learning_rate": 4.973452091937658e-05, + "loss": 0.9545, + "step": 46525 + }, + { + "epoch": 0.05, + "learning_rate": 4.973450950263764e-05, + "loss": 0.5459, + "step": 46526 + }, + { + "epoch": 0.05, + "learning_rate": 4.973449808565454e-05, + "loss": 0.5543, + "step": 46527 + }, + { + "epoch": 0.05, + "learning_rate": 4.973448666842726e-05, + "loss": 1.1954, + "step": 46528 + }, + { + "epoch": 0.05, + "learning_rate": 4.973447525095581e-05, + "loss": 1.0337, + "step": 46529 + }, + { + "epoch": 0.05, + "learning_rate": 4.97344638332402e-05, + "loss": 0.81, + "step": 46530 + }, + { + "epoch": 0.05, + "learning_rate": 4.973445241528042e-05, + "loss": 0.882, + "step": 46531 + }, + { + "epoch": 0.05, + "learning_rate": 4.973444099707647e-05, + "loss": 0.8892, + "step": 46532 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734429578628354e-05, + "loss": 0.5623, + "step": 46533 + }, + { + "epoch": 0.05, + "learning_rate": 4.973441815993607e-05, + "loss": 2.6433, + "step": 46534 + }, + { + "epoch": 0.05, + "learning_rate": 4.973440674099962e-05, + "loss": 5.7738, + "step": 46535 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734395321819e-05, + "loss": 4.7702, + "step": 46536 + }, + { + "epoch": 0.05, + "learning_rate": 4.973438390239421e-05, + "loss": 5.2295, + "step": 46537 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734372482725255e-05, + "loss": 5.3617, + "step": 46538 + }, + { + "epoch": 0.05, + "learning_rate": 4.973436106281213e-05, + "loss": 5.4507, + "step": 46539 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734349642654844e-05, + "loss": 5.7327, + "step": 46540 + }, + { + "epoch": 0.05, + "learning_rate": 4.973433822225339e-05, + "loss": 5.4235, + "step": 46541 + }, + { + "epoch": 0.05, + "learning_rate": 4.973432680160776e-05, + "loss": 5.3958, + "step": 46542 + }, + { + "epoch": 0.05, + "learning_rate": 4.973431538071797e-05, + "loss": 4.9642, + "step": 46543 + }, + { + "epoch": 0.05, + "learning_rate": 4.973430395958402e-05, + "loss": 5.0323, + "step": 46544 + }, + { + "epoch": 0.05, + "learning_rate": 4.973429253820589e-05, + "loss": 5.3324, + "step": 46545 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734281116583606e-05, + "loss": 5.2316, + "step": 46546 + }, + { + "epoch": 0.05, + "learning_rate": 4.973426969471714e-05, + "loss": 5.7913, + "step": 46547 + }, + { + "epoch": 0.05, + "learning_rate": 4.973425827260652e-05, + "loss": 1.9559, + "step": 46548 + }, + { + "epoch": 0.05, + "learning_rate": 4.973424685025173e-05, + "loss": 1.2, + "step": 46549 + }, + { + "epoch": 0.05, + "learning_rate": 4.973423542765278e-05, + "loss": 0.7442, + "step": 46550 + }, + { + "epoch": 0.05, + "learning_rate": 4.973422400480965e-05, + "loss": 0.8795, + "step": 46551 + }, + { + "epoch": 0.05, + "learning_rate": 4.973421258172237e-05, + "loss": 1.1944, + "step": 46552 + }, + { + "epoch": 0.05, + "learning_rate": 4.973420115839091e-05, + "loss": 0.9698, + "step": 46553 + }, + { + "epoch": 0.05, + "learning_rate": 4.97341897348153e-05, + "loss": 1.0681, + "step": 46554 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734178310995515e-05, + "loss": 1.316, + "step": 46555 + }, + { + "epoch": 0.05, + "learning_rate": 4.973416688693156e-05, + "loss": 1.1466, + "step": 46556 + }, + { + "epoch": 0.05, + "learning_rate": 4.973415546262345e-05, + "loss": 0.9823, + "step": 46557 + }, + { + "epoch": 0.05, + "learning_rate": 4.973414403807117e-05, + "loss": 0.8277, + "step": 46558 + }, + { + "epoch": 0.05, + "learning_rate": 4.973413261327473e-05, + "loss": 0.8502, + "step": 46559 + }, + { + "epoch": 0.05, + "learning_rate": 4.973412118823412e-05, + "loss": 0.9019, + "step": 46560 + }, + { + "epoch": 0.05, + "learning_rate": 4.973410976294934e-05, + "loss": 0.9357, + "step": 46561 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734098337420396e-05, + "loss": 0.9232, + "step": 46562 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734086911647294e-05, + "loss": 1.1242, + "step": 46563 + }, + { + "epoch": 0.05, + "learning_rate": 4.973407548563003e-05, + "loss": 0.9862, + "step": 46564 + }, + { + "epoch": 0.05, + "learning_rate": 4.97340640593686e-05, + "loss": 1.1819, + "step": 46565 + }, + { + "epoch": 0.05, + "learning_rate": 4.9734052632863e-05, + "loss": 1.1612, + "step": 46566 + }, + { + "epoch": 0.05, + "learning_rate": 4.973404120611324e-05, + "loss": 0.9152, + "step": 46567 + }, + { + "epoch": 0.05, + "learning_rate": 4.973402977911931e-05, + "loss": 0.9735, + "step": 46568 + }, + { + "epoch": 0.05, + "learning_rate": 4.973401835188122e-05, + "loss": 1.3106, + "step": 46569 + }, + { + "epoch": 0.05, + "learning_rate": 4.973400692439897e-05, + "loss": 1.4258, + "step": 46570 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733995496672556e-05, + "loss": 0.9391, + "step": 46571 + }, + { + "epoch": 0.05, + "learning_rate": 4.973398406870198e-05, + "loss": 1.1377, + "step": 46572 + }, + { + "epoch": 0.05, + "learning_rate": 4.973397264048723e-05, + "loss": 1.0024, + "step": 46573 + }, + { + "epoch": 0.05, + "learning_rate": 4.973396121202833e-05, + "loss": 0.6597, + "step": 46574 + }, + { + "epoch": 0.05, + "learning_rate": 4.973394978332525e-05, + "loss": 0.7872, + "step": 46575 + }, + { + "epoch": 0.05, + "learning_rate": 4.973393835437802e-05, + "loss": 0.8694, + "step": 46576 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733926925186624e-05, + "loss": 0.8812, + "step": 46577 + }, + { + "epoch": 0.05, + "learning_rate": 4.973391549575107e-05, + "loss": 0.393, + "step": 46578 + }, + { + "epoch": 0.05, + "learning_rate": 4.973390406607135e-05, + "loss": 0.2731, + "step": 46579 + }, + { + "epoch": 0.05, + "learning_rate": 4.973389263614746e-05, + "loss": 0.23, + "step": 46580 + }, + { + "epoch": 0.05, + "learning_rate": 4.973388120597941e-05, + "loss": 0.1797, + "step": 46581 + }, + { + "epoch": 0.05, + "learning_rate": 4.97338697755672e-05, + "loss": 0.2259, + "step": 46582 + }, + { + "epoch": 0.05, + "learning_rate": 4.973385834491083e-05, + "loss": 0.2549, + "step": 46583 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733846914010296e-05, + "loss": 0.2244, + "step": 46584 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733835482865596e-05, + "loss": 0.2502, + "step": 46585 + }, + { + "epoch": 0.05, + "learning_rate": 4.973382405147674e-05, + "loss": 0.1483, + "step": 46586 + }, + { + "epoch": 0.05, + "learning_rate": 4.973381261984372e-05, + "loss": 0.3304, + "step": 46587 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733801187966536e-05, + "loss": 0.4186, + "step": 46588 + }, + { + "epoch": 0.05, + "learning_rate": 4.973378975584519e-05, + "loss": 0.2229, + "step": 46589 + }, + { + "epoch": 0.05, + "learning_rate": 4.973377832347969e-05, + "loss": 0.2258, + "step": 46590 + }, + { + "epoch": 0.05, + "learning_rate": 4.973376689087002e-05, + "loss": 0.3232, + "step": 46591 + }, + { + "epoch": 0.05, + "learning_rate": 4.97337554580162e-05, + "loss": 0.3122, + "step": 46592 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733744024918207e-05, + "loss": 0.3181, + "step": 46593 + }, + { + "epoch": 0.05, + "learning_rate": 4.973373259157605e-05, + "loss": 0.4033, + "step": 46594 + }, + { + "epoch": 0.05, + "learning_rate": 4.973372115798974e-05, + "loss": 0.2973, + "step": 46595 + }, + { + "epoch": 0.05, + "learning_rate": 4.973370972415927e-05, + "loss": 0.226, + "step": 46596 + }, + { + "epoch": 0.05, + "learning_rate": 4.973369829008464e-05, + "loss": 0.2062, + "step": 46597 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733686855765846e-05, + "loss": 0.7977, + "step": 46598 + }, + { + "epoch": 0.05, + "learning_rate": 4.973367542120289e-05, + "loss": 0.874, + "step": 46599 + }, + { + "epoch": 0.05, + "learning_rate": 4.973366398639577e-05, + "loss": 1.0736, + "step": 46600 + }, + { + "epoch": 0.05, + "learning_rate": 4.97336525513445e-05, + "loss": 1.0152, + "step": 46601 + }, + { + "epoch": 0.05, + "learning_rate": 4.973364111604907e-05, + "loss": 1.0549, + "step": 46602 + }, + { + "epoch": 0.05, + "learning_rate": 4.973362968050947e-05, + "loss": 0.8208, + "step": 46603 + }, + { + "epoch": 0.05, + "learning_rate": 4.973361824472572e-05, + "loss": 0.7641, + "step": 46604 + }, + { + "epoch": 0.05, + "learning_rate": 4.97336068086978e-05, + "loss": 0.5105, + "step": 46605 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733595372425724e-05, + "loss": 0.6064, + "step": 46606 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733583935909486e-05, + "loss": 0.9666, + "step": 46607 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733572499149094e-05, + "loss": 1.0368, + "step": 46608 + }, + { + "epoch": 0.05, + "learning_rate": 4.973356106214454e-05, + "loss": 1.3611, + "step": 46609 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733549624895834e-05, + "loss": 1.0411, + "step": 46610 + }, + { + "epoch": 0.05, + "learning_rate": 4.973353818740296e-05, + "loss": 1.0885, + "step": 46611 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733526749665926e-05, + "loss": 1.3389, + "step": 46612 + }, + { + "epoch": 0.05, + "learning_rate": 4.973351531168474e-05, + "loss": 1.9099, + "step": 46613 + }, + { + "epoch": 0.05, + "learning_rate": 4.973350387345939e-05, + "loss": 1.3757, + "step": 46614 + }, + { + "epoch": 0.05, + "learning_rate": 4.973349243498988e-05, + "loss": 1.3561, + "step": 46615 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733480996276215e-05, + "loss": 1.1844, + "step": 46616 + }, + { + "epoch": 0.05, + "learning_rate": 4.973346955731839e-05, + "loss": 0.8844, + "step": 46617 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733458118116406e-05, + "loss": 0.8704, + "step": 46618 + }, + { + "epoch": 0.05, + "learning_rate": 4.973344667867027e-05, + "loss": 0.9211, + "step": 46619 + }, + { + "epoch": 0.05, + "learning_rate": 4.973343523897997e-05, + "loss": 1.1064, + "step": 46620 + }, + { + "epoch": 0.05, + "learning_rate": 4.973342379904551e-05, + "loss": 1.4081, + "step": 46621 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733412358866894e-05, + "loss": 1.1625, + "step": 46622 + }, + { + "epoch": 0.05, + "learning_rate": 4.973340091844412e-05, + "loss": 1.0208, + "step": 46623 + }, + { + "epoch": 0.05, + "learning_rate": 4.973338947777719e-05, + "loss": 0.966, + "step": 46624 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733378036866107e-05, + "loss": 0.7655, + "step": 46625 + }, + { + "epoch": 0.05, + "learning_rate": 4.973336659571086e-05, + "loss": 0.1796, + "step": 46626 + }, + { + "epoch": 0.05, + "learning_rate": 4.973335515431146e-05, + "loss": 0.2652, + "step": 46627 + }, + { + "epoch": 0.05, + "learning_rate": 4.973334371266789e-05, + "loss": 0.8616, + "step": 46628 + }, + { + "epoch": 0.05, + "learning_rate": 4.973333227078018e-05, + "loss": 1.3758, + "step": 46629 + }, + { + "epoch": 0.05, + "learning_rate": 4.97333208286483e-05, + "loss": 0.9558, + "step": 46630 + }, + { + "epoch": 0.05, + "learning_rate": 4.973330938627227e-05, + "loss": 0.9247, + "step": 46631 + }, + { + "epoch": 0.05, + "learning_rate": 4.973329794365208e-05, + "loss": 1.2246, + "step": 46632 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733286500787734e-05, + "loss": 1.1295, + "step": 46633 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733275057679236e-05, + "loss": 1.0887, + "step": 46634 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733263614326575e-05, + "loss": 1.309, + "step": 46635 + }, + { + "epoch": 0.05, + "learning_rate": 4.973325217072976e-05, + "loss": 1.0508, + "step": 46636 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733240726888794e-05, + "loss": 0.9934, + "step": 46637 + }, + { + "epoch": 0.05, + "learning_rate": 4.973322928280366e-05, + "loss": 0.7807, + "step": 46638 + }, + { + "epoch": 0.05, + "learning_rate": 4.973321783847438e-05, + "loss": 0.6085, + "step": 46639 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733206393900946e-05, + "loss": 0.6529, + "step": 46640 + }, + { + "epoch": 0.05, + "learning_rate": 4.973319494908335e-05, + "loss": 0.1293, + "step": 46641 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733183504021596e-05, + "loss": 0.2818, + "step": 46642 + }, + { + "epoch": 0.05, + "learning_rate": 4.973317205871569e-05, + "loss": 0.6572, + "step": 46643 + }, + { + "epoch": 0.05, + "learning_rate": 4.973316061316563e-05, + "loss": 1.1081, + "step": 46644 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733149167371414e-05, + "loss": 1.3347, + "step": 46645 + }, + { + "epoch": 0.05, + "learning_rate": 4.973313772133304e-05, + "loss": 1.342, + "step": 46646 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733126275050515e-05, + "loss": 1.0764, + "step": 46647 + }, + { + "epoch": 0.05, + "learning_rate": 4.973311482852383e-05, + "loss": 0.8538, + "step": 46648 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733103381753e-05, + "loss": 0.4709, + "step": 46649 + }, + { + "epoch": 0.05, + "learning_rate": 4.973309193473801e-05, + "loss": 0.2897, + "step": 46650 + }, + { + "epoch": 0.05, + "learning_rate": 4.973308048747886e-05, + "loss": 0.613, + "step": 46651 + }, + { + "epoch": 0.05, + "learning_rate": 4.973306903997555e-05, + "loss": 0.6245, + "step": 46652 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733057592228105e-05, + "loss": 0.7232, + "step": 46653 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733046144236496e-05, + "loss": 0.54, + "step": 46654 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733034696000724e-05, + "loss": 0.6075, + "step": 46655 + }, + { + "epoch": 0.05, + "learning_rate": 4.973302324752081e-05, + "loss": 0.738, + "step": 46656 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733011798796735e-05, + "loss": 1.2266, + "step": 46657 + }, + { + "epoch": 0.05, + "learning_rate": 4.973300034982851e-05, + "loss": 1.1684, + "step": 46658 + }, + { + "epoch": 0.05, + "learning_rate": 4.973298890061613e-05, + "loss": 0.9949, + "step": 46659 + }, + { + "epoch": 0.05, + "learning_rate": 4.97329774511596e-05, + "loss": 1.056, + "step": 46660 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732966001458916e-05, + "loss": 0.8461, + "step": 46661 + }, + { + "epoch": 0.05, + "learning_rate": 4.973295455151407e-05, + "loss": 0.8546, + "step": 46662 + }, + { + "epoch": 0.05, + "learning_rate": 4.973294310132508e-05, + "loss": 1.0671, + "step": 46663 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732931650891936e-05, + "loss": 0.9849, + "step": 46664 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732920200214636e-05, + "loss": 0.9218, + "step": 46665 + }, + { + "epoch": 0.05, + "learning_rate": 4.973290874929319e-05, + "loss": 1.2051, + "step": 46666 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732897298127576e-05, + "loss": 1.1887, + "step": 46667 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732885846717816e-05, + "loss": 0.8639, + "step": 46668 + }, + { + "epoch": 0.05, + "learning_rate": 4.973287439506391e-05, + "loss": 1.06, + "step": 46669 + }, + { + "epoch": 0.05, + "learning_rate": 4.973286294316584e-05, + "loss": 0.99, + "step": 46670 + }, + { + "epoch": 0.05, + "learning_rate": 4.973285149102362e-05, + "loss": 1.149, + "step": 46671 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732840038637254e-05, + "loss": 1.2756, + "step": 46672 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732828586006743e-05, + "loss": 0.749, + "step": 46673 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732817133132064e-05, + "loss": 1.0265, + "step": 46674 + }, + { + "epoch": 0.05, + "learning_rate": 4.973280568001324e-05, + "loss": 1.1019, + "step": 46675 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732794226650265e-05, + "loss": 0.8911, + "step": 46676 + }, + { + "epoch": 0.05, + "learning_rate": 4.973278277304313e-05, + "loss": 1.0674, + "step": 46677 + }, + { + "epoch": 0.05, + "learning_rate": 4.973277131919186e-05, + "loss": 0.9934, + "step": 46678 + }, + { + "epoch": 0.05, + "learning_rate": 4.973275986509642e-05, + "loss": 0.9585, + "step": 46679 + }, + { + "epoch": 0.05, + "learning_rate": 4.973274841075685e-05, + "loss": 1.0028, + "step": 46680 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732736956173106e-05, + "loss": 1.1547, + "step": 46681 + }, + { + "epoch": 0.05, + "learning_rate": 4.973272550134522e-05, + "loss": 0.8946, + "step": 46682 + }, + { + "epoch": 0.05, + "learning_rate": 4.973271404627319e-05, + "loss": 1.135, + "step": 46683 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732702590957e-05, + "loss": 0.9844, + "step": 46684 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732691135396666e-05, + "loss": 1.1561, + "step": 46685 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732679679592174e-05, + "loss": 0.8871, + "step": 46686 + }, + { + "epoch": 0.05, + "learning_rate": 4.973266822354354e-05, + "loss": 1.3042, + "step": 46687 + }, + { + "epoch": 0.05, + "learning_rate": 4.973265676725075e-05, + "loss": 0.8676, + "step": 46688 + }, + { + "epoch": 0.05, + "learning_rate": 4.973264531071381e-05, + "loss": 0.8407, + "step": 46689 + }, + { + "epoch": 0.05, + "learning_rate": 4.973263385393272e-05, + "loss": 1.1181, + "step": 46690 + }, + { + "epoch": 0.05, + "learning_rate": 4.973262239690748e-05, + "loss": 0.9836, + "step": 46691 + }, + { + "epoch": 0.05, + "learning_rate": 4.973261093963809e-05, + "loss": 1.1248, + "step": 46692 + }, + { + "epoch": 0.05, + "learning_rate": 4.973259948212455e-05, + "loss": 1.1306, + "step": 46693 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732588024366854e-05, + "loss": 0.6313, + "step": 46694 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732576566365016e-05, + "loss": 1.1517, + "step": 46695 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732565108119036e-05, + "loss": 1.097, + "step": 46696 + }, + { + "epoch": 0.05, + "learning_rate": 4.973255364962889e-05, + "loss": 1.0944, + "step": 46697 + }, + { + "epoch": 0.05, + "learning_rate": 4.97325421908946e-05, + "loss": 1.1806, + "step": 46698 + }, + { + "epoch": 0.05, + "learning_rate": 4.973253073191616e-05, + "loss": 1.1391, + "step": 46699 + }, + { + "epoch": 0.05, + "learning_rate": 4.973251927269358e-05, + "loss": 0.7181, + "step": 46700 + }, + { + "epoch": 0.05, + "learning_rate": 4.973250781322685e-05, + "loss": 0.7595, + "step": 46701 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732496353515964e-05, + "loss": 0.7945, + "step": 46702 + }, + { + "epoch": 0.05, + "learning_rate": 4.973248489356093e-05, + "loss": 0.2656, + "step": 46703 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732473433361746e-05, + "loss": 0.8799, + "step": 46704 + }, + { + "epoch": 0.05, + "learning_rate": 4.973246197291841e-05, + "loss": 1.8386, + "step": 46705 + }, + { + "epoch": 0.05, + "learning_rate": 4.973245051223094e-05, + "loss": 1.0615, + "step": 46706 + }, + { + "epoch": 0.05, + "learning_rate": 4.973243905129931e-05, + "loss": 1.0302, + "step": 46707 + }, + { + "epoch": 0.05, + "learning_rate": 4.973242759012353e-05, + "loss": 1.0769, + "step": 46708 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732416128703616e-05, + "loss": 0.7702, + "step": 46709 + }, + { + "epoch": 0.05, + "learning_rate": 4.973240466703954e-05, + "loss": 1.0576, + "step": 46710 + }, + { + "epoch": 0.05, + "learning_rate": 4.973239320513132e-05, + "loss": 0.8687, + "step": 46711 + }, + { + "epoch": 0.05, + "learning_rate": 4.973238174297895e-05, + "loss": 0.883, + "step": 46712 + }, + { + "epoch": 0.05, + "learning_rate": 4.973237028058244e-05, + "loss": 1.1978, + "step": 46713 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732358817941774e-05, + "loss": 1.0713, + "step": 46714 + }, + { + "epoch": 0.05, + "learning_rate": 4.973234735505697e-05, + "loss": 0.8373, + "step": 46715 + }, + { + "epoch": 0.05, + "learning_rate": 4.973233589192801e-05, + "loss": 0.7134, + "step": 46716 + }, + { + "epoch": 0.05, + "learning_rate": 4.973232442855491e-05, + "loss": 1.1786, + "step": 46717 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732312964937654e-05, + "loss": 0.8498, + "step": 46718 + }, + { + "epoch": 0.05, + "learning_rate": 4.973230150107626e-05, + "loss": 1.0504, + "step": 46719 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732290036970716e-05, + "loss": 1.139, + "step": 46720 + }, + { + "epoch": 0.05, + "learning_rate": 4.973227857262102e-05, + "loss": 0.9855, + "step": 46721 + }, + { + "epoch": 0.05, + "learning_rate": 4.973226710802719e-05, + "loss": 1.2741, + "step": 46722 + }, + { + "epoch": 0.05, + "learning_rate": 4.97322556431892e-05, + "loss": 0.961, + "step": 46723 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732244178107066e-05, + "loss": 1.1023, + "step": 46724 + }, + { + "epoch": 0.05, + "learning_rate": 4.973223271278079e-05, + "loss": 0.7815, + "step": 46725 + }, + { + "epoch": 0.05, + "learning_rate": 4.973222124721036e-05, + "loss": 0.9996, + "step": 46726 + }, + { + "epoch": 0.05, + "learning_rate": 4.97322097813958e-05, + "loss": 0.9157, + "step": 46727 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732198315337084e-05, + "loss": 1.1413, + "step": 46728 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732186849034215e-05, + "loss": 1.0296, + "step": 46729 + }, + { + "epoch": 0.05, + "learning_rate": 4.973217538248721e-05, + "loss": 0.9206, + "step": 46730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732163915696064e-05, + "loss": 1.1169, + "step": 46731 + }, + { + "epoch": 0.05, + "learning_rate": 4.973215244866076e-05, + "loss": 0.731, + "step": 46732 + }, + { + "epoch": 0.05, + "learning_rate": 4.973214098138131e-05, + "loss": 0.42, + "step": 46733 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732129513857725e-05, + "loss": 0.3542, + "step": 46734 + }, + { + "epoch": 0.05, + "learning_rate": 4.973211804608999e-05, + "loss": 1.0023, + "step": 46735 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732106578078114e-05, + "loss": 1.1668, + "step": 46736 + }, + { + "epoch": 0.05, + "learning_rate": 4.973209510982209e-05, + "loss": 0.9243, + "step": 46737 + }, + { + "epoch": 0.05, + "learning_rate": 4.973208364132192e-05, + "loss": 1.1761, + "step": 46738 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732072172577604e-05, + "loss": 0.6972, + "step": 46739 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732060703589146e-05, + "loss": 1.0578, + "step": 46740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732049234356546e-05, + "loss": 1.3072, + "step": 46741 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732037764879796e-05, + "loss": 1.5675, + "step": 46742 + }, + { + "epoch": 0.05, + "learning_rate": 4.9732026295158905e-05, + "loss": 0.9957, + "step": 46743 + }, + { + "epoch": 0.05, + "learning_rate": 4.973201482519387e-05, + "loss": 1.2489, + "step": 46744 + }, + { + "epoch": 0.05, + "learning_rate": 4.973200335498469e-05, + "loss": 0.8307, + "step": 46745 + }, + { + "epoch": 0.05, + "learning_rate": 4.973199188453137e-05, + "loss": 0.8556, + "step": 46746 + }, + { + "epoch": 0.05, + "learning_rate": 4.97319804138339e-05, + "loss": 1.0933, + "step": 46747 + }, + { + "epoch": 0.05, + "learning_rate": 4.973196894289229e-05, + "loss": 1.0209, + "step": 46748 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731957471706536e-05, + "loss": 1.1098, + "step": 46749 + }, + { + "epoch": 0.05, + "learning_rate": 4.973194600027664e-05, + "loss": 0.9708, + "step": 46750 + }, + { + "epoch": 0.05, + "learning_rate": 4.97319345286026e-05, + "loss": 1.1982, + "step": 46751 + }, + { + "epoch": 0.05, + "learning_rate": 4.973192305668441e-05, + "loss": 1.0341, + "step": 46752 + }, + { + "epoch": 0.05, + "learning_rate": 4.973191158452208e-05, + "loss": 1.0286, + "step": 46753 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731900112115616e-05, + "loss": 0.9246, + "step": 46754 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731888639464994e-05, + "loss": 0.4614, + "step": 46755 + }, + { + "epoch": 0.05, + "learning_rate": 4.973187716657024e-05, + "loss": 0.5219, + "step": 46756 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731865693431344e-05, + "loss": 0.6697, + "step": 46757 + }, + { + "epoch": 0.05, + "learning_rate": 4.97318542200483e-05, + "loss": 0.3688, + "step": 46758 + }, + { + "epoch": 0.05, + "learning_rate": 4.973184274642112e-05, + "loss": 0.5242, + "step": 46759 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731831272549787e-05, + "loss": 0.5399, + "step": 46760 + }, + { + "epoch": 0.05, + "learning_rate": 4.973181979843432e-05, + "loss": 0.7789, + "step": 46761 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731808324074717e-05, + "loss": 0.4979, + "step": 46762 + }, + { + "epoch": 0.05, + "learning_rate": 4.973179684947096e-05, + "loss": 0.4754, + "step": 46763 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731785374623065e-05, + "loss": 0.2627, + "step": 46764 + }, + { + "epoch": 0.05, + "learning_rate": 4.973177389953102e-05, + "loss": 0.1803, + "step": 46765 + }, + { + "epoch": 0.05, + "learning_rate": 4.973176242419485e-05, + "loss": 0.2866, + "step": 46766 + }, + { + "epoch": 0.05, + "learning_rate": 4.973175094861453e-05, + "loss": 0.6873, + "step": 46767 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731739472790064e-05, + "loss": 1.1016, + "step": 46768 + }, + { + "epoch": 0.05, + "learning_rate": 4.973172799672147e-05, + "loss": 1.3152, + "step": 46769 + }, + { + "epoch": 0.05, + "learning_rate": 4.973171652040872e-05, + "loss": 1.0043, + "step": 46770 + }, + { + "epoch": 0.05, + "learning_rate": 4.973170504385184e-05, + "loss": 0.9972, + "step": 46771 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731693567050805e-05, + "loss": 1.0042, + "step": 46772 + }, + { + "epoch": 0.05, + "learning_rate": 4.973168209000565e-05, + "loss": 1.4145, + "step": 46773 + }, + { + "epoch": 0.05, + "learning_rate": 4.973167061271634e-05, + "loss": 1.0306, + "step": 46774 + }, + { + "epoch": 0.05, + "learning_rate": 4.973165913518289e-05, + "loss": 0.7173, + "step": 46775 + }, + { + "epoch": 0.05, + "learning_rate": 4.97316476574053e-05, + "loss": 1.0803, + "step": 46776 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731636179383576e-05, + "loss": 1.1117, + "step": 46777 + }, + { + "epoch": 0.05, + "learning_rate": 4.973162470111771e-05, + "loss": 0.6644, + "step": 46778 + }, + { + "epoch": 0.05, + "learning_rate": 4.97316132226077e-05, + "loss": 1.0483, + "step": 46779 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731601743853546e-05, + "loss": 0.7209, + "step": 46780 + }, + { + "epoch": 0.05, + "learning_rate": 4.973159026485525e-05, + "loss": 0.7714, + "step": 46781 + }, + { + "epoch": 0.05, + "learning_rate": 4.973157878561283e-05, + "loss": 0.9761, + "step": 46782 + }, + { + "epoch": 0.05, + "learning_rate": 4.973156730612626e-05, + "loss": 1.1103, + "step": 46783 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731555826395545e-05, + "loss": 1.0433, + "step": 46784 + }, + { + "epoch": 0.05, + "learning_rate": 4.97315443464207e-05, + "loss": 1.3227, + "step": 46785 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731532866201713e-05, + "loss": 0.8871, + "step": 46786 + }, + { + "epoch": 0.05, + "learning_rate": 4.973152138573859e-05, + "loss": 0.8551, + "step": 46787 + }, + { + "epoch": 0.05, + "learning_rate": 4.973150990503133e-05, + "loss": 1.1944, + "step": 46788 + }, + { + "epoch": 0.05, + "learning_rate": 4.973149842407992e-05, + "loss": 1.0882, + "step": 46789 + }, + { + "epoch": 0.05, + "learning_rate": 4.973148694288437e-05, + "loss": 1.1272, + "step": 46790 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731475461444686e-05, + "loss": 1.2566, + "step": 46791 + }, + { + "epoch": 0.05, + "learning_rate": 4.973146397976087e-05, + "loss": 0.9677, + "step": 46792 + }, + { + "epoch": 0.05, + "learning_rate": 4.973145249783291e-05, + "loss": 0.7219, + "step": 46793 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731441015660814e-05, + "loss": 1.2106, + "step": 46794 + }, + { + "epoch": 0.05, + "learning_rate": 4.973142953324458e-05, + "loss": 0.9041, + "step": 46795 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731418050584197e-05, + "loss": 0.9151, + "step": 46796 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731406567679685e-05, + "loss": 0.9766, + "step": 46797 + }, + { + "epoch": 0.05, + "learning_rate": 4.973139508453104e-05, + "loss": 1.0827, + "step": 46798 + }, + { + "epoch": 0.05, + "learning_rate": 4.973138360113825e-05, + "loss": 0.8827, + "step": 46799 + }, + { + "epoch": 0.05, + "learning_rate": 4.973137211750132e-05, + "loss": 1.2311, + "step": 46800 + }, + { + "epoch": 0.05, + "learning_rate": 4.973136063362025e-05, + "loss": 1.0502, + "step": 46801 + }, + { + "epoch": 0.05, + "learning_rate": 4.973134914949506e-05, + "loss": 0.9703, + "step": 46802 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731337665125714e-05, + "loss": 1.2957, + "step": 46803 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731326180512235e-05, + "loss": 1.0583, + "step": 46804 + }, + { + "epoch": 0.05, + "learning_rate": 4.973131469565462e-05, + "loss": 1.2198, + "step": 46805 + }, + { + "epoch": 0.05, + "learning_rate": 4.973130321055287e-05, + "loss": 1.5094, + "step": 46806 + }, + { + "epoch": 0.05, + "learning_rate": 4.973129172520699e-05, + "loss": 0.9233, + "step": 46807 + }, + { + "epoch": 0.05, + "learning_rate": 4.973128023961696e-05, + "loss": 0.8252, + "step": 46808 + }, + { + "epoch": 0.05, + "learning_rate": 4.97312687537828e-05, + "loss": 0.8277, + "step": 46809 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731257267704504e-05, + "loss": 0.7632, + "step": 46810 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731245781382065e-05, + "loss": 0.957, + "step": 46811 + }, + { + "epoch": 0.05, + "learning_rate": 4.973123429481549e-05, + "loss": 0.9164, + "step": 46812 + }, + { + "epoch": 0.05, + "learning_rate": 4.973122280800478e-05, + "loss": 0.9575, + "step": 46813 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731211320949944e-05, + "loss": 0.9238, + "step": 46814 + }, + { + "epoch": 0.05, + "learning_rate": 4.973119983365096e-05, + "loss": 1.1146, + "step": 46815 + }, + { + "epoch": 0.05, + "learning_rate": 4.973118834610784e-05, + "loss": 1.1634, + "step": 46816 + }, + { + "epoch": 0.05, + "learning_rate": 4.973117685832059e-05, + "loss": 1.3754, + "step": 46817 + }, + { + "epoch": 0.05, + "learning_rate": 4.97311653702892e-05, + "loss": 1.1222, + "step": 46818 + }, + { + "epoch": 0.05, + "learning_rate": 4.973115388201368e-05, + "loss": 1.3254, + "step": 46819 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731142393494025e-05, + "loss": 1.305, + "step": 46820 + }, + { + "epoch": 0.05, + "learning_rate": 4.973113090473023e-05, + "loss": 1.116, + "step": 46821 + }, + { + "epoch": 0.05, + "learning_rate": 4.97311194157223e-05, + "loss": 1.1355, + "step": 46822 + }, + { + "epoch": 0.05, + "learning_rate": 4.973110792647023e-05, + "loss": 0.8888, + "step": 46823 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731096436974036e-05, + "loss": 0.9408, + "step": 46824 + }, + { + "epoch": 0.05, + "learning_rate": 4.97310849472337e-05, + "loss": 0.9845, + "step": 46825 + }, + { + "epoch": 0.05, + "learning_rate": 4.973107345724923e-05, + "loss": 0.8871, + "step": 46826 + }, + { + "epoch": 0.05, + "learning_rate": 4.973106196702063e-05, + "loss": 0.6896, + "step": 46827 + }, + { + "epoch": 0.05, + "learning_rate": 4.9731050476547884e-05, + "loss": 0.6807, + "step": 46828 + }, + { + "epoch": 0.05, + "learning_rate": 4.973103898583101e-05, + "loss": 0.809, + "step": 46829 + }, + { + "epoch": 0.05, + "learning_rate": 4.973102749487e-05, + "loss": 0.585, + "step": 46830 + }, + { + "epoch": 0.05, + "learning_rate": 4.973101600366487e-05, + "loss": 0.9881, + "step": 46831 + }, + { + "epoch": 0.05, + "learning_rate": 4.973100451221558e-05, + "loss": 1.2657, + "step": 46832 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730993020522175e-05, + "loss": 1.0129, + "step": 46833 + }, + { + "epoch": 0.05, + "learning_rate": 4.973098152858463e-05, + "loss": 1.4926, + "step": 46834 + }, + { + "epoch": 0.05, + "learning_rate": 4.973097003640295e-05, + "loss": 1.3421, + "step": 46835 + }, + { + "epoch": 0.05, + "learning_rate": 4.973095854397714e-05, + "loss": 1.1046, + "step": 46836 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730947051307195e-05, + "loss": 0.952, + "step": 46837 + }, + { + "epoch": 0.05, + "learning_rate": 4.973093555839312e-05, + "loss": 1.0169, + "step": 46838 + }, + { + "epoch": 0.05, + "learning_rate": 4.97309240652349e-05, + "loss": 0.9079, + "step": 46839 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730912571832554e-05, + "loss": 0.866, + "step": 46840 + }, + { + "epoch": 0.05, + "learning_rate": 4.973090107818608e-05, + "loss": 1.4544, + "step": 46841 + }, + { + "epoch": 0.05, + "learning_rate": 4.973088958429547e-05, + "loss": 2.5718, + "step": 46842 + }, + { + "epoch": 0.05, + "learning_rate": 4.973087809016072e-05, + "loss": 2.5029, + "step": 46843 + }, + { + "epoch": 0.05, + "learning_rate": 4.973086659578184e-05, + "loss": 2.5318, + "step": 46844 + }, + { + "epoch": 0.05, + "learning_rate": 4.973085510115883e-05, + "loss": 1.1463, + "step": 46845 + }, + { + "epoch": 0.05, + "learning_rate": 4.973084360629169e-05, + "loss": 0.6873, + "step": 46846 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730832111180416e-05, + "loss": 1.19, + "step": 46847 + }, + { + "epoch": 0.05, + "learning_rate": 4.973082061582501e-05, + "loss": 1.937, + "step": 46848 + }, + { + "epoch": 0.05, + "learning_rate": 4.973080912022547e-05, + "loss": 0.5436, + "step": 46849 + }, + { + "epoch": 0.05, + "learning_rate": 4.97307976243818e-05, + "loss": 0.9534, + "step": 46850 + }, + { + "epoch": 0.05, + "learning_rate": 4.973078612829399e-05, + "loss": 0.9234, + "step": 46851 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730774631962057e-05, + "loss": 1.2125, + "step": 46852 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730763135385986e-05, + "loss": 1.5367, + "step": 46853 + }, + { + "epoch": 0.05, + "learning_rate": 4.973075163856579e-05, + "loss": 1.4872, + "step": 46854 + }, + { + "epoch": 0.05, + "learning_rate": 4.973074014150146e-05, + "loss": 1.5188, + "step": 46855 + }, + { + "epoch": 0.05, + "learning_rate": 4.973072864419299e-05, + "loss": 1.0713, + "step": 46856 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730717146640405e-05, + "loss": 0.8549, + "step": 46857 + }, + { + "epoch": 0.05, + "learning_rate": 4.973070564884368e-05, + "loss": 1.5679, + "step": 46858 + }, + { + "epoch": 0.05, + "learning_rate": 4.973069415080282e-05, + "loss": 3.9997, + "step": 46859 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730682652517834e-05, + "loss": 1.4048, + "step": 46860 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730671153988715e-05, + "loss": 1.199, + "step": 46861 + }, + { + "epoch": 0.05, + "learning_rate": 4.973065965521547e-05, + "loss": 0.9233, + "step": 46862 + }, + { + "epoch": 0.05, + "learning_rate": 4.973064815619809e-05, + "loss": 1.1845, + "step": 46863 + }, + { + "epoch": 0.05, + "learning_rate": 4.973063665693658e-05, + "loss": 0.4973, + "step": 46864 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730625157430936e-05, + "loss": 0.5063, + "step": 46865 + }, + { + "epoch": 0.05, + "learning_rate": 4.973061365768117e-05, + "loss": 0.3843, + "step": 46866 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730602157687265e-05, + "loss": 0.6689, + "step": 46867 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730590657449234e-05, + "loss": 0.5886, + "step": 46868 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730579156967074e-05, + "loss": 0.5114, + "step": 46869 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730567656240786e-05, + "loss": 0.4535, + "step": 46870 + }, + { + "epoch": 0.05, + "learning_rate": 4.973055615527036e-05, + "loss": 0.419, + "step": 46871 + }, + { + "epoch": 0.05, + "learning_rate": 4.973054465405581e-05, + "loss": 1.1382, + "step": 46872 + }, + { + "epoch": 0.05, + "learning_rate": 4.973053315259714e-05, + "loss": 1.0448, + "step": 46873 + }, + { + "epoch": 0.05, + "learning_rate": 4.973052165089432e-05, + "loss": 1.1127, + "step": 46874 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730510148947384e-05, + "loss": 0.8564, + "step": 46875 + }, + { + "epoch": 0.05, + "learning_rate": 4.973049864675632e-05, + "loss": 0.5005, + "step": 46876 + }, + { + "epoch": 0.05, + "learning_rate": 4.973048714432112e-05, + "loss": 0.9025, + "step": 46877 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730475641641795e-05, + "loss": 1.0457, + "step": 46878 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730464138718344e-05, + "loss": 1.0114, + "step": 46879 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730452635550765e-05, + "loss": 1.0049, + "step": 46880 + }, + { + "epoch": 0.05, + "learning_rate": 4.973044113213905e-05, + "loss": 1.0725, + "step": 46881 + }, + { + "epoch": 0.05, + "learning_rate": 4.973042962848321e-05, + "loss": 0.9277, + "step": 46882 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730418124583236e-05, + "loss": 1.2002, + "step": 46883 + }, + { + "epoch": 0.05, + "learning_rate": 4.973040662043914e-05, + "loss": 1.0002, + "step": 46884 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730395116050914e-05, + "loss": 0.8721, + "step": 46885 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730383611418564e-05, + "loss": 0.9231, + "step": 46886 + }, + { + "epoch": 0.05, + "learning_rate": 4.973037210654208e-05, + "loss": 0.8628, + "step": 46887 + }, + { + "epoch": 0.05, + "learning_rate": 4.973036060142147e-05, + "loss": 1.1176, + "step": 46888 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730349096056736e-05, + "loss": 1.1107, + "step": 46889 + }, + { + "epoch": 0.05, + "learning_rate": 4.973033759044787e-05, + "loss": 0.3149, + "step": 46890 + }, + { + "epoch": 0.05, + "learning_rate": 4.973032608459488e-05, + "loss": 0.6816, + "step": 46891 + }, + { + "epoch": 0.05, + "learning_rate": 4.973031457849776e-05, + "loss": 0.5162, + "step": 46892 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730303072156516e-05, + "loss": 0.4873, + "step": 46893 + }, + { + "epoch": 0.05, + "learning_rate": 4.973029156557114e-05, + "loss": 0.2094, + "step": 46894 + }, + { + "epoch": 0.05, + "learning_rate": 4.973028005874164e-05, + "loss": 0.1541, + "step": 46895 + }, + { + "epoch": 0.05, + "learning_rate": 4.973026855166801e-05, + "loss": 0.1072, + "step": 46896 + }, + { + "epoch": 0.05, + "learning_rate": 4.973025704435026e-05, + "loss": 0.4122, + "step": 46897 + }, + { + "epoch": 0.05, + "learning_rate": 4.973024553678838e-05, + "loss": 0.8716, + "step": 46898 + }, + { + "epoch": 0.05, + "learning_rate": 4.973023402898237e-05, + "loss": 1.1602, + "step": 46899 + }, + { + "epoch": 0.05, + "learning_rate": 4.973022252093224e-05, + "loss": 1.0753, + "step": 46900 + }, + { + "epoch": 0.05, + "learning_rate": 4.973021101263798e-05, + "loss": 1.056, + "step": 46901 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730199504099594e-05, + "loss": 1.215, + "step": 46902 + }, + { + "epoch": 0.05, + "learning_rate": 4.973018799531708e-05, + "loss": 1.4891, + "step": 46903 + }, + { + "epoch": 0.05, + "learning_rate": 4.973017648629044e-05, + "loss": 1.4404, + "step": 46904 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730164977019676e-05, + "loss": 1.073, + "step": 46905 + }, + { + "epoch": 0.05, + "learning_rate": 4.973015346750479e-05, + "loss": 0.7088, + "step": 46906 + }, + { + "epoch": 0.05, + "learning_rate": 4.973014195774578e-05, + "loss": 0.7515, + "step": 46907 + }, + { + "epoch": 0.05, + "learning_rate": 4.973013044774263e-05, + "loss": 0.9139, + "step": 46908 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730118937495364e-05, + "loss": 1.0907, + "step": 46909 + }, + { + "epoch": 0.05, + "learning_rate": 4.973010742700397e-05, + "loss": 0.8861, + "step": 46910 + }, + { + "epoch": 0.05, + "learning_rate": 4.973009591626846e-05, + "loss": 1.304, + "step": 46911 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730084405288816e-05, + "loss": 1.1009, + "step": 46912 + }, + { + "epoch": 0.05, + "learning_rate": 4.973007289406505e-05, + "loss": 1.2148, + "step": 46913 + }, + { + "epoch": 0.05, + "learning_rate": 4.973006138259715e-05, + "loss": 0.8415, + "step": 46914 + }, + { + "epoch": 0.05, + "learning_rate": 4.973004987088514e-05, + "loss": 2.2397, + "step": 46915 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730038358928996e-05, + "loss": 3.0074, + "step": 46916 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730026846728725e-05, + "loss": 2.4613, + "step": 46917 + }, + { + "epoch": 0.05, + "learning_rate": 4.973001533428434e-05, + "loss": 1.8411, + "step": 46918 + }, + { + "epoch": 0.05, + "learning_rate": 4.9730003821595824e-05, + "loss": 2.2903, + "step": 46919 + }, + { + "epoch": 0.05, + "learning_rate": 4.972999230866319e-05, + "loss": 1.0625, + "step": 46920 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729980795486424e-05, + "loss": 0.9147, + "step": 46921 + }, + { + "epoch": 0.05, + "learning_rate": 4.972996928206554e-05, + "loss": 1.0866, + "step": 46922 + }, + { + "epoch": 0.05, + "learning_rate": 4.972995776840052e-05, + "loss": 0.6549, + "step": 46923 + }, + { + "epoch": 0.05, + "learning_rate": 4.972994625449139e-05, + "loss": 0.8927, + "step": 46924 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729934740338135e-05, + "loss": 0.9663, + "step": 46925 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729923225940756e-05, + "loss": 1.0619, + "step": 46926 + }, + { + "epoch": 0.05, + "learning_rate": 4.972991171129925e-05, + "loss": 1.3093, + "step": 46927 + }, + { + "epoch": 0.05, + "learning_rate": 4.972990019641362e-05, + "loss": 1.1976, + "step": 46928 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729888681283867e-05, + "loss": 1.2076, + "step": 46929 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729877165909994e-05, + "loss": 1.4542, + "step": 46930 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729865650292e-05, + "loss": 1.1851, + "step": 46931 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729854134429876e-05, + "loss": 1.2524, + "step": 46932 + }, + { + "epoch": 0.05, + "learning_rate": 4.972984261832364e-05, + "loss": 1.0978, + "step": 46933 + }, + { + "epoch": 0.05, + "learning_rate": 4.972983110197327e-05, + "loss": 1.1628, + "step": 46934 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729819585378776e-05, + "loss": 1.187, + "step": 46935 + }, + { + "epoch": 0.05, + "learning_rate": 4.972980806854017e-05, + "loss": 1.0774, + "step": 46936 + }, + { + "epoch": 0.05, + "learning_rate": 4.972979655145744e-05, + "loss": 0.618, + "step": 46937 + }, + { + "epoch": 0.05, + "learning_rate": 4.972978503413058e-05, + "loss": 0.8438, + "step": 46938 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729773516559606e-05, + "loss": 1.0914, + "step": 46939 + }, + { + "epoch": 0.05, + "learning_rate": 4.97297619987445e-05, + "loss": 1.1903, + "step": 46940 + }, + { + "epoch": 0.05, + "learning_rate": 4.972975048068529e-05, + "loss": 1.0966, + "step": 46941 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729738962381945e-05, + "loss": 0.5892, + "step": 46942 + }, + { + "epoch": 0.05, + "learning_rate": 4.972972744383448e-05, + "loss": 1.2508, + "step": 46943 + }, + { + "epoch": 0.05, + "learning_rate": 4.97297159250429e-05, + "loss": 0.9213, + "step": 46944 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729704406007195e-05, + "loss": 0.9855, + "step": 46945 + }, + { + "epoch": 0.05, + "learning_rate": 4.972969288672736e-05, + "loss": 1.2215, + "step": 46946 + }, + { + "epoch": 0.05, + "learning_rate": 4.972968136720342e-05, + "loss": 0.4112, + "step": 46947 + }, + { + "epoch": 0.05, + "learning_rate": 4.972966984743534e-05, + "loss": 0.6981, + "step": 46948 + }, + { + "epoch": 0.05, + "learning_rate": 4.972965832742316e-05, + "loss": 0.6046, + "step": 46949 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729646807166845e-05, + "loss": 1.3887, + "step": 46950 + }, + { + "epoch": 0.05, + "learning_rate": 4.972963528666641e-05, + "loss": 0.976, + "step": 46951 + }, + { + "epoch": 0.05, + "learning_rate": 4.972962376592186e-05, + "loss": 0.6874, + "step": 46952 + }, + { + "epoch": 0.05, + "learning_rate": 4.972961224493319e-05, + "loss": 0.358, + "step": 46953 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729600723700396e-05, + "loss": 0.6867, + "step": 46954 + }, + { + "epoch": 0.05, + "learning_rate": 4.972958920222348e-05, + "loss": 0.97, + "step": 46955 + }, + { + "epoch": 0.05, + "learning_rate": 4.972957768050245e-05, + "loss": 0.9862, + "step": 46956 + }, + { + "epoch": 0.05, + "learning_rate": 4.972956615853729e-05, + "loss": 2.9339, + "step": 46957 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729554636328026e-05, + "loss": 1.9905, + "step": 46958 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729543113874625e-05, + "loss": 0.8556, + "step": 46959 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729531591177115e-05, + "loss": 1.068, + "step": 46960 + }, + { + "epoch": 0.05, + "learning_rate": 4.972952006823548e-05, + "loss": 0.9461, + "step": 46961 + }, + { + "epoch": 0.05, + "learning_rate": 4.972950854504973e-05, + "loss": 0.8989, + "step": 46962 + }, + { + "epoch": 0.05, + "learning_rate": 4.972949702161986e-05, + "loss": 1.0685, + "step": 46963 + }, + { + "epoch": 0.05, + "learning_rate": 4.972948549794587e-05, + "loss": 0.8232, + "step": 46964 + }, + { + "epoch": 0.05, + "learning_rate": 4.972947397402776e-05, + "loss": 0.8161, + "step": 46965 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729462449865536e-05, + "loss": 0.9024, + "step": 46966 + }, + { + "epoch": 0.05, + "learning_rate": 4.972945092545919e-05, + "loss": 1.3549, + "step": 46967 + }, + { + "epoch": 0.05, + "learning_rate": 4.972943940080872e-05, + "loss": 1.4177, + "step": 46968 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729427875914134e-05, + "loss": 1.0952, + "step": 46969 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729416350775435e-05, + "loss": 1.0067, + "step": 46970 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729404825392614e-05, + "loss": 0.9874, + "step": 46971 + }, + { + "epoch": 0.05, + "learning_rate": 4.972939329976568e-05, + "loss": 0.8352, + "step": 46972 + }, + { + "epoch": 0.05, + "learning_rate": 4.972938177389462e-05, + "loss": 0.6069, + "step": 46973 + }, + { + "epoch": 0.05, + "learning_rate": 4.972937024777944e-05, + "loss": 0.9588, + "step": 46974 + }, + { + "epoch": 0.05, + "learning_rate": 4.972935872142015e-05, + "loss": 1.0045, + "step": 46975 + }, + { + "epoch": 0.05, + "learning_rate": 4.972934719481673e-05, + "loss": 0.7635, + "step": 46976 + }, + { + "epoch": 0.05, + "learning_rate": 4.972933566796921e-05, + "loss": 0.5785, + "step": 46977 + }, + { + "epoch": 0.05, + "learning_rate": 4.972932414087756e-05, + "loss": 1.1007, + "step": 46978 + }, + { + "epoch": 0.05, + "learning_rate": 4.97293126135418e-05, + "loss": 1.1936, + "step": 46979 + }, + { + "epoch": 0.05, + "learning_rate": 4.972930108596192e-05, + "loss": 1.0258, + "step": 46980 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729289558137915e-05, + "loss": 1.1514, + "step": 46981 + }, + { + "epoch": 0.05, + "learning_rate": 4.97292780300698e-05, + "loss": 1.2222, + "step": 46982 + }, + { + "epoch": 0.05, + "learning_rate": 4.972926650175756e-05, + "loss": 1.1286, + "step": 46983 + }, + { + "epoch": 0.05, + "learning_rate": 4.972925497320122e-05, + "loss": 0.7899, + "step": 46984 + }, + { + "epoch": 0.05, + "learning_rate": 4.972924344440075e-05, + "loss": 0.7675, + "step": 46985 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729231915356166e-05, + "loss": 1.0589, + "step": 46986 + }, + { + "epoch": 0.05, + "learning_rate": 4.972922038606747e-05, + "loss": 1.1395, + "step": 46987 + }, + { + "epoch": 0.05, + "learning_rate": 4.972920885653465e-05, + "loss": 1.1627, + "step": 46988 + }, + { + "epoch": 0.05, + "learning_rate": 4.972919732675772e-05, + "loss": 0.7004, + "step": 46989 + }, + { + "epoch": 0.05, + "learning_rate": 4.972918579673668e-05, + "loss": 0.8704, + "step": 46990 + }, + { + "epoch": 0.05, + "learning_rate": 4.972917426647151e-05, + "loss": 0.9194, + "step": 46991 + }, + { + "epoch": 0.05, + "learning_rate": 4.972916273596223e-05, + "loss": 0.6611, + "step": 46992 + }, + { + "epoch": 0.05, + "learning_rate": 4.972915120520883e-05, + "loss": 1.2238, + "step": 46993 + }, + { + "epoch": 0.05, + "learning_rate": 4.972913967421132e-05, + "loss": 0.9342, + "step": 46994 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729128142969696e-05, + "loss": 1.2023, + "step": 46995 + }, + { + "epoch": 0.05, + "learning_rate": 4.972911661148395e-05, + "loss": 0.784, + "step": 46996 + }, + { + "epoch": 0.05, + "learning_rate": 4.972910507975409e-05, + "loss": 1.2068, + "step": 46997 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729093547780116e-05, + "loss": 0.9346, + "step": 46998 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729082015562023e-05, + "loss": 1.0641, + "step": 46999 + }, + { + "epoch": 0.05, + "learning_rate": 4.972907048309982e-05, + "loss": 1.2464, + "step": 47000 + }, + { + "epoch": 0.05, + "eval_loss": 1.0832467079162598, + "eval_runtime": 125.52, + "eval_samples_per_second": 11.034, + "eval_steps_per_second": 5.521, + "step": 47000 + }, + { + "epoch": 0.05, + "learning_rate": 4.97290589503935e-05, + "loss": 1.199, + "step": 47001 + }, + { + "epoch": 0.05, + "learning_rate": 4.9729047417443064e-05, + "loss": 1.1167, + "step": 47002 + }, + { + "epoch": 0.05, + "learning_rate": 4.972903588424852e-05, + "loss": 0.9475, + "step": 47003 + }, + { + "epoch": 0.05, + "learning_rate": 4.972902435080985e-05, + "loss": 0.7281, + "step": 47004 + }, + { + "epoch": 0.05, + "learning_rate": 4.972901281712708e-05, + "loss": 0.7719, + "step": 47005 + }, + { + "epoch": 0.05, + "learning_rate": 4.972900128320018e-05, + "loss": 1.1034, + "step": 47006 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728989749029176e-05, + "loss": 0.913, + "step": 47007 + }, + { + "epoch": 0.05, + "learning_rate": 4.972897821461405e-05, + "loss": 0.9636, + "step": 47008 + }, + { + "epoch": 0.05, + "learning_rate": 4.972896667995482e-05, + "loss": 1.096, + "step": 47009 + }, + { + "epoch": 0.05, + "learning_rate": 4.972895514505147e-05, + "loss": 1.259, + "step": 47010 + }, + { + "epoch": 0.05, + "learning_rate": 4.972894360990401e-05, + "loss": 1.6639, + "step": 47011 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728932074512436e-05, + "loss": 1.492, + "step": 47012 + }, + { + "epoch": 0.05, + "learning_rate": 4.972892053887674e-05, + "loss": 1.3719, + "step": 47013 + }, + { + "epoch": 0.05, + "learning_rate": 4.972890900299694e-05, + "loss": 1.2596, + "step": 47014 + }, + { + "epoch": 0.05, + "learning_rate": 4.972889746687302e-05, + "loss": 1.2879, + "step": 47015 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728885930504985e-05, + "loss": 0.5291, + "step": 47016 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728874393892845e-05, + "loss": 1.1409, + "step": 47017 + }, + { + "epoch": 0.05, + "learning_rate": 4.972886285703659e-05, + "loss": 1.2415, + "step": 47018 + }, + { + "epoch": 0.05, + "learning_rate": 4.972885131993622e-05, + "loss": 1.1618, + "step": 47019 + }, + { + "epoch": 0.05, + "learning_rate": 4.972883978259174e-05, + "loss": 1.0615, + "step": 47020 + }, + { + "epoch": 0.05, + "learning_rate": 4.972882824500314e-05, + "loss": 0.8954, + "step": 47021 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728816707170434e-05, + "loss": 0.7671, + "step": 47022 + }, + { + "epoch": 0.05, + "learning_rate": 4.972880516909362e-05, + "loss": 1.1007, + "step": 47023 + }, + { + "epoch": 0.05, + "learning_rate": 4.972879363077269e-05, + "loss": 1.322, + "step": 47024 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728782092207635e-05, + "loss": 1.2125, + "step": 47025 + }, + { + "epoch": 0.05, + "learning_rate": 4.972877055339848e-05, + "loss": 0.9197, + "step": 47026 + }, + { + "epoch": 0.05, + "learning_rate": 4.972875901434522e-05, + "loss": 0.6871, + "step": 47027 + }, + { + "epoch": 0.05, + "learning_rate": 4.972874747504783e-05, + "loss": 0.7462, + "step": 47028 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728735935506344e-05, + "loss": 0.8313, + "step": 47029 + }, + { + "epoch": 0.05, + "learning_rate": 4.972872439572074e-05, + "loss": 0.7531, + "step": 47030 + }, + { + "epoch": 0.05, + "learning_rate": 4.972871285569102e-05, + "loss": 0.7917, + "step": 47031 + }, + { + "epoch": 0.05, + "learning_rate": 4.97287013154172e-05, + "loss": 1.298, + "step": 47032 + }, + { + "epoch": 0.05, + "learning_rate": 4.972868977489926e-05, + "loss": 1.6845, + "step": 47033 + }, + { + "epoch": 0.05, + "learning_rate": 4.97286782341372e-05, + "loss": 1.0196, + "step": 47034 + }, + { + "epoch": 0.05, + "learning_rate": 4.972866669313105e-05, + "loss": 0.9721, + "step": 47035 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728655151880774e-05, + "loss": 0.9609, + "step": 47036 + }, + { + "epoch": 0.05, + "learning_rate": 4.97286436103864e-05, + "loss": 1.3054, + "step": 47037 + }, + { + "epoch": 0.05, + "learning_rate": 4.97286320686479e-05, + "loss": 1.3264, + "step": 47038 + }, + { + "epoch": 0.05, + "learning_rate": 4.97286205266653e-05, + "loss": 1.4144, + "step": 47039 + }, + { + "epoch": 0.05, + "learning_rate": 4.972860898443858e-05, + "loss": 1.1372, + "step": 47040 + }, + { + "epoch": 0.05, + "learning_rate": 4.972859744196776e-05, + "loss": 0.9477, + "step": 47041 + }, + { + "epoch": 0.05, + "learning_rate": 4.972858589925282e-05, + "loss": 1.1287, + "step": 47042 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728574356293776e-05, + "loss": 1.1192, + "step": 47043 + }, + { + "epoch": 0.05, + "learning_rate": 4.972856281309062e-05, + "loss": 0.973, + "step": 47044 + }, + { + "epoch": 0.05, + "learning_rate": 4.972855126964336e-05, + "loss": 0.9251, + "step": 47045 + }, + { + "epoch": 0.05, + "learning_rate": 4.972853972595198e-05, + "loss": 0.7252, + "step": 47046 + }, + { + "epoch": 0.05, + "learning_rate": 4.97285281820165e-05, + "loss": 0.8821, + "step": 47047 + }, + { + "epoch": 0.05, + "learning_rate": 4.972851663783691e-05, + "loss": 0.8827, + "step": 47048 + }, + { + "epoch": 0.05, + "learning_rate": 4.97285050934132e-05, + "loss": 0.9439, + "step": 47049 + }, + { + "epoch": 0.05, + "learning_rate": 4.972849354874539e-05, + "loss": 0.7531, + "step": 47050 + }, + { + "epoch": 0.05, + "learning_rate": 4.972848200383347e-05, + "loss": 1.0091, + "step": 47051 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728470458677436e-05, + "loss": 0.8216, + "step": 47052 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728458913277295e-05, + "loss": 1.2661, + "step": 47053 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728447367633046e-05, + "loss": 0.9695, + "step": 47054 + }, + { + "epoch": 0.05, + "learning_rate": 4.972843582174468e-05, + "loss": 1.2277, + "step": 47055 + }, + { + "epoch": 0.05, + "learning_rate": 4.972842427561222e-05, + "loss": 1.1753, + "step": 47056 + }, + { + "epoch": 0.05, + "learning_rate": 4.972841272923564e-05, + "loss": 0.9361, + "step": 47057 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728401182614956e-05, + "loss": 1.1397, + "step": 47058 + }, + { + "epoch": 0.05, + "learning_rate": 4.972838963575017e-05, + "loss": 1.0203, + "step": 47059 + }, + { + "epoch": 0.05, + "learning_rate": 4.972837808864126e-05, + "loss": 1.1982, + "step": 47060 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728366541288257e-05, + "loss": 1.0815, + "step": 47061 + }, + { + "epoch": 0.05, + "learning_rate": 4.972835499369114e-05, + "loss": 1.6886, + "step": 47062 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728343445849914e-05, + "loss": 1.4611, + "step": 47063 + }, + { + "epoch": 0.05, + "learning_rate": 4.972833189776458e-05, + "loss": 0.9569, + "step": 47064 + }, + { + "epoch": 0.05, + "learning_rate": 4.972832034943514e-05, + "loss": 1.2162, + "step": 47065 + }, + { + "epoch": 0.05, + "learning_rate": 4.972830880086159e-05, + "loss": 0.8322, + "step": 47066 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728297252043945e-05, + "loss": 1.0358, + "step": 47067 + }, + { + "epoch": 0.05, + "learning_rate": 4.972828570298218e-05, + "loss": 1.2759, + "step": 47068 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728274153676305e-05, + "loss": 0.802, + "step": 47069 + }, + { + "epoch": 0.05, + "learning_rate": 4.972826260412633e-05, + "loss": 0.2533, + "step": 47070 + }, + { + "epoch": 0.05, + "learning_rate": 4.972825105433224e-05, + "loss": 0.2427, + "step": 47071 + }, + { + "epoch": 0.05, + "learning_rate": 4.972823950429406e-05, + "loss": 0.5786, + "step": 47072 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728227954011755e-05, + "loss": 0.3443, + "step": 47073 + }, + { + "epoch": 0.05, + "learning_rate": 4.972821640348535e-05, + "loss": 1.1077, + "step": 47074 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728204852714845e-05, + "loss": 1.0583, + "step": 47075 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728193301700224e-05, + "loss": 0.9253, + "step": 47076 + }, + { + "epoch": 0.05, + "learning_rate": 4.97281817504415e-05, + "loss": 0.9445, + "step": 47077 + }, + { + "epoch": 0.05, + "learning_rate": 4.972817019893867e-05, + "loss": 1.2061, + "step": 47078 + }, + { + "epoch": 0.05, + "learning_rate": 4.972815864719173e-05, + "loss": 1.1884, + "step": 47079 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728147095200694e-05, + "loss": 1.3653, + "step": 47080 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728135542965546e-05, + "loss": 0.9087, + "step": 47081 + }, + { + "epoch": 0.05, + "learning_rate": 4.97281239904863e-05, + "loss": 0.4792, + "step": 47082 + }, + { + "epoch": 0.05, + "learning_rate": 4.972811243776293e-05, + "loss": 1.0403, + "step": 47083 + }, + { + "epoch": 0.05, + "learning_rate": 4.972810088479547e-05, + "loss": 1.2227, + "step": 47084 + }, + { + "epoch": 0.05, + "learning_rate": 4.972808933158391e-05, + "loss": 1.0643, + "step": 47085 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728077778128226e-05, + "loss": 1.1178, + "step": 47086 + }, + { + "epoch": 0.05, + "learning_rate": 4.972806622442845e-05, + "loss": 0.6254, + "step": 47087 + }, + { + "epoch": 0.05, + "learning_rate": 4.972805467048456e-05, + "loss": 0.3162, + "step": 47088 + }, + { + "epoch": 0.05, + "learning_rate": 4.972804311629657e-05, + "loss": 0.9986, + "step": 47089 + }, + { + "epoch": 0.05, + "learning_rate": 4.972803156186447e-05, + "loss": 0.9555, + "step": 47090 + }, + { + "epoch": 0.05, + "learning_rate": 4.972802000718828e-05, + "loss": 1.1352, + "step": 47091 + }, + { + "epoch": 0.05, + "learning_rate": 4.9728008452267974e-05, + "loss": 0.7067, + "step": 47092 + }, + { + "epoch": 0.05, + "learning_rate": 4.972799689710356e-05, + "loss": 1.0502, + "step": 47093 + }, + { + "epoch": 0.05, + "learning_rate": 4.972798534169505e-05, + "loss": 0.9527, + "step": 47094 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727973786042437e-05, + "loss": 0.8445, + "step": 47095 + }, + { + "epoch": 0.05, + "learning_rate": 4.972796223014571e-05, + "loss": 0.9892, + "step": 47096 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727950674004884e-05, + "loss": 1.0811, + "step": 47097 + }, + { + "epoch": 0.05, + "learning_rate": 4.972793911761996e-05, + "loss": 1.3623, + "step": 47098 + }, + { + "epoch": 0.05, + "learning_rate": 4.972792756099092e-05, + "loss": 0.9791, + "step": 47099 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727916004117785e-05, + "loss": 1.0467, + "step": 47100 + }, + { + "epoch": 0.05, + "learning_rate": 4.972790444700054e-05, + "loss": 0.8634, + "step": 47101 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727892889639206e-05, + "loss": 0.9175, + "step": 47102 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727881332033754e-05, + "loss": 0.8036, + "step": 47103 + }, + { + "epoch": 0.05, + "learning_rate": 4.972786977418421e-05, + "loss": 0.9709, + "step": 47104 + }, + { + "epoch": 0.05, + "learning_rate": 4.972785821609055e-05, + "loss": 1.0236, + "step": 47105 + }, + { + "epoch": 0.05, + "learning_rate": 4.97278466577528e-05, + "loss": 1.043, + "step": 47106 + }, + { + "epoch": 0.05, + "learning_rate": 4.972783509917094e-05, + "loss": 0.9666, + "step": 47107 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727823540344975e-05, + "loss": 1.3567, + "step": 47108 + }, + { + "epoch": 0.05, + "learning_rate": 4.972781198127491e-05, + "loss": 1.053, + "step": 47109 + }, + { + "epoch": 0.05, + "learning_rate": 4.972780042196074e-05, + "loss": 0.9548, + "step": 47110 + }, + { + "epoch": 0.05, + "learning_rate": 4.972778886240247e-05, + "loss": 1.2804, + "step": 47111 + }, + { + "epoch": 0.05, + "learning_rate": 4.97277773026001e-05, + "loss": 1.1052, + "step": 47112 + }, + { + "epoch": 0.05, + "learning_rate": 4.972776574255362e-05, + "loss": 0.8313, + "step": 47113 + }, + { + "epoch": 0.05, + "learning_rate": 4.972775418226305e-05, + "loss": 1.0177, + "step": 47114 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727742621728366e-05, + "loss": 0.8757, + "step": 47115 + }, + { + "epoch": 0.05, + "learning_rate": 4.972773106094959e-05, + "loss": 0.6286, + "step": 47116 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727719499926706e-05, + "loss": 0.3499, + "step": 47117 + }, + { + "epoch": 0.05, + "learning_rate": 4.972770793865972e-05, + "loss": 0.2092, + "step": 47118 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727696377148634e-05, + "loss": 0.1855, + "step": 47119 + }, + { + "epoch": 0.05, + "learning_rate": 4.972768481539345e-05, + "loss": 0.334, + "step": 47120 + }, + { + "epoch": 0.05, + "learning_rate": 4.972767325339416e-05, + "loss": 0.5049, + "step": 47121 + }, + { + "epoch": 0.05, + "learning_rate": 4.972766169115077e-05, + "loss": 1.2995, + "step": 47122 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727650128663286e-05, + "loss": 1.3047, + "step": 47123 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727638565931686e-05, + "loss": 1.3134, + "step": 47124 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727627002956e-05, + "loss": 1.0468, + "step": 47125 + }, + { + "epoch": 0.05, + "learning_rate": 4.972761543973621e-05, + "loss": 0.9008, + "step": 47126 + }, + { + "epoch": 0.05, + "learning_rate": 4.972760387627232e-05, + "loss": 1.2408, + "step": 47127 + }, + { + "epoch": 0.05, + "learning_rate": 4.972759231256432e-05, + "loss": 1.3808, + "step": 47128 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727580748612225e-05, + "loss": 1.7073, + "step": 47129 + }, + { + "epoch": 0.05, + "learning_rate": 4.972756918441604e-05, + "loss": 1.2842, + "step": 47130 + }, + { + "epoch": 0.05, + "learning_rate": 4.972755761997574e-05, + "loss": 1.2236, + "step": 47131 + }, + { + "epoch": 0.05, + "learning_rate": 4.972754605529134e-05, + "loss": 0.6306, + "step": 47132 + }, + { + "epoch": 0.05, + "learning_rate": 4.972753449036285e-05, + "loss": 0.3848, + "step": 47133 + }, + { + "epoch": 0.05, + "learning_rate": 4.972752292519026e-05, + "loss": 0.4331, + "step": 47134 + }, + { + "epoch": 0.05, + "learning_rate": 4.972751135977356e-05, + "loss": 0.9583, + "step": 47135 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727499794112767e-05, + "loss": 1.0414, + "step": 47136 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727488228207876e-05, + "loss": 0.9703, + "step": 47137 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727476662058884e-05, + "loss": 0.8854, + "step": 47138 + }, + { + "epoch": 0.05, + "learning_rate": 4.972746509566579e-05, + "loss": 0.8729, + "step": 47139 + }, + { + "epoch": 0.05, + "learning_rate": 4.97274535290286e-05, + "loss": 1.0251, + "step": 47140 + }, + { + "epoch": 0.05, + "learning_rate": 4.972744196214731e-05, + "loss": 1.1406, + "step": 47141 + }, + { + "epoch": 0.05, + "learning_rate": 4.972743039502192e-05, + "loss": 1.1545, + "step": 47142 + }, + { + "epoch": 0.05, + "learning_rate": 4.972741882765244e-05, + "loss": 1.096, + "step": 47143 + }, + { + "epoch": 0.05, + "learning_rate": 4.972740726003885e-05, + "loss": 1.3844, + "step": 47144 + }, + { + "epoch": 0.05, + "learning_rate": 4.972739569218117e-05, + "loss": 1.0848, + "step": 47145 + }, + { + "epoch": 0.05, + "learning_rate": 4.972738412407938e-05, + "loss": 1.1154, + "step": 47146 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727372555733496e-05, + "loss": 0.9417, + "step": 47147 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727360987143525e-05, + "loss": 0.9582, + "step": 47148 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727349418309445e-05, + "loss": 1.1796, + "step": 47149 + }, + { + "epoch": 0.05, + "learning_rate": 4.972733784923127e-05, + "loss": 1.0774, + "step": 47150 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727326279908995e-05, + "loss": 1.3594, + "step": 47151 + }, + { + "epoch": 0.05, + "learning_rate": 4.972731471034262e-05, + "loss": 1.0133, + "step": 47152 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727303140532153e-05, + "loss": 1.0505, + "step": 47153 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727291570477594e-05, + "loss": 1.132, + "step": 47154 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727280000178927e-05, + "loss": 0.81, + "step": 47155 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727268429636165e-05, + "loss": 0.9739, + "step": 47156 + }, + { + "epoch": 0.05, + "learning_rate": 4.972725685884931e-05, + "loss": 1.1225, + "step": 47157 + }, + { + "epoch": 0.05, + "learning_rate": 4.972724528781835e-05, + "loss": 0.9021, + "step": 47158 + }, + { + "epoch": 0.05, + "learning_rate": 4.97272337165433e-05, + "loss": 1.4298, + "step": 47159 + }, + { + "epoch": 0.05, + "learning_rate": 4.972722214502416e-05, + "loss": 1.1587, + "step": 47160 + }, + { + "epoch": 0.05, + "learning_rate": 4.972721057326091e-05, + "loss": 0.9081, + "step": 47161 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727199001253566e-05, + "loss": 0.8032, + "step": 47162 + }, + { + "epoch": 0.05, + "learning_rate": 4.972718742900213e-05, + "loss": 1.3094, + "step": 47163 + }, + { + "epoch": 0.05, + "learning_rate": 4.972717585650659e-05, + "loss": 1.0887, + "step": 47164 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727164283766956e-05, + "loss": 0.5378, + "step": 47165 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727152710783235e-05, + "loss": 0.7458, + "step": 47166 + }, + { + "epoch": 0.05, + "learning_rate": 4.972714113755541e-05, + "loss": 1.0846, + "step": 47167 + }, + { + "epoch": 0.05, + "learning_rate": 4.972712956408349e-05, + "loss": 1.0236, + "step": 47168 + }, + { + "epoch": 0.05, + "learning_rate": 4.972711799036747e-05, + "loss": 0.3378, + "step": 47169 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727106416407363e-05, + "loss": 0.2653, + "step": 47170 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727094842203156e-05, + "loss": 0.6023, + "step": 47171 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727083267754854e-05, + "loss": 0.8893, + "step": 47172 + }, + { + "epoch": 0.05, + "learning_rate": 4.972707169306246e-05, + "loss": 1.1012, + "step": 47173 + }, + { + "epoch": 0.05, + "learning_rate": 4.972706011812596e-05, + "loss": 0.7776, + "step": 47174 + }, + { + "epoch": 0.05, + "learning_rate": 4.972704854294537e-05, + "loss": 0.6485, + "step": 47175 + }, + { + "epoch": 0.05, + "learning_rate": 4.972703696752069e-05, + "loss": 0.197, + "step": 47176 + }, + { + "epoch": 0.05, + "learning_rate": 4.972702539185191e-05, + "loss": 1.1252, + "step": 47177 + }, + { + "epoch": 0.05, + "learning_rate": 4.972701381593904e-05, + "loss": 0.8259, + "step": 47178 + }, + { + "epoch": 0.05, + "learning_rate": 4.972700223978207e-05, + "loss": 0.4899, + "step": 47179 + }, + { + "epoch": 0.05, + "learning_rate": 4.972699066338101e-05, + "loss": 0.841, + "step": 47180 + }, + { + "epoch": 0.05, + "learning_rate": 4.972697908673586e-05, + "loss": 1.2298, + "step": 47181 + }, + { + "epoch": 0.05, + "learning_rate": 4.972696750984661e-05, + "loss": 0.964, + "step": 47182 + }, + { + "epoch": 0.05, + "learning_rate": 4.972695593271326e-05, + "loss": 1.0068, + "step": 47183 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726944355335824e-05, + "loss": 1.1285, + "step": 47184 + }, + { + "epoch": 0.05, + "learning_rate": 4.972693277771429e-05, + "loss": 0.8755, + "step": 47185 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726921199848666e-05, + "loss": 1.0966, + "step": 47186 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726909621738946e-05, + "loss": 0.8594, + "step": 47187 + }, + { + "epoch": 0.05, + "learning_rate": 4.972689804338513e-05, + "loss": 1.061, + "step": 47188 + }, + { + "epoch": 0.05, + "learning_rate": 4.972688646478723e-05, + "loss": 1.0854, + "step": 47189 + }, + { + "epoch": 0.05, + "learning_rate": 4.972687488594522e-05, + "loss": 0.9095, + "step": 47190 + }, + { + "epoch": 0.05, + "learning_rate": 4.972686330685913e-05, + "loss": 1.2761, + "step": 47191 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726851727528944e-05, + "loss": 1.3857, + "step": 47192 + }, + { + "epoch": 0.05, + "learning_rate": 4.972684014795466e-05, + "loss": 1.5303, + "step": 47193 + }, + { + "epoch": 0.05, + "learning_rate": 4.972682856813629e-05, + "loss": 1.0708, + "step": 47194 + }, + { + "epoch": 0.05, + "learning_rate": 4.972681698807382e-05, + "loss": 0.8411, + "step": 47195 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726805407767265e-05, + "loss": 0.7456, + "step": 47196 + }, + { + "epoch": 0.05, + "learning_rate": 4.972679382721661e-05, + "loss": 1.2334, + "step": 47197 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726782246421865e-05, + "loss": 0.929, + "step": 47198 + }, + { + "epoch": 0.05, + "learning_rate": 4.972677066538303e-05, + "loss": 0.8006, + "step": 47199 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726759084100106e-05, + "loss": 1.3269, + "step": 47200 + }, + { + "epoch": 0.05, + "learning_rate": 4.972674750257308e-05, + "loss": 1.5196, + "step": 47201 + }, + { + "epoch": 0.05, + "learning_rate": 4.972673592080197e-05, + "loss": 1.5504, + "step": 47202 + }, + { + "epoch": 0.05, + "learning_rate": 4.972672433878677e-05, + "loss": 1.2319, + "step": 47203 + }, + { + "epoch": 0.05, + "learning_rate": 4.972671275652747e-05, + "loss": 0.8895, + "step": 47204 + }, + { + "epoch": 0.05, + "learning_rate": 4.972670117402408e-05, + "loss": 0.7244, + "step": 47205 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726689591276596e-05, + "loss": 0.9974, + "step": 47206 + }, + { + "epoch": 0.05, + "learning_rate": 4.972667800828503e-05, + "loss": 0.8634, + "step": 47207 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726666425049366e-05, + "loss": 0.4326, + "step": 47208 + }, + { + "epoch": 0.05, + "learning_rate": 4.972665484156961e-05, + "loss": 0.798, + "step": 47209 + }, + { + "epoch": 0.05, + "learning_rate": 4.972664325784577e-05, + "loss": 1.2236, + "step": 47210 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726631673877835e-05, + "loss": 0.9664, + "step": 47211 + }, + { + "epoch": 0.05, + "learning_rate": 4.972662008966581e-05, + "loss": 1.1837, + "step": 47212 + }, + { + "epoch": 0.05, + "learning_rate": 4.972660850520969e-05, + "loss": 1.1316, + "step": 47213 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726596920509486e-05, + "loss": 0.8749, + "step": 47214 + }, + { + "epoch": 0.05, + "learning_rate": 4.972658533556518e-05, + "loss": 1.0425, + "step": 47215 + }, + { + "epoch": 0.05, + "learning_rate": 4.972657375037679e-05, + "loss": 1.0562, + "step": 47216 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726562164944313e-05, + "loss": 0.9311, + "step": 47217 + }, + { + "epoch": 0.05, + "learning_rate": 4.972655057926774e-05, + "loss": 0.9352, + "step": 47218 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726538993347084e-05, + "loss": 0.8838, + "step": 47219 + }, + { + "epoch": 0.05, + "learning_rate": 4.972652740718233e-05, + "loss": 0.9446, + "step": 47220 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726515820773496e-05, + "loss": 1.1447, + "step": 47221 + }, + { + "epoch": 0.05, + "learning_rate": 4.972650423412056e-05, + "loss": 0.728, + "step": 47222 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726492647223544e-05, + "loss": 1.0597, + "step": 47223 + }, + { + "epoch": 0.05, + "learning_rate": 4.972648106008244e-05, + "loss": 1.0488, + "step": 47224 + }, + { + "epoch": 0.05, + "learning_rate": 4.972646947269724e-05, + "loss": 1.273, + "step": 47225 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726457885067954e-05, + "loss": 1.9392, + "step": 47226 + }, + { + "epoch": 0.05, + "learning_rate": 4.972644629719458e-05, + "loss": 5.1628, + "step": 47227 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726434709077116e-05, + "loss": 5.0879, + "step": 47228 + }, + { + "epoch": 0.05, + "learning_rate": 4.972642312071556e-05, + "loss": 4.9184, + "step": 47229 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726411532109915e-05, + "loss": 5.4188, + "step": 47230 + }, + { + "epoch": 0.05, + "learning_rate": 4.972639994326018e-05, + "loss": 5.134, + "step": 47231 + }, + { + "epoch": 0.05, + "learning_rate": 4.972638835416636e-05, + "loss": 5.7264, + "step": 47232 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726376764828454e-05, + "loss": 5.2432, + "step": 47233 + }, + { + "epoch": 0.05, + "learning_rate": 4.972636517524646e-05, + "loss": 5.5391, + "step": 47234 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726353585420374e-05, + "loss": 5.8012, + "step": 47235 + }, + { + "epoch": 0.05, + "learning_rate": 4.97263419953502e-05, + "loss": 5.6858, + "step": 47236 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726330405035937e-05, + "loss": 5.8638, + "step": 47237 + }, + { + "epoch": 0.05, + "learning_rate": 4.972631881447758e-05, + "loss": 5.7448, + "step": 47238 + }, + { + "epoch": 0.05, + "learning_rate": 4.972630722367515e-05, + "loss": 5.8938, + "step": 47239 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726295632628625e-05, + "loss": 5.915, + "step": 47240 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726284041338015e-05, + "loss": 3.8882, + "step": 47241 + }, + { + "epoch": 0.05, + "learning_rate": 4.972627244980331e-05, + "loss": 0.9929, + "step": 47242 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726260858024524e-05, + "loss": 0.8271, + "step": 47243 + }, + { + "epoch": 0.05, + "learning_rate": 4.972624926600164e-05, + "loss": 1.1974, + "step": 47244 + }, + { + "epoch": 0.05, + "learning_rate": 4.972623767373469e-05, + "loss": 1.0447, + "step": 47245 + }, + { + "epoch": 0.05, + "learning_rate": 4.972622608122364e-05, + "loss": 1.1897, + "step": 47246 + }, + { + "epoch": 0.05, + "learning_rate": 4.97262144884685e-05, + "loss": 0.886, + "step": 47247 + }, + { + "epoch": 0.05, + "learning_rate": 4.972620289546928e-05, + "loss": 1.0516, + "step": 47248 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726191302225964e-05, + "loss": 1.3022, + "step": 47249 + }, + { + "epoch": 0.05, + "learning_rate": 4.972617970873858e-05, + "loss": 0.8526, + "step": 47250 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726168115007095e-05, + "loss": 0.9775, + "step": 47251 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726156521031526e-05, + "loss": 1.2824, + "step": 47252 + }, + { + "epoch": 0.05, + "learning_rate": 4.972614492681187e-05, + "loss": 1.2457, + "step": 47253 + }, + { + "epoch": 0.05, + "learning_rate": 4.972613333234813e-05, + "loss": 0.675, + "step": 47254 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726121737640305e-05, + "loss": 0.9908, + "step": 47255 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726110142688384e-05, + "loss": 1.0396, + "step": 47256 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726098547492396e-05, + "loss": 0.9609, + "step": 47257 + }, + { + "epoch": 0.05, + "learning_rate": 4.972608695205231e-05, + "loss": 0.6992, + "step": 47258 + }, + { + "epoch": 0.05, + "learning_rate": 4.972607535636814e-05, + "loss": 0.9127, + "step": 47259 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726063760439885e-05, + "loss": 1.0654, + "step": 47260 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726052164267546e-05, + "loss": 0.7891, + "step": 47261 + }, + { + "epoch": 0.05, + "learning_rate": 4.972604056785112e-05, + "loss": 0.7047, + "step": 47262 + }, + { + "epoch": 0.05, + "learning_rate": 4.972602897119061e-05, + "loss": 0.3923, + "step": 47263 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726017374286015e-05, + "loss": 0.3969, + "step": 47264 + }, + { + "epoch": 0.05, + "learning_rate": 4.972600577713733e-05, + "loss": 0.4803, + "step": 47265 + }, + { + "epoch": 0.05, + "learning_rate": 4.972599417974457e-05, + "loss": 0.4822, + "step": 47266 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725982582107715e-05, + "loss": 0.3442, + "step": 47267 + }, + { + "epoch": 0.05, + "learning_rate": 4.972597098422678e-05, + "loss": 0.4049, + "step": 47268 + }, + { + "epoch": 0.05, + "learning_rate": 4.972595938610176e-05, + "loss": 0.3307, + "step": 47269 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725947787732665e-05, + "loss": 0.4603, + "step": 47270 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725936189119475e-05, + "loss": 0.8771, + "step": 47271 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725924590262204e-05, + "loss": 1.0224, + "step": 47272 + }, + { + "epoch": 0.05, + "learning_rate": 4.972591299116085e-05, + "loss": 0.9841, + "step": 47273 + }, + { + "epoch": 0.05, + "learning_rate": 4.972590139181541e-05, + "loss": 0.9809, + "step": 47274 + }, + { + "epoch": 0.05, + "learning_rate": 4.972588979222589e-05, + "loss": 0.7906, + "step": 47275 + }, + { + "epoch": 0.05, + "learning_rate": 4.972587819239228e-05, + "loss": 1.0337, + "step": 47276 + }, + { + "epoch": 0.05, + "learning_rate": 4.972586659231459e-05, + "loss": 1.1348, + "step": 47277 + }, + { + "epoch": 0.05, + "learning_rate": 4.972585499199283e-05, + "loss": 0.9629, + "step": 47278 + }, + { + "epoch": 0.05, + "learning_rate": 4.972584339142697e-05, + "loss": 0.39, + "step": 47279 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725831790617025e-05, + "loss": 0.1751, + "step": 47280 + }, + { + "epoch": 0.05, + "learning_rate": 4.972582018956301e-05, + "loss": 0.0414, + "step": 47281 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725808588264896e-05, + "loss": 0.4028, + "step": 47282 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725796986722714e-05, + "loss": 0.1399, + "step": 47283 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725785384936444e-05, + "loss": 0.1782, + "step": 47284 + }, + { + "epoch": 0.05, + "learning_rate": 4.972577378290609e-05, + "loss": 0.2117, + "step": 47285 + }, + { + "epoch": 0.05, + "learning_rate": 4.972576218063166e-05, + "loss": 0.9372, + "step": 47286 + }, + { + "epoch": 0.05, + "learning_rate": 4.972575057811314e-05, + "loss": 0.961, + "step": 47287 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725738975350546e-05, + "loss": 1.1006, + "step": 47288 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725727372343864e-05, + "loss": 1.0525, + "step": 47289 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725715769093094e-05, + "loss": 1.1554, + "step": 47290 + }, + { + "epoch": 0.05, + "learning_rate": 4.972570416559825e-05, + "loss": 1.052, + "step": 47291 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725692561859324e-05, + "loss": 0.7876, + "step": 47292 + }, + { + "epoch": 0.05, + "learning_rate": 4.972568095787632e-05, + "loss": 0.8809, + "step": 47293 + }, + { + "epoch": 0.05, + "learning_rate": 4.972566935364923e-05, + "loss": 0.821, + "step": 47294 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725657749178056e-05, + "loss": 1.0012, + "step": 47295 + }, + { + "epoch": 0.05, + "learning_rate": 4.97256461444628e-05, + "loss": 1.0809, + "step": 47296 + }, + { + "epoch": 0.05, + "learning_rate": 4.972563453950346e-05, + "loss": 1.0241, + "step": 47297 + }, + { + "epoch": 0.05, + "learning_rate": 4.972562293430005e-05, + "loss": 1.1237, + "step": 47298 + }, + { + "epoch": 0.05, + "learning_rate": 4.972561132885255e-05, + "loss": 0.9638, + "step": 47299 + }, + { + "epoch": 0.05, + "learning_rate": 4.972559972316097e-05, + "loss": 0.9035, + "step": 47300 + }, + { + "epoch": 0.05, + "learning_rate": 4.972558811722532e-05, + "loss": 0.3832, + "step": 47301 + }, + { + "epoch": 0.05, + "learning_rate": 4.972557651104558e-05, + "loss": 0.8784, + "step": 47302 + }, + { + "epoch": 0.05, + "learning_rate": 4.972556490462177e-05, + "loss": 1.0054, + "step": 47303 + }, + { + "epoch": 0.05, + "learning_rate": 4.972555329795386e-05, + "loss": 1.1814, + "step": 47304 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725541691041885e-05, + "loss": 1.1575, + "step": 47305 + }, + { + "epoch": 0.05, + "learning_rate": 4.972553008388583e-05, + "loss": 1.4476, + "step": 47306 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725518476485685e-05, + "loss": 1.0689, + "step": 47307 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725506868841464e-05, + "loss": 1.0057, + "step": 47308 + }, + { + "epoch": 0.05, + "learning_rate": 4.972549526095317e-05, + "loss": 1.0406, + "step": 47309 + }, + { + "epoch": 0.05, + "learning_rate": 4.972548365282079e-05, + "loss": 1.0724, + "step": 47310 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725472044444334e-05, + "loss": 1.2389, + "step": 47311 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725460435823795e-05, + "loss": 1.0601, + "step": 47312 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725448826959176e-05, + "loss": 0.5561, + "step": 47313 + }, + { + "epoch": 0.05, + "learning_rate": 4.972543721785048e-05, + "loss": 0.1846, + "step": 47314 + }, + { + "epoch": 0.05, + "learning_rate": 4.972542560849771e-05, + "loss": 0.1003, + "step": 47315 + }, + { + "epoch": 0.05, + "learning_rate": 4.972541399890085e-05, + "loss": 0.1152, + "step": 47316 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725402389059914e-05, + "loss": 0.0846, + "step": 47317 + }, + { + "epoch": 0.05, + "learning_rate": 4.97253907789749e-05, + "loss": 0.0596, + "step": 47318 + }, + { + "epoch": 0.05, + "learning_rate": 4.972537916864581e-05, + "loss": 0.0429, + "step": 47319 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725367558072644e-05, + "loss": 0.0358, + "step": 47320 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725355947255404e-05, + "loss": 0.2211, + "step": 47321 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725344336194075e-05, + "loss": 0.2923, + "step": 47322 + }, + { + "epoch": 0.05, + "learning_rate": 4.972533272488867e-05, + "loss": 0.2664, + "step": 47323 + }, + { + "epoch": 0.05, + "learning_rate": 4.972532111333918e-05, + "loss": 0.7532, + "step": 47324 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725309501545624e-05, + "loss": 0.9824, + "step": 47325 + }, + { + "epoch": 0.05, + "learning_rate": 4.972529788950799e-05, + "loss": 0.9214, + "step": 47326 + }, + { + "epoch": 0.05, + "learning_rate": 4.972528627722627e-05, + "loss": 0.9849, + "step": 47327 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725274664700485e-05, + "loss": 0.7678, + "step": 47328 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725263051930616e-05, + "loss": 0.1669, + "step": 47329 + }, + { + "epoch": 0.05, + "learning_rate": 4.972525143891667e-05, + "loss": 0.1965, + "step": 47330 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725239825658636e-05, + "loss": 0.2651, + "step": 47331 + }, + { + "epoch": 0.05, + "learning_rate": 4.972522821215654e-05, + "loss": 0.3871, + "step": 47332 + }, + { + "epoch": 0.05, + "learning_rate": 4.972521659841036e-05, + "loss": 0.169, + "step": 47333 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725204984420106e-05, + "loss": 0.1831, + "step": 47334 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725193370185765e-05, + "loss": 0.3821, + "step": 47335 + }, + { + "epoch": 0.05, + "learning_rate": 4.972518175570736e-05, + "loss": 0.9786, + "step": 47336 + }, + { + "epoch": 0.05, + "learning_rate": 4.972517014098487e-05, + "loss": 1.3002, + "step": 47337 + }, + { + "epoch": 0.05, + "learning_rate": 4.972515852601831e-05, + "loss": 1.2876, + "step": 47338 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725146910807666e-05, + "loss": 1.1376, + "step": 47339 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725135295352954e-05, + "loss": 1.2638, + "step": 47340 + }, + { + "epoch": 0.05, + "learning_rate": 4.972512367965416e-05, + "loss": 0.903, + "step": 47341 + }, + { + "epoch": 0.05, + "learning_rate": 4.972511206371129e-05, + "loss": 0.9893, + "step": 47342 + }, + { + "epoch": 0.05, + "learning_rate": 4.972510044752435e-05, + "loss": 0.2606, + "step": 47343 + }, + { + "epoch": 0.05, + "learning_rate": 4.972508883109333e-05, + "loss": 0.0689, + "step": 47344 + }, + { + "epoch": 0.05, + "learning_rate": 4.972507721441824e-05, + "loss": 0.2565, + "step": 47345 + }, + { + "epoch": 0.05, + "learning_rate": 4.972506559749907e-05, + "loss": 0.6409, + "step": 47346 + }, + { + "epoch": 0.05, + "learning_rate": 4.972505398033582e-05, + "loss": 0.7309, + "step": 47347 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725042362928496e-05, + "loss": 0.9056, + "step": 47348 + }, + { + "epoch": 0.05, + "learning_rate": 4.97250307452771e-05, + "loss": 0.8961, + "step": 47349 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725019127381634e-05, + "loss": 0.9306, + "step": 47350 + }, + { + "epoch": 0.05, + "learning_rate": 4.9725007509242085e-05, + "loss": 1.0392, + "step": 47351 + }, + { + "epoch": 0.05, + "learning_rate": 4.972499589085846e-05, + "loss": 1.1059, + "step": 47352 + }, + { + "epoch": 0.05, + "learning_rate": 4.972498427223077e-05, + "loss": 0.7465, + "step": 47353 + }, + { + "epoch": 0.05, + "learning_rate": 4.972497265335899e-05, + "loss": 0.7445, + "step": 47354 + }, + { + "epoch": 0.05, + "learning_rate": 4.972496103424315e-05, + "loss": 0.8073, + "step": 47355 + }, + { + "epoch": 0.05, + "learning_rate": 4.972494941488323e-05, + "loss": 0.6767, + "step": 47356 + }, + { + "epoch": 0.05, + "learning_rate": 4.972493779527924e-05, + "loss": 0.7559, + "step": 47357 + }, + { + "epoch": 0.05, + "learning_rate": 4.972492617543117e-05, + "loss": 0.7023, + "step": 47358 + }, + { + "epoch": 0.05, + "learning_rate": 4.972491455533903e-05, + "loss": 1.4445, + "step": 47359 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724902935002815e-05, + "loss": 1.0933, + "step": 47360 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724891314422524e-05, + "loss": 0.8464, + "step": 47361 + }, + { + "epoch": 0.05, + "learning_rate": 4.972487969359816e-05, + "loss": 1.3321, + "step": 47362 + }, + { + "epoch": 0.05, + "learning_rate": 4.972486807252972e-05, + "loss": 0.9723, + "step": 47363 + }, + { + "epoch": 0.05, + "learning_rate": 4.972485645121721e-05, + "loss": 0.8759, + "step": 47364 + }, + { + "epoch": 0.05, + "learning_rate": 4.972484482966063e-05, + "loss": 1.0955, + "step": 47365 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724833207859967e-05, + "loss": 1.1391, + "step": 47366 + }, + { + "epoch": 0.05, + "learning_rate": 4.972482158581524e-05, + "loss": 1.0003, + "step": 47367 + }, + { + "epoch": 0.05, + "learning_rate": 4.972480996352644e-05, + "loss": 1.1062, + "step": 47368 + }, + { + "epoch": 0.05, + "learning_rate": 4.972479834099356e-05, + "loss": 1.2483, + "step": 47369 + }, + { + "epoch": 0.05, + "learning_rate": 4.972478671821661e-05, + "loss": 1.0486, + "step": 47370 + }, + { + "epoch": 0.05, + "learning_rate": 4.972477509519559e-05, + "loss": 0.5602, + "step": 47371 + }, + { + "epoch": 0.05, + "learning_rate": 4.972476347193049e-05, + "loss": 1.009, + "step": 47372 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724751848421324e-05, + "loss": 0.986, + "step": 47373 + }, + { + "epoch": 0.05, + "learning_rate": 4.972474022466809e-05, + "loss": 1.0623, + "step": 47374 + }, + { + "epoch": 0.05, + "learning_rate": 4.972472860067078e-05, + "loss": 1.1278, + "step": 47375 + }, + { + "epoch": 0.05, + "learning_rate": 4.972471697642939e-05, + "loss": 1.0449, + "step": 47376 + }, + { + "epoch": 0.05, + "learning_rate": 4.972470535194393e-05, + "loss": 0.855, + "step": 47377 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724693727214414e-05, + "loss": 1.0394, + "step": 47378 + }, + { + "epoch": 0.05, + "learning_rate": 4.972468210224081e-05, + "loss": 0.7867, + "step": 47379 + }, + { + "epoch": 0.05, + "learning_rate": 4.972467047702314e-05, + "loss": 1.0457, + "step": 47380 + }, + { + "epoch": 0.05, + "learning_rate": 4.97246588515614e-05, + "loss": 1.2962, + "step": 47381 + }, + { + "epoch": 0.05, + "learning_rate": 4.972464722585558e-05, + "loss": 1.0096, + "step": 47382 + }, + { + "epoch": 0.05, + "learning_rate": 4.97246355999057e-05, + "loss": 1.1025, + "step": 47383 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724623973711744e-05, + "loss": 0.9643, + "step": 47384 + }, + { + "epoch": 0.05, + "learning_rate": 4.972461234727371e-05, + "loss": 0.9582, + "step": 47385 + }, + { + "epoch": 0.05, + "learning_rate": 4.972460072059162e-05, + "loss": 0.8803, + "step": 47386 + }, + { + "epoch": 0.05, + "learning_rate": 4.972458909366545e-05, + "loss": 0.9371, + "step": 47387 + }, + { + "epoch": 0.05, + "learning_rate": 4.972457746649521e-05, + "loss": 0.907, + "step": 47388 + }, + { + "epoch": 0.05, + "learning_rate": 4.97245658390809e-05, + "loss": 0.883, + "step": 47389 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724554211422516e-05, + "loss": 0.8377, + "step": 47390 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724542583520065e-05, + "loss": 0.799, + "step": 47391 + }, + { + "epoch": 0.05, + "learning_rate": 4.972453095537355e-05, + "loss": 0.821, + "step": 47392 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724519326982954e-05, + "loss": 1.2618, + "step": 47393 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724507698348294e-05, + "loss": 1.2119, + "step": 47394 + }, + { + "epoch": 0.05, + "learning_rate": 4.972449606946956e-05, + "loss": 0.9531, + "step": 47395 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724484440346765e-05, + "loss": 0.6227, + "step": 47396 + }, + { + "epoch": 0.05, + "learning_rate": 4.972447281097989e-05, + "loss": 1.0859, + "step": 47397 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724461181368946e-05, + "loss": 1.1658, + "step": 47398 + }, + { + "epoch": 0.05, + "learning_rate": 4.972444955151394e-05, + "loss": 1.1444, + "step": 47399 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724437921414857e-05, + "loss": 0.9993, + "step": 47400 + }, + { + "epoch": 0.05, + "learning_rate": 4.972442629107171e-05, + "loss": 0.7827, + "step": 47401 + }, + { + "epoch": 0.05, + "learning_rate": 4.972441466048449e-05, + "loss": 0.9202, + "step": 47402 + }, + { + "epoch": 0.05, + "learning_rate": 4.97244030296532e-05, + "loss": 0.7523, + "step": 47403 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724391398577855e-05, + "loss": 0.8437, + "step": 47404 + }, + { + "epoch": 0.05, + "learning_rate": 4.972437976725842e-05, + "loss": 1.0582, + "step": 47405 + }, + { + "epoch": 0.05, + "learning_rate": 4.972436813569493e-05, + "loss": 0.9717, + "step": 47406 + }, + { + "epoch": 0.05, + "learning_rate": 4.972435650388737e-05, + "loss": 0.9788, + "step": 47407 + }, + { + "epoch": 0.05, + "learning_rate": 4.972434487183574e-05, + "loss": 1.1435, + "step": 47408 + }, + { + "epoch": 0.05, + "learning_rate": 4.972433323954004e-05, + "loss": 1.0504, + "step": 47409 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724321607000275e-05, + "loss": 0.7498, + "step": 47410 + }, + { + "epoch": 0.05, + "learning_rate": 4.972430997421644e-05, + "loss": 0.9077, + "step": 47411 + }, + { + "epoch": 0.05, + "learning_rate": 4.972429834118853e-05, + "loss": 1.0682, + "step": 47412 + }, + { + "epoch": 0.05, + "learning_rate": 4.972428670791657e-05, + "loss": 1.5217, + "step": 47413 + }, + { + "epoch": 0.05, + "learning_rate": 4.972427507440053e-05, + "loss": 1.1577, + "step": 47414 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724263440640423e-05, + "loss": 0.9272, + "step": 47415 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724251806636245e-05, + "loss": 1.1134, + "step": 47416 + }, + { + "epoch": 0.05, + "learning_rate": 4.972424017238801e-05, + "loss": 0.9286, + "step": 47417 + }, + { + "epoch": 0.05, + "learning_rate": 4.97242285378957e-05, + "loss": 1.0745, + "step": 47418 + }, + { + "epoch": 0.05, + "learning_rate": 4.972421690315933e-05, + "loss": 0.9756, + "step": 47419 + }, + { + "epoch": 0.05, + "learning_rate": 4.972420526817888e-05, + "loss": 0.9483, + "step": 47420 + }, + { + "epoch": 0.05, + "learning_rate": 4.972419363295438e-05, + "loss": 0.9807, + "step": 47421 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724181997485794e-05, + "loss": 0.9794, + "step": 47422 + }, + { + "epoch": 0.05, + "learning_rate": 4.972417036177315e-05, + "loss": 1.1824, + "step": 47423 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724158725816447e-05, + "loss": 0.8721, + "step": 47424 + }, + { + "epoch": 0.05, + "learning_rate": 4.972414708961567e-05, + "loss": 1.062, + "step": 47425 + }, + { + "epoch": 0.05, + "learning_rate": 4.972413545317083e-05, + "loss": 0.9113, + "step": 47426 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724123816481916e-05, + "loss": 1.0698, + "step": 47427 + }, + { + "epoch": 0.05, + "learning_rate": 4.972411217954894e-05, + "loss": 1.1072, + "step": 47428 + }, + { + "epoch": 0.05, + "learning_rate": 4.972410054237191e-05, + "loss": 0.9901, + "step": 47429 + }, + { + "epoch": 0.05, + "learning_rate": 4.972408890495079e-05, + "loss": 0.8218, + "step": 47430 + }, + { + "epoch": 0.05, + "learning_rate": 4.972407726728563e-05, + "loss": 0.8465, + "step": 47431 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724065629376384e-05, + "loss": 0.9426, + "step": 47432 + }, + { + "epoch": 0.05, + "learning_rate": 4.972405399122309e-05, + "loss": 1.5024, + "step": 47433 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724042352825715e-05, + "loss": 1.2873, + "step": 47434 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724030714184275e-05, + "loss": 1.2534, + "step": 47435 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724019075298775e-05, + "loss": 1.0371, + "step": 47436 + }, + { + "epoch": 0.05, + "learning_rate": 4.972400743616922e-05, + "loss": 1.0383, + "step": 47437 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723995796795586e-05, + "loss": 0.7222, + "step": 47438 + }, + { + "epoch": 0.05, + "learning_rate": 4.97239841571779e-05, + "loss": 1.0986, + "step": 47439 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723972517316133e-05, + "loss": 0.8258, + "step": 47440 + }, + { + "epoch": 0.05, + "learning_rate": 4.972396087721031e-05, + "loss": 0.956, + "step": 47441 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723949236860425e-05, + "loss": 1.0015, + "step": 47442 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723937596266466e-05, + "loss": 0.932, + "step": 47443 + }, + { + "epoch": 0.05, + "learning_rate": 4.972392595542845e-05, + "loss": 0.447, + "step": 47444 + }, + { + "epoch": 0.05, + "learning_rate": 4.972391431434637e-05, + "loss": 1.083, + "step": 47445 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723902673020225e-05, + "loss": 1.0155, + "step": 47446 + }, + { + "epoch": 0.05, + "learning_rate": 4.972389103145002e-05, + "loss": 0.9779, + "step": 47447 + }, + { + "epoch": 0.05, + "learning_rate": 4.972387938963574e-05, + "loss": 1.0168, + "step": 47448 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723867747577405e-05, + "loss": 1.2087, + "step": 47449 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723856105275e-05, + "loss": 0.7228, + "step": 47450 + }, + { + "epoch": 0.05, + "learning_rate": 4.972384446272854e-05, + "loss": 0.8132, + "step": 47451 + }, + { + "epoch": 0.05, + "learning_rate": 4.972383281993802e-05, + "loss": 0.8029, + "step": 47452 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723821176903425e-05, + "loss": 0.8649, + "step": 47453 + }, + { + "epoch": 0.05, + "learning_rate": 4.972380953362477e-05, + "loss": 1.1776, + "step": 47454 + }, + { + "epoch": 0.05, + "learning_rate": 4.972379789010205e-05, + "loss": 1.0307, + "step": 47455 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723786246335275e-05, + "loss": 1.109, + "step": 47456 + }, + { + "epoch": 0.05, + "learning_rate": 4.972377460232443e-05, + "loss": 1.0875, + "step": 47457 + }, + { + "epoch": 0.05, + "learning_rate": 4.972376295806952e-05, + "loss": 1.063, + "step": 47458 + }, + { + "epoch": 0.05, + "learning_rate": 4.972375131357055e-05, + "loss": 0.9804, + "step": 47459 + }, + { + "epoch": 0.05, + "learning_rate": 4.972373966882753e-05, + "loss": 0.8742, + "step": 47460 + }, + { + "epoch": 0.05, + "learning_rate": 4.972372802384043e-05, + "loss": 0.5911, + "step": 47461 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723716378609284e-05, + "loss": 0.4932, + "step": 47462 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723704733134064e-05, + "loss": 0.435, + "step": 47463 + }, + { + "epoch": 0.05, + "learning_rate": 4.972369308741478e-05, + "loss": 0.9755, + "step": 47464 + }, + { + "epoch": 0.05, + "learning_rate": 4.972368144145144e-05, + "loss": 1.0935, + "step": 47465 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723669795244047e-05, + "loss": 1.097, + "step": 47466 + }, + { + "epoch": 0.05, + "learning_rate": 4.972365814879258e-05, + "loss": 1.121, + "step": 47467 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723646502097054e-05, + "loss": 0.9515, + "step": 47468 + }, + { + "epoch": 0.05, + "learning_rate": 4.972363485515746e-05, + "loss": 0.6616, + "step": 47469 + }, + { + "epoch": 0.05, + "learning_rate": 4.972362320797382e-05, + "loss": 0.772, + "step": 47470 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723611560546106e-05, + "loss": 0.8237, + "step": 47471 + }, + { + "epoch": 0.05, + "learning_rate": 4.972359991287434e-05, + "loss": 1.0167, + "step": 47472 + }, + { + "epoch": 0.05, + "learning_rate": 4.972358826495851e-05, + "loss": 1.3674, + "step": 47473 + }, + { + "epoch": 0.05, + "learning_rate": 4.972357661679862e-05, + "loss": 0.8622, + "step": 47474 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723564968394657e-05, + "loss": 1.5365, + "step": 47475 + }, + { + "epoch": 0.05, + "learning_rate": 4.972355331974665e-05, + "loss": 1.7409, + "step": 47476 + }, + { + "epoch": 0.05, + "learning_rate": 4.972354167085457e-05, + "loss": 1.1145, + "step": 47477 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723530021718434e-05, + "loss": 0.6862, + "step": 47478 + }, + { + "epoch": 0.05, + "learning_rate": 4.972351837233824e-05, + "loss": 0.8678, + "step": 47479 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723506722713985e-05, + "loss": 0.9923, + "step": 47480 + }, + { + "epoch": 0.05, + "learning_rate": 4.972349507284567e-05, + "loss": 0.5492, + "step": 47481 + }, + { + "epoch": 0.05, + "learning_rate": 4.97234834227333e-05, + "loss": 1.0498, + "step": 47482 + }, + { + "epoch": 0.05, + "learning_rate": 4.972347177237686e-05, + "loss": 1.0002, + "step": 47483 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723460121776365e-05, + "loss": 0.8533, + "step": 47484 + }, + { + "epoch": 0.05, + "learning_rate": 4.972344847093181e-05, + "loss": 0.9885, + "step": 47485 + }, + { + "epoch": 0.05, + "learning_rate": 4.97234368198432e-05, + "loss": 0.9477, + "step": 47486 + }, + { + "epoch": 0.05, + "learning_rate": 4.972342516851053e-05, + "loss": 1.1564, + "step": 47487 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723413516933794e-05, + "loss": 1.9091, + "step": 47488 + }, + { + "epoch": 0.05, + "learning_rate": 4.972340186511301e-05, + "loss": 1.3725, + "step": 47489 + }, + { + "epoch": 0.05, + "learning_rate": 4.972339021304815e-05, + "loss": 1.2152, + "step": 47490 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723378560739245e-05, + "loss": 1.1971, + "step": 47491 + }, + { + "epoch": 0.05, + "learning_rate": 4.972336690818627e-05, + "loss": 0.8745, + "step": 47492 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723355255389245e-05, + "loss": 0.8613, + "step": 47493 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723343602348164e-05, + "loss": 1.2347, + "step": 47494 + }, + { + "epoch": 0.05, + "learning_rate": 4.972333194906302e-05, + "loss": 1.0256, + "step": 47495 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723320295533814e-05, + "loss": 0.954, + "step": 47496 + }, + { + "epoch": 0.05, + "learning_rate": 4.972330864176056e-05, + "loss": 0.9583, + "step": 47497 + }, + { + "epoch": 0.05, + "learning_rate": 4.972329698774324e-05, + "loss": 1.0692, + "step": 47498 + }, + { + "epoch": 0.05, + "learning_rate": 4.972328533348186e-05, + "loss": 0.9339, + "step": 47499 + }, + { + "epoch": 0.05, + "learning_rate": 4.972327367897643e-05, + "loss": 1.5265, + "step": 47500 + }, + { + "epoch": 0.05, + "eval_loss": 1.079487919807434, + "eval_runtime": 126.9614, + "eval_samples_per_second": 10.909, + "eval_steps_per_second": 5.458, + "step": 47500 + }, + { + "epoch": 0.05, + "learning_rate": 4.972326202422693e-05, + "loss": 1.184, + "step": 47501 + }, + { + "epoch": 0.05, + "learning_rate": 4.972325036923339e-05, + "loss": 0.9142, + "step": 47502 + }, + { + "epoch": 0.05, + "learning_rate": 4.972323871399578e-05, + "loss": 1.257, + "step": 47503 + }, + { + "epoch": 0.05, + "learning_rate": 4.972322705851412e-05, + "loss": 1.7796, + "step": 47504 + }, + { + "epoch": 0.05, + "learning_rate": 4.97232154027884e-05, + "loss": 1.6747, + "step": 47505 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723203746818616e-05, + "loss": 1.0056, + "step": 47506 + }, + { + "epoch": 0.05, + "learning_rate": 4.972319209060478e-05, + "loss": 1.1675, + "step": 47507 + }, + { + "epoch": 0.05, + "learning_rate": 4.972318043414689e-05, + "loss": 0.9335, + "step": 47508 + }, + { + "epoch": 0.05, + "learning_rate": 4.972316877744494e-05, + "loss": 1.0005, + "step": 47509 + }, + { + "epoch": 0.05, + "learning_rate": 4.972315712049893e-05, + "loss": 0.9524, + "step": 47510 + }, + { + "epoch": 0.05, + "learning_rate": 4.972314546330887e-05, + "loss": 0.4542, + "step": 47511 + }, + { + "epoch": 0.05, + "learning_rate": 4.972313380587476e-05, + "loss": 1.0785, + "step": 47512 + }, + { + "epoch": 0.05, + "learning_rate": 4.972312214819658e-05, + "loss": 1.089, + "step": 47513 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723110490274344e-05, + "loss": 0.8167, + "step": 47514 + }, + { + "epoch": 0.05, + "learning_rate": 4.972309883210806e-05, + "loss": 1.0724, + "step": 47515 + }, + { + "epoch": 0.05, + "learning_rate": 4.9723087173697715e-05, + "loss": 0.9436, + "step": 47516 + }, + { + "epoch": 0.05, + "learning_rate": 4.972307551504331e-05, + "loss": 1.3269, + "step": 47517 + }, + { + "epoch": 0.05, + "learning_rate": 4.972306385614486e-05, + "loss": 0.9477, + "step": 47518 + }, + { + "epoch": 0.05, + "learning_rate": 4.972305219700235e-05, + "loss": 1.1268, + "step": 47519 + }, + { + "epoch": 0.05, + "learning_rate": 4.972304053761578e-05, + "loss": 1.3103, + "step": 47520 + }, + { + "epoch": 0.05, + "learning_rate": 4.972302887798516e-05, + "loss": 0.6856, + "step": 47521 + }, + { + "epoch": 0.05, + "learning_rate": 4.972301721811048e-05, + "loss": 1.0998, + "step": 47522 + }, + { + "epoch": 0.05, + "learning_rate": 4.972300555799174e-05, + "loss": 0.5727, + "step": 47523 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722993897628955e-05, + "loss": 0.9988, + "step": 47524 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722982237022115e-05, + "loss": 1.0005, + "step": 47525 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722970576171214e-05, + "loss": 0.8141, + "step": 47526 + }, + { + "epoch": 0.05, + "learning_rate": 4.972295891507626e-05, + "loss": 0.3928, + "step": 47527 + }, + { + "epoch": 0.05, + "learning_rate": 4.972294725373726e-05, + "loss": 0.6103, + "step": 47528 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722935592154195e-05, + "loss": 1.2807, + "step": 47529 + }, + { + "epoch": 0.05, + "learning_rate": 4.972292393032708e-05, + "loss": 1.2436, + "step": 47530 + }, + { + "epoch": 0.05, + "learning_rate": 4.972291226825591e-05, + "loss": 1.36, + "step": 47531 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722900605940683e-05, + "loss": 1.1032, + "step": 47532 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722888943381405e-05, + "loss": 1.0128, + "step": 47533 + }, + { + "epoch": 0.05, + "learning_rate": 4.972287728057807e-05, + "loss": 0.6976, + "step": 47534 + }, + { + "epoch": 0.05, + "learning_rate": 4.972286561753069e-05, + "loss": 0.9742, + "step": 47535 + }, + { + "epoch": 0.05, + "learning_rate": 4.972285395423925e-05, + "loss": 1.1171, + "step": 47536 + }, + { + "epoch": 0.05, + "learning_rate": 4.972284229070375e-05, + "loss": 0.9876, + "step": 47537 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722830626924205e-05, + "loss": 0.7511, + "step": 47538 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722818962900604e-05, + "loss": 1.2316, + "step": 47539 + }, + { + "epoch": 0.05, + "learning_rate": 4.972280729863295e-05, + "loss": 1.1524, + "step": 47540 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722795634121245e-05, + "loss": 0.7962, + "step": 47541 + }, + { + "epoch": 0.05, + "learning_rate": 4.972278396936548e-05, + "loss": 0.9375, + "step": 47542 + }, + { + "epoch": 0.05, + "learning_rate": 4.972277230436567e-05, + "loss": 1.2624, + "step": 47543 + }, + { + "epoch": 0.05, + "learning_rate": 4.97227606391218e-05, + "loss": 1.0783, + "step": 47544 + }, + { + "epoch": 0.05, + "learning_rate": 4.972274897363388e-05, + "loss": 0.9241, + "step": 47545 + }, + { + "epoch": 0.05, + "learning_rate": 4.972273730790191e-05, + "loss": 0.696, + "step": 47546 + }, + { + "epoch": 0.05, + "learning_rate": 4.97227256419259e-05, + "loss": 1.319, + "step": 47547 + }, + { + "epoch": 0.05, + "learning_rate": 4.972271397570582e-05, + "loss": 0.8645, + "step": 47548 + }, + { + "epoch": 0.05, + "learning_rate": 4.972270230924169e-05, + "loss": 1.0405, + "step": 47549 + }, + { + "epoch": 0.05, + "learning_rate": 4.972269064253351e-05, + "loss": 1.2527, + "step": 47550 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722678975581273e-05, + "loss": 0.9075, + "step": 47551 + }, + { + "epoch": 0.05, + "learning_rate": 4.972266730838499e-05, + "loss": 1.0558, + "step": 47552 + }, + { + "epoch": 0.05, + "learning_rate": 4.972265564094466e-05, + "loss": 1.1986, + "step": 47553 + }, + { + "epoch": 0.05, + "learning_rate": 4.972264397326027e-05, + "loss": 0.8642, + "step": 47554 + }, + { + "epoch": 0.05, + "learning_rate": 4.972263230533183e-05, + "loss": 0.9598, + "step": 47555 + }, + { + "epoch": 0.05, + "learning_rate": 4.972262063715934e-05, + "loss": 0.7168, + "step": 47556 + }, + { + "epoch": 0.05, + "learning_rate": 4.97226089687428e-05, + "loss": 0.9344, + "step": 47557 + }, + { + "epoch": 0.05, + "learning_rate": 4.972259730008221e-05, + "loss": 1.151, + "step": 47558 + }, + { + "epoch": 0.05, + "learning_rate": 4.972258563117757e-05, + "loss": 1.4403, + "step": 47559 + }, + { + "epoch": 0.05, + "learning_rate": 4.972257396202887e-05, + "loss": 0.9698, + "step": 47560 + }, + { + "epoch": 0.05, + "learning_rate": 4.972256229263612e-05, + "loss": 1.2386, + "step": 47561 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722550622999327e-05, + "loss": 1.0838, + "step": 47562 + }, + { + "epoch": 0.05, + "learning_rate": 4.972253895311848e-05, + "loss": 1.0081, + "step": 47563 + }, + { + "epoch": 0.05, + "learning_rate": 4.972252728299358e-05, + "loss": 1.115, + "step": 47564 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722515612624634e-05, + "loss": 0.9838, + "step": 47565 + }, + { + "epoch": 0.05, + "learning_rate": 4.972250394201163e-05, + "loss": 0.9051, + "step": 47566 + }, + { + "epoch": 0.05, + "learning_rate": 4.972249227115459e-05, + "loss": 0.8142, + "step": 47567 + }, + { + "epoch": 0.05, + "learning_rate": 4.972248060005349e-05, + "loss": 0.8631, + "step": 47568 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722468928708346e-05, + "loss": 1.3519, + "step": 47569 + }, + { + "epoch": 0.05, + "learning_rate": 4.972245725711914e-05, + "loss": 1.2532, + "step": 47570 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722445585285895e-05, + "loss": 0.8689, + "step": 47571 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722433913208596e-05, + "loss": 1.097, + "step": 47572 + }, + { + "epoch": 0.05, + "learning_rate": 4.972242224088725e-05, + "loss": 0.9888, + "step": 47573 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722410568321856e-05, + "loss": 1.0036, + "step": 47574 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722398895512415e-05, + "loss": 1.1234, + "step": 47575 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722387222458914e-05, + "loss": 1.0608, + "step": 47576 + }, + { + "epoch": 0.05, + "learning_rate": 4.972237554916137e-05, + "loss": 0.967, + "step": 47577 + }, + { + "epoch": 0.05, + "learning_rate": 4.972236387561978e-05, + "loss": 1.0022, + "step": 47578 + }, + { + "epoch": 0.05, + "learning_rate": 4.972235220183415e-05, + "loss": 1.2565, + "step": 47579 + }, + { + "epoch": 0.05, + "learning_rate": 4.972234052780445e-05, + "loss": 0.9437, + "step": 47580 + }, + { + "epoch": 0.05, + "learning_rate": 4.972232885353071e-05, + "loss": 0.9624, + "step": 47581 + }, + { + "epoch": 0.05, + "learning_rate": 4.972231717901293e-05, + "loss": 1.2095, + "step": 47582 + }, + { + "epoch": 0.05, + "learning_rate": 4.972230550425109e-05, + "loss": 1.5633, + "step": 47583 + }, + { + "epoch": 0.05, + "learning_rate": 4.972229382924521e-05, + "loss": 1.5075, + "step": 47584 + }, + { + "epoch": 0.05, + "learning_rate": 4.972228215399528e-05, + "loss": 1.122, + "step": 47585 + }, + { + "epoch": 0.05, + "learning_rate": 4.97222704785013e-05, + "loss": 1.0618, + "step": 47586 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722258802763273e-05, + "loss": 0.3949, + "step": 47587 + }, + { + "epoch": 0.05, + "learning_rate": 4.972224712678119e-05, + "loss": 1.2002, + "step": 47588 + }, + { + "epoch": 0.05, + "learning_rate": 4.972223545055508e-05, + "loss": 0.9827, + "step": 47589 + }, + { + "epoch": 0.05, + "learning_rate": 4.972222377408491e-05, + "loss": 1.0788, + "step": 47590 + }, + { + "epoch": 0.05, + "learning_rate": 4.972221209737069e-05, + "loss": 0.932, + "step": 47591 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722200420412426e-05, + "loss": 1.0133, + "step": 47592 + }, + { + "epoch": 0.05, + "learning_rate": 4.972218874321011e-05, + "loss": 0.9323, + "step": 47593 + }, + { + "epoch": 0.05, + "learning_rate": 4.972217706576375e-05, + "loss": 0.9646, + "step": 47594 + }, + { + "epoch": 0.05, + "learning_rate": 4.972216538807335e-05, + "loss": 0.9314, + "step": 47595 + }, + { + "epoch": 0.05, + "learning_rate": 4.97221537101389e-05, + "loss": 1.0897, + "step": 47596 + }, + { + "epoch": 0.05, + "learning_rate": 4.97221420319604e-05, + "loss": 0.9972, + "step": 47597 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722130353537856e-05, + "loss": 1.1032, + "step": 47598 + }, + { + "epoch": 0.05, + "learning_rate": 4.972211867487126e-05, + "loss": 0.8286, + "step": 47599 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722106995960624e-05, + "loss": 0.9249, + "step": 47600 + }, + { + "epoch": 0.05, + "learning_rate": 4.972209531680594e-05, + "loss": 0.964, + "step": 47601 + }, + { + "epoch": 0.05, + "learning_rate": 4.972208363740721e-05, + "loss": 1.0891, + "step": 47602 + }, + { + "epoch": 0.05, + "learning_rate": 4.972207195776443e-05, + "loss": 1.0506, + "step": 47603 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722060277877615e-05, + "loss": 1.0986, + "step": 47604 + }, + { + "epoch": 0.05, + "learning_rate": 4.972204859774674e-05, + "loss": 1.157, + "step": 47605 + }, + { + "epoch": 0.05, + "learning_rate": 4.972203691737183e-05, + "loss": 0.9802, + "step": 47606 + }, + { + "epoch": 0.05, + "learning_rate": 4.972202523675287e-05, + "loss": 1.4229, + "step": 47607 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722013555889866e-05, + "loss": 0.9283, + "step": 47608 + }, + { + "epoch": 0.05, + "learning_rate": 4.9722001874782816e-05, + "loss": 0.9217, + "step": 47609 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721990193431726e-05, + "loss": 0.9343, + "step": 47610 + }, + { + "epoch": 0.05, + "learning_rate": 4.972197851183658e-05, + "loss": 1.143, + "step": 47611 + }, + { + "epoch": 0.05, + "learning_rate": 4.972196682999739e-05, + "loss": 0.8308, + "step": 47612 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721955147914165e-05, + "loss": 1.2084, + "step": 47613 + }, + { + "epoch": 0.05, + "learning_rate": 4.972194346558689e-05, + "loss": 0.5448, + "step": 47614 + }, + { + "epoch": 0.05, + "learning_rate": 4.972193178301557e-05, + "loss": 0.1711, + "step": 47615 + }, + { + "epoch": 0.05, + "learning_rate": 4.97219201002002e-05, + "loss": 0.1445, + "step": 47616 + }, + { + "epoch": 0.05, + "learning_rate": 4.972190841714079e-05, + "loss": 1.0674, + "step": 47617 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721896733837346e-05, + "loss": 0.7391, + "step": 47618 + }, + { + "epoch": 0.05, + "learning_rate": 4.972188505028985e-05, + "loss": 1.025, + "step": 47619 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721873366498304e-05, + "loss": 0.9227, + "step": 47620 + }, + { + "epoch": 0.05, + "learning_rate": 4.972186168246272e-05, + "loss": 0.7861, + "step": 47621 + }, + { + "epoch": 0.05, + "learning_rate": 4.972184999818309e-05, + "loss": 0.9628, + "step": 47622 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721838313659424e-05, + "loss": 0.4995, + "step": 47623 + }, + { + "epoch": 0.05, + "learning_rate": 4.972182662889171e-05, + "loss": 0.9367, + "step": 47624 + }, + { + "epoch": 0.05, + "learning_rate": 4.972181494387995e-05, + "loss": 1.2652, + "step": 47625 + }, + { + "epoch": 0.05, + "learning_rate": 4.972180325862415e-05, + "loss": 0.9921, + "step": 47626 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721791573124296e-05, + "loss": 0.7729, + "step": 47627 + }, + { + "epoch": 0.05, + "learning_rate": 4.972177988738042e-05, + "loss": 1.1216, + "step": 47628 + }, + { + "epoch": 0.05, + "learning_rate": 4.972176820139248e-05, + "loss": 1.2712, + "step": 47629 + }, + { + "epoch": 0.05, + "learning_rate": 4.972175651516051e-05, + "loss": 1.2716, + "step": 47630 + }, + { + "epoch": 0.05, + "learning_rate": 4.972174482868449e-05, + "loss": 1.1832, + "step": 47631 + }, + { + "epoch": 0.05, + "learning_rate": 4.972173314196443e-05, + "loss": 0.9865, + "step": 47632 + }, + { + "epoch": 0.05, + "learning_rate": 4.972172145500033e-05, + "loss": 1.768, + "step": 47633 + }, + { + "epoch": 0.05, + "learning_rate": 4.972170976779218e-05, + "loss": 0.9221, + "step": 47634 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721698080339994e-05, + "loss": 1.261, + "step": 47635 + }, + { + "epoch": 0.05, + "learning_rate": 4.972168639264376e-05, + "loss": 1.4001, + "step": 47636 + }, + { + "epoch": 0.05, + "learning_rate": 4.972167470470349e-05, + "loss": 0.3266, + "step": 47637 + }, + { + "epoch": 0.05, + "learning_rate": 4.972166301651918e-05, + "loss": 0.0808, + "step": 47638 + }, + { + "epoch": 0.05, + "learning_rate": 4.972165132809082e-05, + "loss": 0.5525, + "step": 47639 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721639639418425e-05, + "loss": 0.8228, + "step": 47640 + }, + { + "epoch": 0.05, + "learning_rate": 4.972162795050199e-05, + "loss": 1.0019, + "step": 47641 + }, + { + "epoch": 0.05, + "learning_rate": 4.972161626134151e-05, + "loss": 1.8521, + "step": 47642 + }, + { + "epoch": 0.05, + "learning_rate": 4.972160457193699e-05, + "loss": 1.1853, + "step": 47643 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721592882288424e-05, + "loss": 0.8639, + "step": 47644 + }, + { + "epoch": 0.05, + "learning_rate": 4.972158119239582e-05, + "loss": 0.5994, + "step": 47645 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721569502259174e-05, + "loss": 0.637, + "step": 47646 + }, + { + "epoch": 0.05, + "learning_rate": 4.972155781187849e-05, + "loss": 0.7375, + "step": 47647 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721546121253756e-05, + "loss": 0.6945, + "step": 47648 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721534430385e-05, + "loss": 1.0021, + "step": 47649 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721522739272184e-05, + "loss": 0.9229, + "step": 47650 + }, + { + "epoch": 0.05, + "learning_rate": 4.972151104791534e-05, + "loss": 0.8649, + "step": 47651 + }, + { + "epoch": 0.05, + "learning_rate": 4.972149935631445e-05, + "loss": 1.1628, + "step": 47652 + }, + { + "epoch": 0.05, + "learning_rate": 4.972148766446952e-05, + "loss": 0.9557, + "step": 47653 + }, + { + "epoch": 0.05, + "learning_rate": 4.972147597238055e-05, + "loss": 0.5337, + "step": 47654 + }, + { + "epoch": 0.05, + "learning_rate": 4.972146428004754e-05, + "loss": 0.4337, + "step": 47655 + }, + { + "epoch": 0.05, + "learning_rate": 4.97214525874705e-05, + "loss": 0.3762, + "step": 47656 + }, + { + "epoch": 0.05, + "learning_rate": 4.972144089464941e-05, + "loss": 0.3152, + "step": 47657 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721429201584274e-05, + "loss": 0.3382, + "step": 47658 + }, + { + "epoch": 0.05, + "learning_rate": 4.972141750827511e-05, + "loss": 0.3332, + "step": 47659 + }, + { + "epoch": 0.05, + "learning_rate": 4.97214058147219e-05, + "loss": 0.3576, + "step": 47660 + }, + { + "epoch": 0.05, + "learning_rate": 4.972139412092465e-05, + "loss": 0.3137, + "step": 47661 + }, + { + "epoch": 0.05, + "learning_rate": 4.972138242688337e-05, + "loss": 0.3941, + "step": 47662 + }, + { + "epoch": 0.05, + "learning_rate": 4.972137073259804e-05, + "loss": 0.2715, + "step": 47663 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721359038068685e-05, + "loss": 0.2055, + "step": 47664 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721347343295274e-05, + "loss": 0.3552, + "step": 47665 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721335648277835e-05, + "loss": 0.4101, + "step": 47666 + }, + { + "epoch": 0.05, + "learning_rate": 4.972132395301635e-05, + "loss": 0.2725, + "step": 47667 + }, + { + "epoch": 0.05, + "learning_rate": 4.972131225751083e-05, + "loss": 0.5107, + "step": 47668 + }, + { + "epoch": 0.05, + "learning_rate": 4.972130056176128e-05, + "loss": 1.1275, + "step": 47669 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721288865767685e-05, + "loss": 1.6218, + "step": 47670 + }, + { + "epoch": 0.05, + "learning_rate": 4.972127716953005e-05, + "loss": 1.1528, + "step": 47671 + }, + { + "epoch": 0.05, + "learning_rate": 4.972126547304838e-05, + "loss": 1.289, + "step": 47672 + }, + { + "epoch": 0.05, + "learning_rate": 4.972125377632268e-05, + "loss": 1.0604, + "step": 47673 + }, + { + "epoch": 0.05, + "learning_rate": 4.972124207935292e-05, + "loss": 0.9352, + "step": 47674 + }, + { + "epoch": 0.05, + "learning_rate": 4.972123038213914e-05, + "loss": 0.9284, + "step": 47675 + }, + { + "epoch": 0.05, + "learning_rate": 4.972121868468131e-05, + "loss": 0.7878, + "step": 47676 + }, + { + "epoch": 0.05, + "learning_rate": 4.972120698697946e-05, + "loss": 0.7248, + "step": 47677 + }, + { + "epoch": 0.05, + "learning_rate": 4.972119528903356e-05, + "loss": 1.2297, + "step": 47678 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721183590843625e-05, + "loss": 0.6367, + "step": 47679 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721171892409656e-05, + "loss": 0.9748, + "step": 47680 + }, + { + "epoch": 0.05, + "learning_rate": 4.972116019373164e-05, + "loss": 1.1297, + "step": 47681 + }, + { + "epoch": 0.05, + "learning_rate": 4.97211484948096e-05, + "loss": 0.7429, + "step": 47682 + }, + { + "epoch": 0.05, + "learning_rate": 4.972113679564352e-05, + "loss": 1.0496, + "step": 47683 + }, + { + "epoch": 0.05, + "learning_rate": 4.97211250962334e-05, + "loss": 0.771, + "step": 47684 + }, + { + "epoch": 0.05, + "learning_rate": 4.972111339657924e-05, + "loss": 0.842, + "step": 47685 + }, + { + "epoch": 0.05, + "learning_rate": 4.972110169668105e-05, + "loss": 1.147, + "step": 47686 + }, + { + "epoch": 0.05, + "learning_rate": 4.972108999653883e-05, + "loss": 1.0923, + "step": 47687 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721078296152555e-05, + "loss": 0.9326, + "step": 47688 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721066595522256e-05, + "loss": 1.0511, + "step": 47689 + }, + { + "epoch": 0.05, + "learning_rate": 4.972105489464792e-05, + "loss": 1.0039, + "step": 47690 + }, + { + "epoch": 0.05, + "learning_rate": 4.972104319352955e-05, + "loss": 0.808, + "step": 47691 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721031492167144e-05, + "loss": 0.6086, + "step": 47692 + }, + { + "epoch": 0.05, + "learning_rate": 4.9721019790560704e-05, + "loss": 0.9635, + "step": 47693 + }, + { + "epoch": 0.05, + "learning_rate": 4.972100808871022e-05, + "loss": 1.1222, + "step": 47694 + }, + { + "epoch": 0.05, + "learning_rate": 4.972099638661571e-05, + "loss": 0.8763, + "step": 47695 + }, + { + "epoch": 0.05, + "learning_rate": 4.972098468427716e-05, + "loss": 1.0849, + "step": 47696 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720972981694574e-05, + "loss": 1.1144, + "step": 47697 + }, + { + "epoch": 0.05, + "learning_rate": 4.972096127886795e-05, + "loss": 0.9442, + "step": 47698 + }, + { + "epoch": 0.05, + "learning_rate": 4.97209495757973e-05, + "loss": 0.9034, + "step": 47699 + }, + { + "epoch": 0.05, + "learning_rate": 4.972093787248261e-05, + "loss": 0.9529, + "step": 47700 + }, + { + "epoch": 0.05, + "learning_rate": 4.972092616892389e-05, + "loss": 0.9721, + "step": 47701 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720914465121125e-05, + "loss": 0.9717, + "step": 47702 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720902761074335e-05, + "loss": 0.9885, + "step": 47703 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720891056783505e-05, + "loss": 1.0476, + "step": 47704 + }, + { + "epoch": 0.05, + "learning_rate": 4.972087935224864e-05, + "loss": 1.018, + "step": 47705 + }, + { + "epoch": 0.05, + "learning_rate": 4.972086764746975e-05, + "loss": 1.2491, + "step": 47706 + }, + { + "epoch": 0.05, + "learning_rate": 4.972085594244682e-05, + "loss": 0.9278, + "step": 47707 + }, + { + "epoch": 0.05, + "learning_rate": 4.972084423717985e-05, + "loss": 0.857, + "step": 47708 + }, + { + "epoch": 0.05, + "learning_rate": 4.972083253166886e-05, + "loss": 0.7622, + "step": 47709 + }, + { + "epoch": 0.05, + "learning_rate": 4.972082082591383e-05, + "loss": 0.8215, + "step": 47710 + }, + { + "epoch": 0.05, + "learning_rate": 4.972080911991477e-05, + "loss": 0.9671, + "step": 47711 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720797413671666e-05, + "loss": 1.0666, + "step": 47712 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720785707184534e-05, + "loss": 1.1219, + "step": 47713 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720774000453375e-05, + "loss": 0.7542, + "step": 47714 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720762293478176e-05, + "loss": 1.2995, + "step": 47715 + }, + { + "epoch": 0.05, + "learning_rate": 4.972075058625894e-05, + "loss": 0.979, + "step": 47716 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720738878795684e-05, + "loss": 1.0348, + "step": 47717 + }, + { + "epoch": 0.05, + "learning_rate": 4.972072717108839e-05, + "loss": 1.0104, + "step": 47718 + }, + { + "epoch": 0.05, + "learning_rate": 4.972071546313706e-05, + "loss": 1.1856, + "step": 47719 + }, + { + "epoch": 0.05, + "learning_rate": 4.97207037549417e-05, + "loss": 1.0401, + "step": 47720 + }, + { + "epoch": 0.05, + "learning_rate": 4.97206920465023e-05, + "loss": 1.149, + "step": 47721 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720680337818876e-05, + "loss": 0.9164, + "step": 47722 + }, + { + "epoch": 0.05, + "learning_rate": 4.972066862889142e-05, + "loss": 0.8521, + "step": 47723 + }, + { + "epoch": 0.05, + "learning_rate": 4.972065691971993e-05, + "loss": 1.1875, + "step": 47724 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720645210304406e-05, + "loss": 1.0924, + "step": 47725 + }, + { + "epoch": 0.05, + "learning_rate": 4.972063350064485e-05, + "loss": 1.0371, + "step": 47726 + }, + { + "epoch": 0.05, + "learning_rate": 4.972062179074127e-05, + "loss": 1.2434, + "step": 47727 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720610080593654e-05, + "loss": 1.1229, + "step": 47728 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720598370202005e-05, + "loss": 0.9933, + "step": 47729 + }, + { + "epoch": 0.05, + "learning_rate": 4.972058665956633e-05, + "loss": 0.7903, + "step": 47730 + }, + { + "epoch": 0.05, + "learning_rate": 4.972057494868661e-05, + "loss": 1.3459, + "step": 47731 + }, + { + "epoch": 0.05, + "learning_rate": 4.972056323756287e-05, + "loss": 1.2519, + "step": 47732 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720551526195094e-05, + "loss": 1.0229, + "step": 47733 + }, + { + "epoch": 0.05, + "learning_rate": 4.97205398145833e-05, + "loss": 1.195, + "step": 47734 + }, + { + "epoch": 0.05, + "learning_rate": 4.972052810272746e-05, + "loss": 1.1327, + "step": 47735 + }, + { + "epoch": 0.05, + "learning_rate": 4.972051639062759e-05, + "loss": 0.6835, + "step": 47736 + }, + { + "epoch": 0.05, + "learning_rate": 4.97205046782837e-05, + "loss": 1.1935, + "step": 47737 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720492965695776e-05, + "loss": 1.0257, + "step": 47738 + }, + { + "epoch": 0.05, + "learning_rate": 4.972048125286382e-05, + "loss": 1.0257, + "step": 47739 + }, + { + "epoch": 0.05, + "learning_rate": 4.972046953978783e-05, + "loss": 0.7577, + "step": 47740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720457826467816e-05, + "loss": 1.0573, + "step": 47741 + }, + { + "epoch": 0.05, + "learning_rate": 4.972044611290377e-05, + "loss": 0.8577, + "step": 47742 + }, + { + "epoch": 0.05, + "learning_rate": 4.972043439909569e-05, + "loss": 1.0311, + "step": 47743 + }, + { + "epoch": 0.05, + "learning_rate": 4.972042268504359e-05, + "loss": 0.965, + "step": 47744 + }, + { + "epoch": 0.05, + "learning_rate": 4.972041097074745e-05, + "loss": 0.9833, + "step": 47745 + }, + { + "epoch": 0.05, + "learning_rate": 4.972039925620728e-05, + "loss": 0.9759, + "step": 47746 + }, + { + "epoch": 0.05, + "learning_rate": 4.972038754142309e-05, + "loss": 0.9423, + "step": 47747 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720375826394865e-05, + "loss": 1.3926, + "step": 47748 + }, + { + "epoch": 0.05, + "learning_rate": 4.972036411112261e-05, + "loss": 1.0237, + "step": 47749 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720352395606326e-05, + "loss": 0.8381, + "step": 47750 + }, + { + "epoch": 0.05, + "learning_rate": 4.972034067984602e-05, + "loss": 1.2476, + "step": 47751 + }, + { + "epoch": 0.05, + "learning_rate": 4.972032896384168e-05, + "loss": 0.8204, + "step": 47752 + }, + { + "epoch": 0.05, + "learning_rate": 4.972031724759332e-05, + "loss": 0.7039, + "step": 47753 + }, + { + "epoch": 0.05, + "learning_rate": 4.972030553110092e-05, + "loss": 0.9028, + "step": 47754 + }, + { + "epoch": 0.05, + "learning_rate": 4.972029381436449e-05, + "loss": 0.9067, + "step": 47755 + }, + { + "epoch": 0.05, + "learning_rate": 4.972028209738404e-05, + "loss": 0.7831, + "step": 47756 + }, + { + "epoch": 0.05, + "learning_rate": 4.972027038015956e-05, + "loss": 0.633, + "step": 47757 + }, + { + "epoch": 0.05, + "learning_rate": 4.972025866269104e-05, + "loss": 0.8689, + "step": 47758 + }, + { + "epoch": 0.05, + "learning_rate": 4.972024694497851e-05, + "loss": 0.7366, + "step": 47759 + }, + { + "epoch": 0.05, + "learning_rate": 4.972023522702194e-05, + "loss": 0.9524, + "step": 47760 + }, + { + "epoch": 0.05, + "learning_rate": 4.972022350882135e-05, + "loss": 0.6738, + "step": 47761 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720211790376725e-05, + "loss": 1.2146, + "step": 47762 + }, + { + "epoch": 0.05, + "learning_rate": 4.972020007168808e-05, + "loss": 1.1921, + "step": 47763 + }, + { + "epoch": 0.05, + "learning_rate": 4.97201883527554e-05, + "loss": 0.9299, + "step": 47764 + }, + { + "epoch": 0.05, + "learning_rate": 4.97201766335787e-05, + "loss": 1.1184, + "step": 47765 + }, + { + "epoch": 0.05, + "learning_rate": 4.972016491415797e-05, + "loss": 0.5616, + "step": 47766 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720153194493215e-05, + "loss": 1.0847, + "step": 47767 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720141474584426e-05, + "loss": 1.0944, + "step": 47768 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720129754431624e-05, + "loss": 1.166, + "step": 47769 + }, + { + "epoch": 0.05, + "learning_rate": 4.972011803403478e-05, + "loss": 1.209, + "step": 47770 + }, + { + "epoch": 0.05, + "learning_rate": 4.972010631339392e-05, + "loss": 1.4612, + "step": 47771 + }, + { + "epoch": 0.05, + "learning_rate": 4.972009459250903e-05, + "loss": 0.4367, + "step": 47772 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720082871380106e-05, + "loss": 0.3597, + "step": 47773 + }, + { + "epoch": 0.05, + "learning_rate": 4.972007115000716e-05, + "loss": 0.7923, + "step": 47774 + }, + { + "epoch": 0.05, + "learning_rate": 4.97200594283902e-05, + "loss": 1.091, + "step": 47775 + }, + { + "epoch": 0.05, + "learning_rate": 4.97200477065292e-05, + "loss": 0.8889, + "step": 47776 + }, + { + "epoch": 0.05, + "learning_rate": 4.972003598442417e-05, + "loss": 1.0389, + "step": 47777 + }, + { + "epoch": 0.05, + "learning_rate": 4.972002426207513e-05, + "loss": 1.0606, + "step": 47778 + }, + { + "epoch": 0.05, + "learning_rate": 4.972001253948205e-05, + "loss": 1.1538, + "step": 47779 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720000816644954e-05, + "loss": 0.9408, + "step": 47780 + }, + { + "epoch": 0.05, + "learning_rate": 4.971998909356384e-05, + "loss": 1.2453, + "step": 47781 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719977370238686e-05, + "loss": 1.2547, + "step": 47782 + }, + { + "epoch": 0.05, + "learning_rate": 4.971996564666951e-05, + "loss": 0.7524, + "step": 47783 + }, + { + "epoch": 0.05, + "learning_rate": 4.971995392285631e-05, + "loss": 0.9262, + "step": 47784 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719942198799086e-05, + "loss": 0.97, + "step": 47785 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719930474497835e-05, + "loss": 1.0714, + "step": 47786 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719918749952564e-05, + "loss": 0.7358, + "step": 47787 + }, + { + "epoch": 0.05, + "learning_rate": 4.971990702516326e-05, + "loss": 1.0663, + "step": 47788 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719895300129934e-05, + "loss": 0.9223, + "step": 47789 + }, + { + "epoch": 0.05, + "learning_rate": 4.971988357485259e-05, + "loss": 0.9007, + "step": 47790 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719871849331224e-05, + "loss": 0.4539, + "step": 47791 + }, + { + "epoch": 0.05, + "learning_rate": 4.971986012356582e-05, + "loss": 1.0839, + "step": 47792 + }, + { + "epoch": 0.05, + "learning_rate": 4.97198483975564e-05, + "loss": 1.1637, + "step": 47793 + }, + { + "epoch": 0.05, + "learning_rate": 4.971983667130296e-05, + "loss": 0.7394, + "step": 47794 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719824944805496e-05, + "loss": 1.0142, + "step": 47795 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719813218064004e-05, + "loss": 0.9299, + "step": 47796 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719801491078485e-05, + "loss": 0.5248, + "step": 47797 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719789763848946e-05, + "loss": 0.8851, + "step": 47798 + }, + { + "epoch": 0.05, + "learning_rate": 4.971977803637539e-05, + "loss": 1.078, + "step": 47799 + }, + { + "epoch": 0.05, + "learning_rate": 4.97197663086578e-05, + "loss": 1.0448, + "step": 47800 + }, + { + "epoch": 0.05, + "learning_rate": 4.971975458069619e-05, + "loss": 1.0146, + "step": 47801 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719742852490556e-05, + "loss": 0.9754, + "step": 47802 + }, + { + "epoch": 0.05, + "learning_rate": 4.97197311240409e-05, + "loss": 0.9723, + "step": 47803 + }, + { + "epoch": 0.05, + "learning_rate": 4.971971939534723e-05, + "loss": 1.1399, + "step": 47804 + }, + { + "epoch": 0.05, + "learning_rate": 4.971970766640953e-05, + "loss": 1.2429, + "step": 47805 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719695937227805e-05, + "loss": 1.0528, + "step": 47806 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719684207802065e-05, + "loss": 1.1351, + "step": 47807 + }, + { + "epoch": 0.05, + "learning_rate": 4.97196724781323e-05, + "loss": 1.0351, + "step": 47808 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719660748218505e-05, + "loss": 0.9655, + "step": 47809 + }, + { + "epoch": 0.05, + "learning_rate": 4.97196490180607e-05, + "loss": 1.1886, + "step": 47810 + }, + { + "epoch": 0.05, + "learning_rate": 4.971963728765886e-05, + "loss": 1.2295, + "step": 47811 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719625557013006e-05, + "loss": 1.2562, + "step": 47812 + }, + { + "epoch": 0.05, + "learning_rate": 4.971961382612313e-05, + "loss": 1.2019, + "step": 47813 + }, + { + "epoch": 0.05, + "learning_rate": 4.971960209498923e-05, + "loss": 0.8841, + "step": 47814 + }, + { + "epoch": 0.05, + "learning_rate": 4.971959036361131e-05, + "loss": 0.902, + "step": 47815 + }, + { + "epoch": 0.05, + "learning_rate": 4.971957863198937e-05, + "loss": 0.9207, + "step": 47816 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719566900123405e-05, + "loss": 0.9506, + "step": 47817 + }, + { + "epoch": 0.05, + "learning_rate": 4.971955516801342e-05, + "loss": 1.0682, + "step": 47818 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719543435659424e-05, + "loss": 0.8568, + "step": 47819 + }, + { + "epoch": 0.05, + "learning_rate": 4.97195317030614e-05, + "loss": 1.2246, + "step": 47820 + }, + { + "epoch": 0.05, + "learning_rate": 4.971951997021935e-05, + "loss": 1.0816, + "step": 47821 + }, + { + "epoch": 0.05, + "learning_rate": 4.971950823713328e-05, + "loss": 1.0639, + "step": 47822 + }, + { + "epoch": 0.05, + "learning_rate": 4.971949650380319e-05, + "loss": 1.0204, + "step": 47823 + }, + { + "epoch": 0.05, + "learning_rate": 4.971948477022909e-05, + "loss": 0.8279, + "step": 47824 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719473036410955e-05, + "loss": 1.1145, + "step": 47825 + }, + { + "epoch": 0.05, + "learning_rate": 4.971946130234881e-05, + "loss": 1.1822, + "step": 47826 + }, + { + "epoch": 0.05, + "learning_rate": 4.971944956804264e-05, + "loss": 1.0658, + "step": 47827 + }, + { + "epoch": 0.05, + "learning_rate": 4.971943783349245e-05, + "loss": 0.9432, + "step": 47828 + }, + { + "epoch": 0.05, + "learning_rate": 4.971942609869824e-05, + "loss": 0.735, + "step": 47829 + }, + { + "epoch": 0.05, + "learning_rate": 4.971941436366001e-05, + "loss": 0.8667, + "step": 47830 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719402628377764e-05, + "loss": 0.7171, + "step": 47831 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719390892851504e-05, + "loss": 0.8647, + "step": 47832 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719379157081216e-05, + "loss": 0.5938, + "step": 47833 + }, + { + "epoch": 0.05, + "learning_rate": 4.971936742106691e-05, + "loss": 0.8453, + "step": 47834 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719355684808574e-05, + "loss": 1.0334, + "step": 47835 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719343948306234e-05, + "loss": 1.0637, + "step": 47836 + }, + { + "epoch": 0.05, + "learning_rate": 4.971933221155987e-05, + "loss": 1.0455, + "step": 47837 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719320474569485e-05, + "loss": 1.012, + "step": 47838 + }, + { + "epoch": 0.05, + "learning_rate": 4.971930873733509e-05, + "loss": 0.7937, + "step": 47839 + }, + { + "epoch": 0.05, + "learning_rate": 4.971929699985667e-05, + "loss": 0.9388, + "step": 47840 + }, + { + "epoch": 0.05, + "learning_rate": 4.971928526213423e-05, + "loss": 1.3618, + "step": 47841 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719273524167775e-05, + "loss": 1.028, + "step": 47842 + }, + { + "epoch": 0.05, + "learning_rate": 4.97192617859573e-05, + "loss": 0.986, + "step": 47843 + }, + { + "epoch": 0.05, + "learning_rate": 4.971925004750281e-05, + "loss": 1.1092, + "step": 47844 + }, + { + "epoch": 0.05, + "learning_rate": 4.971923830880429e-05, + "loss": 1.0639, + "step": 47845 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719226569861765e-05, + "loss": 1.5566, + "step": 47846 + }, + { + "epoch": 0.05, + "learning_rate": 4.971921483067522e-05, + "loss": 1.5366, + "step": 47847 + }, + { + "epoch": 0.05, + "learning_rate": 4.971920309124466e-05, + "loss": 0.9483, + "step": 47848 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719191351570076e-05, + "loss": 1.1997, + "step": 47849 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719179611651476e-05, + "loss": 1.0101, + "step": 47850 + }, + { + "epoch": 0.05, + "learning_rate": 4.971916787148886e-05, + "loss": 2.1167, + "step": 47851 + }, + { + "epoch": 0.05, + "learning_rate": 4.971915613108222e-05, + "loss": 1.3043, + "step": 47852 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719144390431573e-05, + "loss": 0.8568, + "step": 47853 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719132649536906e-05, + "loss": 0.8326, + "step": 47854 + }, + { + "epoch": 0.05, + "learning_rate": 4.971912090839822e-05, + "loss": 1.1538, + "step": 47855 + }, + { + "epoch": 0.05, + "learning_rate": 4.971910916701552e-05, + "loss": 0.4732, + "step": 47856 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719097425388804e-05, + "loss": 1.0621, + "step": 47857 + }, + { + "epoch": 0.05, + "learning_rate": 4.971908568351807e-05, + "loss": 0.8774, + "step": 47858 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719073941403316e-05, + "loss": 0.7676, + "step": 47859 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719062199044556e-05, + "loss": 0.6178, + "step": 47860 + }, + { + "epoch": 0.05, + "learning_rate": 4.971905045644177e-05, + "loss": 0.9491, + "step": 47861 + }, + { + "epoch": 0.05, + "learning_rate": 4.971903871359497e-05, + "loss": 0.4884, + "step": 47862 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719026970504155e-05, + "loss": 0.4233, + "step": 47863 + }, + { + "epoch": 0.05, + "learning_rate": 4.971901522716932e-05, + "loss": 0.4039, + "step": 47864 + }, + { + "epoch": 0.05, + "learning_rate": 4.9719003483590474e-05, + "loss": 0.3183, + "step": 47865 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718991739767614e-05, + "loss": 0.3281, + "step": 47866 + }, + { + "epoch": 0.05, + "learning_rate": 4.971897999570073e-05, + "loss": 0.3538, + "step": 47867 + }, + { + "epoch": 0.05, + "learning_rate": 4.971896825138984e-05, + "loss": 0.2481, + "step": 47868 + }, + { + "epoch": 0.05, + "learning_rate": 4.971895650683493e-05, + "loss": 0.9612, + "step": 47869 + }, + { + "epoch": 0.05, + "learning_rate": 4.971894476203601e-05, + "loss": 0.9478, + "step": 47870 + }, + { + "epoch": 0.05, + "learning_rate": 4.971893301699307e-05, + "loss": 0.6166, + "step": 47871 + }, + { + "epoch": 0.05, + "learning_rate": 4.971892127170612e-05, + "loss": 1.0773, + "step": 47872 + }, + { + "epoch": 0.05, + "learning_rate": 4.971890952617515e-05, + "loss": 1.042, + "step": 47873 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718897780400166e-05, + "loss": 0.968, + "step": 47874 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718886034381167e-05, + "loss": 0.9952, + "step": 47875 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718874288118154e-05, + "loss": 0.9939, + "step": 47876 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718862541611134e-05, + "loss": 0.963, + "step": 47877 + }, + { + "epoch": 0.05, + "learning_rate": 4.971885079486009e-05, + "loss": 0.9601, + "step": 47878 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718839047865035e-05, + "loss": 0.953, + "step": 47879 + }, + { + "epoch": 0.05, + "learning_rate": 4.971882730062597e-05, + "loss": 0.9169, + "step": 47880 + }, + { + "epoch": 0.05, + "learning_rate": 4.971881555314288e-05, + "loss": 0.9104, + "step": 47881 + }, + { + "epoch": 0.05, + "learning_rate": 4.971880380541579e-05, + "loss": 0.8942, + "step": 47882 + }, + { + "epoch": 0.05, + "learning_rate": 4.971879205744468e-05, + "loss": 0.9441, + "step": 47883 + }, + { + "epoch": 0.05, + "learning_rate": 4.971878030922955e-05, + "loss": 0.802, + "step": 47884 + }, + { + "epoch": 0.05, + "learning_rate": 4.971876856077042e-05, + "loss": 1.1263, + "step": 47885 + }, + { + "epoch": 0.05, + "learning_rate": 4.971875681206727e-05, + "loss": 1.0566, + "step": 47886 + }, + { + "epoch": 0.05, + "learning_rate": 4.971874506312011e-05, + "loss": 1.2937, + "step": 47887 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718733313928936e-05, + "loss": 1.05, + "step": 47888 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718721564493744e-05, + "loss": 0.4595, + "step": 47889 + }, + { + "epoch": 0.05, + "learning_rate": 4.971870981481454e-05, + "loss": 1.145, + "step": 47890 + }, + { + "epoch": 0.05, + "learning_rate": 4.971869806489132e-05, + "loss": 0.8362, + "step": 47891 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718686314724094e-05, + "loss": 0.8462, + "step": 47892 + }, + { + "epoch": 0.05, + "learning_rate": 4.971867456431286e-05, + "loss": 0.9743, + "step": 47893 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718662813657604e-05, + "loss": 1.3106, + "step": 47894 + }, + { + "epoch": 0.05, + "learning_rate": 4.971865106275834e-05, + "loss": 0.9319, + "step": 47895 + }, + { + "epoch": 0.05, + "learning_rate": 4.971863931161507e-05, + "loss": 1.0123, + "step": 47896 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718627560227784e-05, + "loss": 1.1448, + "step": 47897 + }, + { + "epoch": 0.05, + "learning_rate": 4.971861580859648e-05, + "loss": 1.0335, + "step": 47898 + }, + { + "epoch": 0.05, + "learning_rate": 4.971860405672116e-05, + "loss": 1.2229, + "step": 47899 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718592304601844e-05, + "loss": 1.2572, + "step": 47900 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718580552238506e-05, + "loss": 0.9347, + "step": 47901 + }, + { + "epoch": 0.05, + "learning_rate": 4.971856879963116e-05, + "loss": 1.1134, + "step": 47902 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718557046779805e-05, + "loss": 1.1024, + "step": 47903 + }, + { + "epoch": 0.05, + "learning_rate": 4.971854529368444e-05, + "loss": 1.0624, + "step": 47904 + }, + { + "epoch": 0.05, + "learning_rate": 4.971853354034506e-05, + "loss": 1.1642, + "step": 47905 + }, + { + "epoch": 0.05, + "learning_rate": 4.971852178676166e-05, + "loss": 1.0014, + "step": 47906 + }, + { + "epoch": 0.05, + "learning_rate": 4.971851003293426e-05, + "loss": 1.157, + "step": 47907 + }, + { + "epoch": 0.05, + "learning_rate": 4.971849827886285e-05, + "loss": 0.9972, + "step": 47908 + }, + { + "epoch": 0.05, + "learning_rate": 4.971848652454742e-05, + "loss": 1.0393, + "step": 47909 + }, + { + "epoch": 0.05, + "learning_rate": 4.971847476998799e-05, + "loss": 1.4466, + "step": 47910 + }, + { + "epoch": 0.05, + "learning_rate": 4.971846301518454e-05, + "loss": 1.1639, + "step": 47911 + }, + { + "epoch": 0.05, + "learning_rate": 4.97184512601371e-05, + "loss": 1.3107, + "step": 47912 + }, + { + "epoch": 0.05, + "learning_rate": 4.971843950484563e-05, + "loss": 1.2166, + "step": 47913 + }, + { + "epoch": 0.05, + "learning_rate": 4.971842774931016e-05, + "loss": 0.9024, + "step": 47914 + }, + { + "epoch": 0.05, + "learning_rate": 4.971841599353067e-05, + "loss": 0.3774, + "step": 47915 + }, + { + "epoch": 0.05, + "learning_rate": 4.971840423750718e-05, + "loss": 1.0197, + "step": 47916 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718392481239675e-05, + "loss": 0.9183, + "step": 47917 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718380724728166e-05, + "loss": 0.5847, + "step": 47918 + }, + { + "epoch": 0.05, + "learning_rate": 4.971836896797264e-05, + "loss": 1.2693, + "step": 47919 + }, + { + "epoch": 0.05, + "learning_rate": 4.971835721097311e-05, + "loss": 1.1582, + "step": 47920 + }, + { + "epoch": 0.05, + "learning_rate": 4.971834545372957e-05, + "loss": 0.6291, + "step": 47921 + }, + { + "epoch": 0.05, + "learning_rate": 4.971833369624201e-05, + "loss": 1.1275, + "step": 47922 + }, + { + "epoch": 0.05, + "learning_rate": 4.971832193851046e-05, + "loss": 0.6304, + "step": 47923 + }, + { + "epoch": 0.05, + "learning_rate": 4.971831018053489e-05, + "loss": 0.651, + "step": 47924 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718298422315316e-05, + "loss": 0.9428, + "step": 47925 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718286663851735e-05, + "loss": 0.791, + "step": 47926 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718274905144134e-05, + "loss": 0.5232, + "step": 47927 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718263146192534e-05, + "loss": 0.8948, + "step": 47928 + }, + { + "epoch": 0.05, + "learning_rate": 4.971825138699692e-05, + "loss": 0.8136, + "step": 47929 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718239627557306e-05, + "loss": 0.7271, + "step": 47930 + }, + { + "epoch": 0.05, + "learning_rate": 4.971822786787368e-05, + "loss": 0.6753, + "step": 47931 + }, + { + "epoch": 0.05, + "learning_rate": 4.971821610794604e-05, + "loss": 0.4135, + "step": 47932 + }, + { + "epoch": 0.05, + "learning_rate": 4.97182043477744e-05, + "loss": 0.74, + "step": 47933 + }, + { + "epoch": 0.05, + "learning_rate": 4.971819258735875e-05, + "loss": 1.5391, + "step": 47934 + }, + { + "epoch": 0.05, + "learning_rate": 4.971818082669909e-05, + "loss": 0.7827, + "step": 47935 + }, + { + "epoch": 0.05, + "learning_rate": 4.971816906579543e-05, + "loss": 0.9459, + "step": 47936 + }, + { + "epoch": 0.05, + "learning_rate": 4.971815730464775e-05, + "loss": 0.9521, + "step": 47937 + }, + { + "epoch": 0.05, + "learning_rate": 4.971814554325607e-05, + "loss": 1.2932, + "step": 47938 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718133781620385e-05, + "loss": 1.9232, + "step": 47939 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718122019740693e-05, + "loss": 1.2605, + "step": 47940 + }, + { + "epoch": 0.05, + "learning_rate": 4.971811025761699e-05, + "loss": 0.8334, + "step": 47941 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718098495249284e-05, + "loss": 0.9239, + "step": 47942 + }, + { + "epoch": 0.05, + "learning_rate": 4.971808673263757e-05, + "loss": 1.2861, + "step": 47943 + }, + { + "epoch": 0.05, + "learning_rate": 4.971807496978185e-05, + "loss": 1.1474, + "step": 47944 + }, + { + "epoch": 0.05, + "learning_rate": 4.971806320668212e-05, + "loss": 0.9995, + "step": 47945 + }, + { + "epoch": 0.05, + "learning_rate": 4.9718051443338386e-05, + "loss": 0.9859, + "step": 47946 + }, + { + "epoch": 0.05, + "learning_rate": 4.971803967975065e-05, + "loss": 1.1634, + "step": 47947 + }, + { + "epoch": 0.05, + "learning_rate": 4.97180279159189e-05, + "loss": 0.9621, + "step": 47948 + }, + { + "epoch": 0.05, + "learning_rate": 4.971801615184315e-05, + "loss": 0.8813, + "step": 47949 + }, + { + "epoch": 0.05, + "learning_rate": 4.971800438752339e-05, + "loss": 0.9944, + "step": 47950 + }, + { + "epoch": 0.05, + "learning_rate": 4.971799262295963e-05, + "loss": 1.1877, + "step": 47951 + }, + { + "epoch": 0.05, + "learning_rate": 4.971798085815186e-05, + "loss": 1.4145, + "step": 47952 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717969093100084e-05, + "loss": 1.2354, + "step": 47953 + }, + { + "epoch": 0.05, + "learning_rate": 4.971795732780431e-05, + "loss": 1.1528, + "step": 47954 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717945562264526e-05, + "loss": 0.85, + "step": 47955 + }, + { + "epoch": 0.05, + "learning_rate": 4.971793379648073e-05, + "loss": 1.0967, + "step": 47956 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717922030452936e-05, + "loss": 1.3113, + "step": 47957 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717910264181134e-05, + "loss": 1.0339, + "step": 47958 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717898497665326e-05, + "loss": 1.3509, + "step": 47959 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717886730905525e-05, + "loss": 1.2725, + "step": 47960 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717874963901704e-05, + "loss": 1.3047, + "step": 47961 + }, + { + "epoch": 0.05, + "learning_rate": 4.971786319665389e-05, + "loss": 1.0735, + "step": 47962 + }, + { + "epoch": 0.05, + "learning_rate": 4.971785142916206e-05, + "loss": 0.8971, + "step": 47963 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717839661426236e-05, + "loss": 0.7113, + "step": 47964 + }, + { + "epoch": 0.05, + "learning_rate": 4.97178278934464e-05, + "loss": 1.2934, + "step": 47965 + }, + { + "epoch": 0.05, + "learning_rate": 4.971781612522257e-05, + "loss": 1.3044, + "step": 47966 + }, + { + "epoch": 0.05, + "learning_rate": 4.971780435675473e-05, + "loss": 1.174, + "step": 47967 + }, + { + "epoch": 0.05, + "learning_rate": 4.971779258804289e-05, + "loss": 1.518, + "step": 47968 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717780819087046e-05, + "loss": 1.202, + "step": 47969 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717769049887193e-05, + "loss": 0.7934, + "step": 47970 + }, + { + "epoch": 0.05, + "learning_rate": 4.971775728044334e-05, + "loss": 1.1278, + "step": 47971 + }, + { + "epoch": 0.05, + "learning_rate": 4.971774551075549e-05, + "loss": 0.7482, + "step": 47972 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717733740823625e-05, + "loss": 1.0826, + "step": 47973 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717721970647754e-05, + "loss": 1.2243, + "step": 47974 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717710200227897e-05, + "loss": 1.4319, + "step": 47975 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717698429564026e-05, + "loss": 0.9705, + "step": 47976 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717686658656155e-05, + "loss": 1.3915, + "step": 47977 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717674887504285e-05, + "loss": 1.2542, + "step": 47978 + }, + { + "epoch": 0.05, + "learning_rate": 4.97176631161084e-05, + "loss": 1.1164, + "step": 47979 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717651344468526e-05, + "loss": 1.4452, + "step": 47980 + }, + { + "epoch": 0.05, + "learning_rate": 4.971763957258464e-05, + "loss": 0.6406, + "step": 47981 + }, + { + "epoch": 0.05, + "learning_rate": 4.971762780045676e-05, + "loss": 0.5105, + "step": 47982 + }, + { + "epoch": 0.05, + "learning_rate": 4.971761602808488e-05, + "loss": 0.5284, + "step": 47983 + }, + { + "epoch": 0.05, + "learning_rate": 4.971760425546898e-05, + "loss": 0.7796, + "step": 47984 + }, + { + "epoch": 0.05, + "learning_rate": 4.97175924826091e-05, + "loss": 0.6912, + "step": 47985 + }, + { + "epoch": 0.05, + "learning_rate": 4.971758070950521e-05, + "loss": 0.7517, + "step": 47986 + }, + { + "epoch": 0.05, + "learning_rate": 4.971756893615732e-05, + "loss": 0.7692, + "step": 47987 + }, + { + "epoch": 0.05, + "learning_rate": 4.971755716256542e-05, + "loss": 0.3094, + "step": 47988 + }, + { + "epoch": 0.05, + "learning_rate": 4.971754538872953e-05, + "loss": 0.975, + "step": 47989 + }, + { + "epoch": 0.05, + "learning_rate": 4.971753361464964e-05, + "loss": 0.8321, + "step": 47990 + }, + { + "epoch": 0.05, + "learning_rate": 4.971752184032574e-05, + "loss": 1.2248, + "step": 47991 + }, + { + "epoch": 0.05, + "learning_rate": 4.971751006575784e-05, + "loss": 0.441, + "step": 47992 + }, + { + "epoch": 0.05, + "learning_rate": 4.971749829094594e-05, + "loss": 0.9109, + "step": 47993 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717486515890046e-05, + "loss": 0.8088, + "step": 47994 + }, + { + "epoch": 0.05, + "learning_rate": 4.971747474059014e-05, + "loss": 1.178, + "step": 47995 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717462965046235e-05, + "loss": 1.2086, + "step": 47996 + }, + { + "epoch": 0.05, + "learning_rate": 4.971745118925834e-05, + "loss": 1.0299, + "step": 47997 + }, + { + "epoch": 0.05, + "learning_rate": 4.971743941322644e-05, + "loss": 0.8505, + "step": 47998 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717427636950535e-05, + "loss": 0.9805, + "step": 47999 + }, + { + "epoch": 0.05, + "learning_rate": 4.971741586043063e-05, + "loss": 0.9851, + "step": 48000 + }, + { + "epoch": 0.05, + "eval_loss": 1.0888420343399048, + "eval_runtime": 128.4222, + "eval_samples_per_second": 10.785, + "eval_steps_per_second": 5.396, + "step": 48000 + }, + { + "epoch": 0.05, + "learning_rate": 4.971740408366673e-05, + "loss": 0.9193, + "step": 48001 + }, + { + "epoch": 0.05, + "learning_rate": 4.971739230665883e-05, + "loss": 0.6531, + "step": 48002 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717380529406933e-05, + "loss": 0.921, + "step": 48003 + }, + { + "epoch": 0.05, + "learning_rate": 4.971736875191103e-05, + "loss": 0.9173, + "step": 48004 + }, + { + "epoch": 0.05, + "learning_rate": 4.971735697417113e-05, + "loss": 1.0663, + "step": 48005 + }, + { + "epoch": 0.05, + "learning_rate": 4.971734519618723e-05, + "loss": 1.1954, + "step": 48006 + }, + { + "epoch": 0.05, + "learning_rate": 4.971733341795933e-05, + "loss": 0.9367, + "step": 48007 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717321639487436e-05, + "loss": 1.1529, + "step": 48008 + }, + { + "epoch": 0.05, + "learning_rate": 4.971730986077154e-05, + "loss": 1.0752, + "step": 48009 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717298081811647e-05, + "loss": 1.0588, + "step": 48010 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717286302607746e-05, + "loss": 0.9496, + "step": 48011 + }, + { + "epoch": 0.05, + "learning_rate": 4.971727452315986e-05, + "loss": 1.1228, + "step": 48012 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717262743467965e-05, + "loss": 0.7436, + "step": 48013 + }, + { + "epoch": 0.05, + "learning_rate": 4.971725096353207e-05, + "loss": 0.8428, + "step": 48014 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717239183352186e-05, + "loss": 1.2723, + "step": 48015 + }, + { + "epoch": 0.05, + "learning_rate": 4.97172274029283e-05, + "loss": 1.2421, + "step": 48016 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717215622260415e-05, + "loss": 1.0926, + "step": 48017 + }, + { + "epoch": 0.05, + "learning_rate": 4.971720384134854e-05, + "loss": 0.848, + "step": 48018 + }, + { + "epoch": 0.05, + "learning_rate": 4.971719206019265e-05, + "loss": 1.0233, + "step": 48019 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717180278792774e-05, + "loss": 1.1251, + "step": 48020 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717168497148903e-05, + "loss": 1.3159, + "step": 48021 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717156715261026e-05, + "loss": 1.1116, + "step": 48022 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717144933129156e-05, + "loss": 1.5984, + "step": 48023 + }, + { + "epoch": 0.05, + "learning_rate": 4.971713315075329e-05, + "loss": 2.2172, + "step": 48024 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717121368133424e-05, + "loss": 1.0451, + "step": 48025 + }, + { + "epoch": 0.05, + "learning_rate": 4.971710958526957e-05, + "loss": 0.6342, + "step": 48026 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717097802161706e-05, + "loss": 0.5851, + "step": 48027 + }, + { + "epoch": 0.05, + "learning_rate": 4.971708601880986e-05, + "loss": 0.9174, + "step": 48028 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717074235214e-05, + "loss": 1.1595, + "step": 48029 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717062451374155e-05, + "loss": 0.9544, + "step": 48030 + }, + { + "epoch": 0.05, + "learning_rate": 4.971705066729031e-05, + "loss": 0.7667, + "step": 48031 + }, + { + "epoch": 0.05, + "learning_rate": 4.971703888296246e-05, + "loss": 0.7752, + "step": 48032 + }, + { + "epoch": 0.05, + "learning_rate": 4.9717027098390635e-05, + "loss": 0.5716, + "step": 48033 + }, + { + "epoch": 0.05, + "learning_rate": 4.97170153135748e-05, + "loss": 0.7508, + "step": 48034 + }, + { + "epoch": 0.05, + "learning_rate": 4.971700352851497e-05, + "loss": 0.7066, + "step": 48035 + }, + { + "epoch": 0.05, + "learning_rate": 4.971699174321114e-05, + "loss": 0.8501, + "step": 48036 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716979957663325e-05, + "loss": 0.7771, + "step": 48037 + }, + { + "epoch": 0.05, + "learning_rate": 4.97169681718715e-05, + "loss": 1.1123, + "step": 48038 + }, + { + "epoch": 0.05, + "learning_rate": 4.971695638583569e-05, + "loss": 1.0383, + "step": 48039 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716944599555885e-05, + "loss": 1.2719, + "step": 48040 + }, + { + "epoch": 0.05, + "learning_rate": 4.971693281303208e-05, + "loss": 0.9525, + "step": 48041 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716921026264285e-05, + "loss": 0.7107, + "step": 48042 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716909239252496e-05, + "loss": 1.2465, + "step": 48043 + }, + { + "epoch": 0.05, + "learning_rate": 4.97168974519967e-05, + "loss": 1.1956, + "step": 48044 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716885664496924e-05, + "loss": 0.6923, + "step": 48045 + }, + { + "epoch": 0.05, + "learning_rate": 4.971687387675314e-05, + "loss": 0.925, + "step": 48046 + }, + { + "epoch": 0.05, + "learning_rate": 4.971686208876537e-05, + "loss": 0.9913, + "step": 48047 + }, + { + "epoch": 0.05, + "learning_rate": 4.971685030053361e-05, + "loss": 1.0654, + "step": 48048 + }, + { + "epoch": 0.05, + "learning_rate": 4.971683851205785e-05, + "loss": 0.6742, + "step": 48049 + }, + { + "epoch": 0.05, + "learning_rate": 4.971682672333809e-05, + "loss": 0.7675, + "step": 48050 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716814934374346e-05, + "loss": 1.2207, + "step": 48051 + }, + { + "epoch": 0.05, + "learning_rate": 4.97168031451666e-05, + "loss": 0.9538, + "step": 48052 + }, + { + "epoch": 0.05, + "learning_rate": 4.971679135571487e-05, + "loss": 1.0385, + "step": 48053 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716779566019136e-05, + "loss": 0.7888, + "step": 48054 + }, + { + "epoch": 0.05, + "learning_rate": 4.971676777607941e-05, + "loss": 0.9701, + "step": 48055 + }, + { + "epoch": 0.05, + "learning_rate": 4.971675598589569e-05, + "loss": 1.1385, + "step": 48056 + }, + { + "epoch": 0.05, + "learning_rate": 4.971674419546798e-05, + "loss": 1.2607, + "step": 48057 + }, + { + "epoch": 0.05, + "learning_rate": 4.971673240479627e-05, + "loss": 1.2922, + "step": 48058 + }, + { + "epoch": 0.05, + "learning_rate": 4.971672061388058e-05, + "loss": 1.239, + "step": 48059 + }, + { + "epoch": 0.05, + "learning_rate": 4.971670882272089e-05, + "loss": 1.2401, + "step": 48060 + }, + { + "epoch": 0.05, + "learning_rate": 4.97166970313172e-05, + "loss": 1.4309, + "step": 48061 + }, + { + "epoch": 0.05, + "learning_rate": 4.971668523966953e-05, + "loss": 0.9893, + "step": 48062 + }, + { + "epoch": 0.05, + "learning_rate": 4.971667344777786e-05, + "loss": 0.6353, + "step": 48063 + }, + { + "epoch": 0.05, + "learning_rate": 4.97166616556422e-05, + "loss": 0.7439, + "step": 48064 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716649863262544e-05, + "loss": 1.3575, + "step": 48065 + }, + { + "epoch": 0.05, + "learning_rate": 4.97166380706389e-05, + "loss": 1.2111, + "step": 48066 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716626277771264e-05, + "loss": 0.9689, + "step": 48067 + }, + { + "epoch": 0.05, + "learning_rate": 4.971661448465963e-05, + "loss": 1.2028, + "step": 48068 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716602691304005e-05, + "loss": 1.1523, + "step": 48069 + }, + { + "epoch": 0.05, + "learning_rate": 4.971659089770439e-05, + "loss": 0.7315, + "step": 48070 + }, + { + "epoch": 0.05, + "learning_rate": 4.971657910386078e-05, + "loss": 1.0987, + "step": 48071 + }, + { + "epoch": 0.05, + "learning_rate": 4.971656730977319e-05, + "loss": 1.1782, + "step": 48072 + }, + { + "epoch": 0.05, + "learning_rate": 4.97165555154416e-05, + "loss": 0.8375, + "step": 48073 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716543720866016e-05, + "loss": 0.991, + "step": 48074 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716531926046446e-05, + "loss": 1.0213, + "step": 48075 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716520130982877e-05, + "loss": 1.3297, + "step": 48076 + }, + { + "epoch": 0.05, + "learning_rate": 4.971650833567533e-05, + "loss": 1.3436, + "step": 48077 + }, + { + "epoch": 0.05, + "learning_rate": 4.971649654012378e-05, + "loss": 1.0207, + "step": 48078 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716484744328244e-05, + "loss": 0.9513, + "step": 48079 + }, + { + "epoch": 0.05, + "learning_rate": 4.971647294828872e-05, + "loss": 0.9409, + "step": 48080 + }, + { + "epoch": 0.05, + "learning_rate": 4.97164611520052e-05, + "loss": 1.1352, + "step": 48081 + }, + { + "epoch": 0.05, + "learning_rate": 4.971644935547769e-05, + "loss": 1.1322, + "step": 48082 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716437558706197e-05, + "loss": 1.057, + "step": 48083 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716425761690704e-05, + "loss": 1.1758, + "step": 48084 + }, + { + "epoch": 0.05, + "learning_rate": 4.971641396443123e-05, + "loss": 1.2867, + "step": 48085 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716402166927754e-05, + "loss": 1.1031, + "step": 48086 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716390369180296e-05, + "loss": 1.0397, + "step": 48087 + }, + { + "epoch": 0.05, + "learning_rate": 4.971637857118885e-05, + "loss": 1.2385, + "step": 48088 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716366772953415e-05, + "loss": 1.0673, + "step": 48089 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716354974473985e-05, + "loss": 1.2124, + "step": 48090 + }, + { + "epoch": 0.05, + "learning_rate": 4.971634317575057e-05, + "loss": 1.2033, + "step": 48091 + }, + { + "epoch": 0.05, + "learning_rate": 4.971633137678316e-05, + "loss": 1.0696, + "step": 48092 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716319577571766e-05, + "loss": 1.0772, + "step": 48093 + }, + { + "epoch": 0.05, + "learning_rate": 4.971630777811638e-05, + "loss": 1.161, + "step": 48094 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716295978417004e-05, + "loss": 1.1974, + "step": 48095 + }, + { + "epoch": 0.05, + "learning_rate": 4.971628417847364e-05, + "loss": 1.0854, + "step": 48096 + }, + { + "epoch": 0.05, + "learning_rate": 4.971627237828629e-05, + "loss": 1.6331, + "step": 48097 + }, + { + "epoch": 0.05, + "learning_rate": 4.971626057785495e-05, + "loss": 1.1804, + "step": 48098 + }, + { + "epoch": 0.05, + "learning_rate": 4.971624877717962e-05, + "loss": 1.0956, + "step": 48099 + }, + { + "epoch": 0.05, + "learning_rate": 4.971623697626031e-05, + "loss": 1.1068, + "step": 48100 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716225175096996e-05, + "loss": 1.0246, + "step": 48101 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716213373689705e-05, + "loss": 1.0875, + "step": 48102 + }, + { + "epoch": 0.05, + "learning_rate": 4.971620157203842e-05, + "loss": 1.1142, + "step": 48103 + }, + { + "epoch": 0.05, + "learning_rate": 4.971618977014316e-05, + "loss": 1.1488, + "step": 48104 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716177968003896e-05, + "loss": 0.7732, + "step": 48105 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716166165620654e-05, + "loss": 0.9587, + "step": 48106 + }, + { + "epoch": 0.05, + "learning_rate": 4.971615436299342e-05, + "loss": 1.0427, + "step": 48107 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716142560122205e-05, + "loss": 0.9674, + "step": 48108 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716130757007e-05, + "loss": 0.9675, + "step": 48109 + }, + { + "epoch": 0.05, + "learning_rate": 4.97161189536478e-05, + "loss": 1.0815, + "step": 48110 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716107150044625e-05, + "loss": 1.0884, + "step": 48111 + }, + { + "epoch": 0.05, + "learning_rate": 4.971609534619745e-05, + "loss": 1.1165, + "step": 48112 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716083542106294e-05, + "loss": 0.9338, + "step": 48113 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716071737771156e-05, + "loss": 0.9524, + "step": 48114 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716059933192025e-05, + "loss": 1.2227, + "step": 48115 + }, + { + "epoch": 0.05, + "learning_rate": 4.971604812836891e-05, + "loss": 1.0319, + "step": 48116 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716036323301805e-05, + "loss": 0.9361, + "step": 48117 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716024517990716e-05, + "loss": 1.0951, + "step": 48118 + }, + { + "epoch": 0.05, + "learning_rate": 4.971601271243565e-05, + "loss": 0.8248, + "step": 48119 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716000906636585e-05, + "loss": 1.2658, + "step": 48120 + }, + { + "epoch": 0.05, + "learning_rate": 4.971598910059354e-05, + "loss": 1.2088, + "step": 48121 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715977294306504e-05, + "loss": 0.8959, + "step": 48122 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715965487775484e-05, + "loss": 1.2912, + "step": 48123 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715953681000484e-05, + "loss": 1.1104, + "step": 48124 + }, + { + "epoch": 0.05, + "learning_rate": 4.97159418739815e-05, + "loss": 1.1616, + "step": 48125 + }, + { + "epoch": 0.05, + "learning_rate": 4.971593006671852e-05, + "loss": 1.037, + "step": 48126 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715918259211556e-05, + "loss": 1.0769, + "step": 48127 + }, + { + "epoch": 0.05, + "learning_rate": 4.971590645146061e-05, + "loss": 1.1278, + "step": 48128 + }, + { + "epoch": 0.05, + "learning_rate": 4.971589464346569e-05, + "loss": 1.5623, + "step": 48129 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715882835226765e-05, + "loss": 1.2584, + "step": 48130 + }, + { + "epoch": 0.05, + "learning_rate": 4.971587102674387e-05, + "loss": 1.1203, + "step": 48131 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715859218016994e-05, + "loss": 0.8478, + "step": 48132 + }, + { + "epoch": 0.05, + "learning_rate": 4.971584740904612e-05, + "loss": 1.1956, + "step": 48133 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715835599831266e-05, + "loss": 0.994, + "step": 48134 + }, + { + "epoch": 0.05, + "learning_rate": 4.971582379037243e-05, + "loss": 0.7587, + "step": 48135 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715811980669606e-05, + "loss": 0.972, + "step": 48136 + }, + { + "epoch": 0.05, + "learning_rate": 4.971580017072281e-05, + "loss": 1.249, + "step": 48137 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715788360532015e-05, + "loss": 1.0299, + "step": 48138 + }, + { + "epoch": 0.05, + "learning_rate": 4.971577655009724e-05, + "loss": 1.0984, + "step": 48139 + }, + { + "epoch": 0.05, + "learning_rate": 4.971576473941848e-05, + "loss": 0.9546, + "step": 48140 + }, + { + "epoch": 0.05, + "learning_rate": 4.971575292849574e-05, + "loss": 0.9746, + "step": 48141 + }, + { + "epoch": 0.05, + "learning_rate": 4.971574111732902e-05, + "loss": 0.6616, + "step": 48142 + }, + { + "epoch": 0.05, + "learning_rate": 4.971572930591831e-05, + "loss": 0.7333, + "step": 48143 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715717494263615e-05, + "loss": 0.606, + "step": 48144 + }, + { + "epoch": 0.05, + "learning_rate": 4.971570568236494e-05, + "loss": 0.3911, + "step": 48145 + }, + { + "epoch": 0.05, + "learning_rate": 4.971569387022229e-05, + "loss": 0.9839, + "step": 48146 + }, + { + "epoch": 0.05, + "learning_rate": 4.971568205783565e-05, + "loss": 0.8897, + "step": 48147 + }, + { + "epoch": 0.05, + "learning_rate": 4.971567024520503e-05, + "loss": 0.9609, + "step": 48148 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715658432330415e-05, + "loss": 1.1224, + "step": 48149 + }, + { + "epoch": 0.05, + "learning_rate": 4.971564661921183e-05, + "loss": 0.5149, + "step": 48150 + }, + { + "epoch": 0.05, + "learning_rate": 4.971563480584926e-05, + "loss": 0.461, + "step": 48151 + }, + { + "epoch": 0.05, + "learning_rate": 4.971562299224271e-05, + "loss": 1.1209, + "step": 48152 + }, + { + "epoch": 0.05, + "learning_rate": 4.971561117839217e-05, + "loss": 0.7044, + "step": 48153 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715599364297655e-05, + "loss": 0.9387, + "step": 48154 + }, + { + "epoch": 0.05, + "learning_rate": 4.971558754995915e-05, + "loss": 0.7735, + "step": 48155 + }, + { + "epoch": 0.05, + "learning_rate": 4.971557573537667e-05, + "loss": 0.7732, + "step": 48156 + }, + { + "epoch": 0.05, + "learning_rate": 4.971556392055021e-05, + "loss": 0.7859, + "step": 48157 + }, + { + "epoch": 0.05, + "learning_rate": 4.971555210547977e-05, + "loss": 0.9281, + "step": 48158 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715540290165344e-05, + "loss": 1.1158, + "step": 48159 + }, + { + "epoch": 0.05, + "learning_rate": 4.971552847460693e-05, + "loss": 1.2478, + "step": 48160 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715516658804546e-05, + "loss": 0.8851, + "step": 48161 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715504842758175e-05, + "loss": 0.7418, + "step": 48162 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715493026467824e-05, + "loss": 1.4344, + "step": 48163 + }, + { + "epoch": 0.05, + "learning_rate": 4.971548120993349e-05, + "loss": 0.8717, + "step": 48164 + }, + { + "epoch": 0.05, + "learning_rate": 4.971546939315518e-05, + "loss": 0.9968, + "step": 48165 + }, + { + "epoch": 0.05, + "learning_rate": 4.971545757613288e-05, + "loss": 1.0082, + "step": 48166 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715445758866614e-05, + "loss": 0.8914, + "step": 48167 + }, + { + "epoch": 0.05, + "learning_rate": 4.971543394135636e-05, + "loss": 0.888, + "step": 48168 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715422123602126e-05, + "loss": 1.032, + "step": 48169 + }, + { + "epoch": 0.05, + "learning_rate": 4.971541030560391e-05, + "loss": 1.2738, + "step": 48170 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715398487361714e-05, + "loss": 0.8466, + "step": 48171 + }, + { + "epoch": 0.05, + "learning_rate": 4.971538666887554e-05, + "loss": 1.399, + "step": 48172 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715374850145384e-05, + "loss": 1.1684, + "step": 48173 + }, + { + "epoch": 0.05, + "learning_rate": 4.971536303117125e-05, + "loss": 1.1017, + "step": 48174 + }, + { + "epoch": 0.05, + "learning_rate": 4.971535121195313e-05, + "loss": 1.1232, + "step": 48175 + }, + { + "epoch": 0.05, + "learning_rate": 4.971533939249105e-05, + "loss": 1.0426, + "step": 48176 + }, + { + "epoch": 0.05, + "learning_rate": 4.971532757278497e-05, + "loss": 1.197, + "step": 48177 + }, + { + "epoch": 0.05, + "learning_rate": 4.971531575283491e-05, + "loss": 1.2603, + "step": 48178 + }, + { + "epoch": 0.05, + "learning_rate": 4.971530393264088e-05, + "loss": 1.0025, + "step": 48179 + }, + { + "epoch": 0.05, + "learning_rate": 4.971529211220287e-05, + "loss": 0.9819, + "step": 48180 + }, + { + "epoch": 0.05, + "learning_rate": 4.971528029152088e-05, + "loss": 0.7587, + "step": 48181 + }, + { + "epoch": 0.05, + "learning_rate": 4.971526847059492e-05, + "loss": 1.1808, + "step": 48182 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715256649424966e-05, + "loss": 1.0024, + "step": 48183 + }, + { + "epoch": 0.05, + "learning_rate": 4.971524482801104e-05, + "loss": 1.0354, + "step": 48184 + }, + { + "epoch": 0.05, + "learning_rate": 4.971523300635313e-05, + "loss": 1.1664, + "step": 48185 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715221184451255e-05, + "loss": 1.2295, + "step": 48186 + }, + { + "epoch": 0.05, + "learning_rate": 4.971520936230539e-05, + "loss": 0.8573, + "step": 48187 + }, + { + "epoch": 0.05, + "learning_rate": 4.971519753991556e-05, + "loss": 1.0284, + "step": 48188 + }, + { + "epoch": 0.05, + "learning_rate": 4.971518571728174e-05, + "loss": 0.7699, + "step": 48189 + }, + { + "epoch": 0.05, + "learning_rate": 4.971517389440394e-05, + "loss": 0.8879, + "step": 48190 + }, + { + "epoch": 0.05, + "learning_rate": 4.971516207128217e-05, + "loss": 1.0769, + "step": 48191 + }, + { + "epoch": 0.05, + "learning_rate": 4.971515024791642e-05, + "loss": 1.0179, + "step": 48192 + }, + { + "epoch": 0.05, + "learning_rate": 4.971513842430669e-05, + "loss": 0.9185, + "step": 48193 + }, + { + "epoch": 0.05, + "learning_rate": 4.971512660045299e-05, + "loss": 1.1658, + "step": 48194 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715114776355305e-05, + "loss": 1.081, + "step": 48195 + }, + { + "epoch": 0.05, + "learning_rate": 4.971510295201364e-05, + "loss": 1.241, + "step": 48196 + }, + { + "epoch": 0.05, + "learning_rate": 4.9715091127428006e-05, + "loss": 1.2099, + "step": 48197 + }, + { + "epoch": 0.05, + "learning_rate": 4.971507930259839e-05, + "loss": 1.2638, + "step": 48198 + }, + { + "epoch": 0.05, + "learning_rate": 4.97150674775248e-05, + "loss": 0.7353, + "step": 48199 + }, + { + "epoch": 0.05, + "learning_rate": 4.971505565220723e-05, + "loss": 0.7259, + "step": 48200 + }, + { + "epoch": 0.05, + "learning_rate": 4.971504382664569e-05, + "loss": 0.6202, + "step": 48201 + }, + { + "epoch": 0.05, + "learning_rate": 4.971503200084017e-05, + "loss": 0.7004, + "step": 48202 + }, + { + "epoch": 0.05, + "learning_rate": 4.971502017479067e-05, + "loss": 0.6727, + "step": 48203 + }, + { + "epoch": 0.05, + "learning_rate": 4.97150083484972e-05, + "loss": 0.8411, + "step": 48204 + }, + { + "epoch": 0.05, + "learning_rate": 4.971499652195975e-05, + "loss": 0.7956, + "step": 48205 + }, + { + "epoch": 0.05, + "learning_rate": 4.971498469517832e-05, + "loss": 0.6019, + "step": 48206 + }, + { + "epoch": 0.05, + "learning_rate": 4.971497286815292e-05, + "loss": 0.7997, + "step": 48207 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714961040883544e-05, + "loss": 0.4504, + "step": 48208 + }, + { + "epoch": 0.05, + "learning_rate": 4.971494921337019e-05, + "loss": 0.5794, + "step": 48209 + }, + { + "epoch": 0.05, + "learning_rate": 4.971493738561286e-05, + "loss": 0.6477, + "step": 48210 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714925557611556e-05, + "loss": 0.6188, + "step": 48211 + }, + { + "epoch": 0.05, + "learning_rate": 4.971491372936628e-05, + "loss": 0.6497, + "step": 48212 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714901900877026e-05, + "loss": 1.0867, + "step": 48213 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714890072143795e-05, + "loss": 1.1996, + "step": 48214 + }, + { + "epoch": 0.05, + "learning_rate": 4.971487824316659e-05, + "loss": 1.1492, + "step": 48215 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714866413945416e-05, + "loss": 1.1247, + "step": 48216 + }, + { + "epoch": 0.05, + "learning_rate": 4.971485458448026e-05, + "loss": 1.2363, + "step": 48217 + }, + { + "epoch": 0.05, + "learning_rate": 4.971484275477113e-05, + "loss": 0.8724, + "step": 48218 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714830924818025e-05, + "loss": 0.6603, + "step": 48219 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714819094620945e-05, + "loss": 1.1059, + "step": 48220 + }, + { + "epoch": 0.05, + "learning_rate": 4.97148072641799e-05, + "loss": 1.0778, + "step": 48221 + }, + { + "epoch": 0.05, + "learning_rate": 4.971479543349487e-05, + "loss": 0.5477, + "step": 48222 + }, + { + "epoch": 0.05, + "learning_rate": 4.971478360256587e-05, + "loss": 1.1179, + "step": 48223 + }, + { + "epoch": 0.05, + "learning_rate": 4.97147717713929e-05, + "loss": 1.2839, + "step": 48224 + }, + { + "epoch": 0.05, + "learning_rate": 4.971475993997595e-05, + "loss": 0.8564, + "step": 48225 + }, + { + "epoch": 0.05, + "learning_rate": 4.971474810831502e-05, + "loss": 0.7296, + "step": 48226 + }, + { + "epoch": 0.05, + "learning_rate": 4.971473627641013e-05, + "loss": 0.8988, + "step": 48227 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714724444261265e-05, + "loss": 0.5811, + "step": 48228 + }, + { + "epoch": 0.05, + "learning_rate": 4.971471261186842e-05, + "loss": 0.967, + "step": 48229 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714700779231606e-05, + "loss": 0.6166, + "step": 48230 + }, + { + "epoch": 0.05, + "learning_rate": 4.971468894635082e-05, + "loss": 1.0583, + "step": 48231 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714677113226056e-05, + "loss": 0.9072, + "step": 48232 + }, + { + "epoch": 0.05, + "learning_rate": 4.971466527985732e-05, + "loss": 1.0988, + "step": 48233 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714653446244616e-05, + "loss": 0.9428, + "step": 48234 + }, + { + "epoch": 0.05, + "learning_rate": 4.971464161238793e-05, + "loss": 1.2128, + "step": 48235 + }, + { + "epoch": 0.05, + "learning_rate": 4.971462977828728e-05, + "loss": 1.1482, + "step": 48236 + }, + { + "epoch": 0.05, + "learning_rate": 4.971461794394265e-05, + "loss": 1.0798, + "step": 48237 + }, + { + "epoch": 0.05, + "learning_rate": 4.971460610935406e-05, + "loss": 0.6435, + "step": 48238 + }, + { + "epoch": 0.05, + "learning_rate": 4.971459427452149e-05, + "loss": 0.8129, + "step": 48239 + }, + { + "epoch": 0.05, + "learning_rate": 4.971458243944495e-05, + "loss": 1.2694, + "step": 48240 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714570604124435e-05, + "loss": 0.9699, + "step": 48241 + }, + { + "epoch": 0.05, + "learning_rate": 4.971455876855995e-05, + "loss": 0.9653, + "step": 48242 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714546932751495e-05, + "loss": 1.0966, + "step": 48243 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714535096699066e-05, + "loss": 1.0625, + "step": 48244 + }, + { + "epoch": 0.05, + "learning_rate": 4.971452326040267e-05, + "loss": 1.3763, + "step": 48245 + }, + { + "epoch": 0.05, + "learning_rate": 4.97145114238623e-05, + "loss": 1.6163, + "step": 48246 + }, + { + "epoch": 0.05, + "learning_rate": 4.971449958707796e-05, + "loss": 1.5436, + "step": 48247 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714487750049644e-05, + "loss": 1.3392, + "step": 48248 + }, + { + "epoch": 0.05, + "learning_rate": 4.971447591277736e-05, + "loss": 1.5116, + "step": 48249 + }, + { + "epoch": 0.05, + "learning_rate": 4.97144640752611e-05, + "loss": 1.5839, + "step": 48250 + }, + { + "epoch": 0.05, + "learning_rate": 4.971445223750088e-05, + "loss": 1.2809, + "step": 48251 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714440399496675e-05, + "loss": 1.2041, + "step": 48252 + }, + { + "epoch": 0.05, + "learning_rate": 4.971442856124851e-05, + "loss": 1.6521, + "step": 48253 + }, + { + "epoch": 0.05, + "learning_rate": 4.971441672275637e-05, + "loss": 1.3729, + "step": 48254 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714404884020264e-05, + "loss": 1.483, + "step": 48255 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714393045040185e-05, + "loss": 1.3114, + "step": 48256 + }, + { + "epoch": 0.05, + "learning_rate": 4.971438120581614e-05, + "loss": 1.4574, + "step": 48257 + }, + { + "epoch": 0.05, + "learning_rate": 4.971436936634812e-05, + "loss": 1.2347, + "step": 48258 + }, + { + "epoch": 0.05, + "learning_rate": 4.971435752663614e-05, + "loss": 1.3955, + "step": 48259 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714345686680174e-05, + "loss": 1.3422, + "step": 48260 + }, + { + "epoch": 0.05, + "learning_rate": 4.971433384648025e-05, + "loss": 1.4578, + "step": 48261 + }, + { + "epoch": 0.05, + "learning_rate": 4.971432200603635e-05, + "loss": 1.384, + "step": 48262 + }, + { + "epoch": 0.05, + "learning_rate": 4.971431016534849e-05, + "loss": 1.1995, + "step": 48263 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714298324416655e-05, + "loss": 1.3643, + "step": 48264 + }, + { + "epoch": 0.05, + "learning_rate": 4.971428648324085e-05, + "loss": 1.12, + "step": 48265 + }, + { + "epoch": 0.05, + "learning_rate": 4.971427464182107e-05, + "loss": 0.9777, + "step": 48266 + }, + { + "epoch": 0.05, + "learning_rate": 4.971426280015733e-05, + "loss": 0.9851, + "step": 48267 + }, + { + "epoch": 0.05, + "learning_rate": 4.971425095824962e-05, + "loss": 0.6717, + "step": 48268 + }, + { + "epoch": 0.05, + "learning_rate": 4.971423911609794e-05, + "loss": 0.7369, + "step": 48269 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714227273702284e-05, + "loss": 1.0874, + "step": 48270 + }, + { + "epoch": 0.05, + "learning_rate": 4.971421543106267e-05, + "loss": 0.8166, + "step": 48271 + }, + { + "epoch": 0.05, + "learning_rate": 4.971420358817909e-05, + "loss": 1.1024, + "step": 48272 + }, + { + "epoch": 0.05, + "learning_rate": 4.971419174505153e-05, + "loss": 0.8955, + "step": 48273 + }, + { + "epoch": 0.05, + "learning_rate": 4.971417990168001e-05, + "loss": 0.9641, + "step": 48274 + }, + { + "epoch": 0.05, + "learning_rate": 4.971416805806452e-05, + "loss": 0.6695, + "step": 48275 + }, + { + "epoch": 0.05, + "learning_rate": 4.971415621420506e-05, + "loss": 1.1274, + "step": 48276 + }, + { + "epoch": 0.05, + "learning_rate": 4.971414437010163e-05, + "loss": 1.0125, + "step": 48277 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714132525754245e-05, + "loss": 0.9267, + "step": 48278 + }, + { + "epoch": 0.05, + "learning_rate": 4.971412068116288e-05, + "loss": 0.9961, + "step": 48279 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714108836327556e-05, + "loss": 1.3077, + "step": 48280 + }, + { + "epoch": 0.05, + "learning_rate": 4.971409699124826e-05, + "loss": 1.3257, + "step": 48281 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714085145925e-05, + "loss": 0.7463, + "step": 48282 + }, + { + "epoch": 0.05, + "learning_rate": 4.971407330035777e-05, + "loss": 1.3837, + "step": 48283 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714061454546576e-05, + "loss": 1.2864, + "step": 48284 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714049608491406e-05, + "loss": 0.8601, + "step": 48285 + }, + { + "epoch": 0.05, + "learning_rate": 4.9714037762192284e-05, + "loss": 1.1816, + "step": 48286 + }, + { + "epoch": 0.05, + "learning_rate": 4.971402591564919e-05, + "loss": 1.0047, + "step": 48287 + }, + { + "epoch": 0.05, + "learning_rate": 4.971401406886212e-05, + "loss": 0.8906, + "step": 48288 + }, + { + "epoch": 0.05, + "learning_rate": 4.971400222183109e-05, + "loss": 0.8717, + "step": 48289 + }, + { + "epoch": 0.05, + "learning_rate": 4.97139903745561e-05, + "loss": 0.8148, + "step": 48290 + }, + { + "epoch": 0.05, + "learning_rate": 4.971397852703714e-05, + "loss": 0.7582, + "step": 48291 + }, + { + "epoch": 0.05, + "learning_rate": 4.971396667927421e-05, + "loss": 0.995, + "step": 48292 + }, + { + "epoch": 0.05, + "learning_rate": 4.971395483126732e-05, + "loss": 1.0577, + "step": 48293 + }, + { + "epoch": 0.05, + "learning_rate": 4.971394298301646e-05, + "loss": 0.6799, + "step": 48294 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713931134521636e-05, + "loss": 1.0777, + "step": 48295 + }, + { + "epoch": 0.05, + "learning_rate": 4.971391928578284e-05, + "loss": 0.4303, + "step": 48296 + }, + { + "epoch": 0.05, + "learning_rate": 4.971390743680009e-05, + "loss": 0.6735, + "step": 48297 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713895587573364e-05, + "loss": 0.8147, + "step": 48298 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713883738102686e-05, + "loss": 0.789, + "step": 48299 + }, + { + "epoch": 0.05, + "learning_rate": 4.971387188838803e-05, + "loss": 1.0791, + "step": 48300 + }, + { + "epoch": 0.05, + "learning_rate": 4.971386003842941e-05, + "loss": 1.0116, + "step": 48301 + }, + { + "epoch": 0.05, + "learning_rate": 4.971384818822684e-05, + "loss": 1.331, + "step": 48302 + }, + { + "epoch": 0.05, + "learning_rate": 4.971383633778029e-05, + "loss": 1.1009, + "step": 48303 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713824487089777e-05, + "loss": 1.3708, + "step": 48304 + }, + { + "epoch": 0.05, + "learning_rate": 4.971381263615531e-05, + "loss": 1.1146, + "step": 48305 + }, + { + "epoch": 0.05, + "learning_rate": 4.971380078497687e-05, + "loss": 1.5334, + "step": 48306 + }, + { + "epoch": 0.05, + "learning_rate": 4.971378893355446e-05, + "loss": 0.8024, + "step": 48307 + }, + { + "epoch": 0.05, + "learning_rate": 4.97137770818881e-05, + "loss": 1.0016, + "step": 48308 + }, + { + "epoch": 0.05, + "learning_rate": 4.971376522997777e-05, + "loss": 0.6935, + "step": 48309 + }, + { + "epoch": 0.05, + "learning_rate": 4.971375337782347e-05, + "loss": 0.6951, + "step": 48310 + }, + { + "epoch": 0.05, + "learning_rate": 4.971374152542522e-05, + "loss": 0.4637, + "step": 48311 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713729672782994e-05, + "loss": 0.6042, + "step": 48312 + }, + { + "epoch": 0.05, + "learning_rate": 4.97137178198968e-05, + "loss": 1.4435, + "step": 48313 + }, + { + "epoch": 0.05, + "learning_rate": 4.971370596676666e-05, + "loss": 1.0431, + "step": 48314 + }, + { + "epoch": 0.05, + "learning_rate": 4.971369411339254e-05, + "loss": 1.0038, + "step": 48315 + }, + { + "epoch": 0.05, + "learning_rate": 4.971368225977447e-05, + "loss": 0.8734, + "step": 48316 + }, + { + "epoch": 0.05, + "learning_rate": 4.971367040591243e-05, + "loss": 1.1688, + "step": 48317 + }, + { + "epoch": 0.05, + "learning_rate": 4.971365855180643e-05, + "loss": 1.2204, + "step": 48318 + }, + { + "epoch": 0.05, + "learning_rate": 4.971364669745646e-05, + "loss": 1.1594, + "step": 48319 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713634842862534e-05, + "loss": 0.9076, + "step": 48320 + }, + { + "epoch": 0.05, + "learning_rate": 4.971362298802464e-05, + "loss": 0.8451, + "step": 48321 + }, + { + "epoch": 0.05, + "learning_rate": 4.971361113294279e-05, + "loss": 1.292, + "step": 48322 + }, + { + "epoch": 0.05, + "learning_rate": 4.971359927761698e-05, + "loss": 1.3105, + "step": 48323 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713587422047206e-05, + "loss": 1.0459, + "step": 48324 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713575566233464e-05, + "loss": 1.1468, + "step": 48325 + }, + { + "epoch": 0.05, + "learning_rate": 4.971356371017576e-05, + "loss": 0.8585, + "step": 48326 + }, + { + "epoch": 0.05, + "learning_rate": 4.97135518538741e-05, + "loss": 0.9773, + "step": 48327 + }, + { + "epoch": 0.05, + "learning_rate": 4.971353999732847e-05, + "loss": 0.6794, + "step": 48328 + }, + { + "epoch": 0.05, + "learning_rate": 4.971352814053889e-05, + "loss": 0.9444, + "step": 48329 + }, + { + "epoch": 0.05, + "learning_rate": 4.971351628350534e-05, + "loss": 0.3243, + "step": 48330 + }, + { + "epoch": 0.05, + "learning_rate": 4.971350442622783e-05, + "loss": 0.7521, + "step": 48331 + }, + { + "epoch": 0.05, + "learning_rate": 4.971349256870636e-05, + "loss": 1.1423, + "step": 48332 + }, + { + "epoch": 0.05, + "learning_rate": 4.971348071094093e-05, + "loss": 0.8524, + "step": 48333 + }, + { + "epoch": 0.05, + "learning_rate": 4.971346885293153e-05, + "loss": 1.1634, + "step": 48334 + }, + { + "epoch": 0.05, + "learning_rate": 4.971345699467818e-05, + "loss": 1.3447, + "step": 48335 + }, + { + "epoch": 0.05, + "learning_rate": 4.971344513618087e-05, + "loss": 1.186, + "step": 48336 + }, + { + "epoch": 0.05, + "learning_rate": 4.971343327743959e-05, + "loss": 0.987, + "step": 48337 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713421418454356e-05, + "loss": 1.3992, + "step": 48338 + }, + { + "epoch": 0.05, + "learning_rate": 4.971340955922516e-05, + "loss": 1.214, + "step": 48339 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713397699752006e-05, + "loss": 0.7314, + "step": 48340 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713385840034886e-05, + "loss": 1.2193, + "step": 48341 + }, + { + "epoch": 0.05, + "learning_rate": 4.971337398007381e-05, + "loss": 1.152, + "step": 48342 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713362119868776e-05, + "loss": 1.4183, + "step": 48343 + }, + { + "epoch": 0.05, + "learning_rate": 4.971335025941978e-05, + "loss": 0.946, + "step": 48344 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713338398726816e-05, + "loss": 0.7581, + "step": 48345 + }, + { + "epoch": 0.05, + "learning_rate": 4.97133265377899e-05, + "loss": 0.9171, + "step": 48346 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713314676609026e-05, + "loss": 1.0617, + "step": 48347 + }, + { + "epoch": 0.05, + "learning_rate": 4.971330281518419e-05, + "loss": 0.9865, + "step": 48348 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713290953515394e-05, + "loss": 1.0254, + "step": 48349 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713279091602635e-05, + "loss": 0.998, + "step": 48350 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713267229445925e-05, + "loss": 1.0926, + "step": 48351 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713255367045255e-05, + "loss": 1.0854, + "step": 48352 + }, + { + "epoch": 0.05, + "learning_rate": 4.971324350440063e-05, + "loss": 1.2054, + "step": 48353 + }, + { + "epoch": 0.05, + "learning_rate": 4.971323164151204e-05, + "loss": 1.0275, + "step": 48354 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713219778379486e-05, + "loss": 0.9868, + "step": 48355 + }, + { + "epoch": 0.05, + "learning_rate": 4.971320791500298e-05, + "loss": 0.888, + "step": 48356 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713196051382516e-05, + "loss": 0.9873, + "step": 48357 + }, + { + "epoch": 0.05, + "learning_rate": 4.971318418751809e-05, + "loss": 1.0779, + "step": 48358 + }, + { + "epoch": 0.05, + "learning_rate": 4.971317232340971e-05, + "loss": 0.9165, + "step": 48359 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713160459057374e-05, + "loss": 0.9478, + "step": 48360 + }, + { + "epoch": 0.05, + "learning_rate": 4.971314859446107e-05, + "loss": 1.0385, + "step": 48361 + }, + { + "epoch": 0.05, + "learning_rate": 4.971313672962082e-05, + "loss": 1.1668, + "step": 48362 + }, + { + "epoch": 0.05, + "learning_rate": 4.971312486453661e-05, + "loss": 1.0975, + "step": 48363 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713112999208437e-05, + "loss": 0.9315, + "step": 48364 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713101133636306e-05, + "loss": 0.9141, + "step": 48365 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713089267820224e-05, + "loss": 0.9021, + "step": 48366 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713077401760175e-05, + "loss": 1.1267, + "step": 48367 + }, + { + "epoch": 0.05, + "learning_rate": 4.971306553545618e-05, + "loss": 1.3559, + "step": 48368 + }, + { + "epoch": 0.05, + "learning_rate": 4.971305366890823e-05, + "loss": 1.1094, + "step": 48369 + }, + { + "epoch": 0.05, + "learning_rate": 4.971304180211631e-05, + "loss": 1.0248, + "step": 48370 + }, + { + "epoch": 0.05, + "learning_rate": 4.971302993508044e-05, + "loss": 1.1411, + "step": 48371 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713018067800616e-05, + "loss": 0.7277, + "step": 48372 + }, + { + "epoch": 0.05, + "learning_rate": 4.9713006200276834e-05, + "loss": 1.1599, + "step": 48373 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712994332509086e-05, + "loss": 0.9232, + "step": 48374 + }, + { + "epoch": 0.05, + "learning_rate": 4.971298246449739e-05, + "loss": 0.8142, + "step": 48375 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712970596241746e-05, + "loss": 0.8411, + "step": 48376 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712958727742134e-05, + "loss": 0.9631, + "step": 48377 + }, + { + "epoch": 0.05, + "learning_rate": 4.971294685899857e-05, + "loss": 0.8334, + "step": 48378 + }, + { + "epoch": 0.05, + "learning_rate": 4.971293499001105e-05, + "loss": 0.9917, + "step": 48379 + }, + { + "epoch": 0.05, + "learning_rate": 4.971292312077957e-05, + "loss": 0.7265, + "step": 48380 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712911251304145e-05, + "loss": 1.0332, + "step": 48381 + }, + { + "epoch": 0.05, + "learning_rate": 4.971289938158476e-05, + "loss": 0.8594, + "step": 48382 + }, + { + "epoch": 0.05, + "learning_rate": 4.971288751162142e-05, + "loss": 0.9658, + "step": 48383 + }, + { + "epoch": 0.05, + "learning_rate": 4.971287564141412e-05, + "loss": 0.601, + "step": 48384 + }, + { + "epoch": 0.05, + "learning_rate": 4.971286377096287e-05, + "loss": 1.1517, + "step": 48385 + }, + { + "epoch": 0.05, + "learning_rate": 4.971285190026766e-05, + "loss": 0.9735, + "step": 48386 + }, + { + "epoch": 0.05, + "learning_rate": 4.97128400293285e-05, + "loss": 1.0634, + "step": 48387 + }, + { + "epoch": 0.05, + "learning_rate": 4.971282815814539e-05, + "loss": 1.1216, + "step": 48388 + }, + { + "epoch": 0.05, + "learning_rate": 4.971281628671831e-05, + "loss": 1.0983, + "step": 48389 + }, + { + "epoch": 0.05, + "learning_rate": 4.971280441504729e-05, + "loss": 1.0593, + "step": 48390 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712792543132306e-05, + "loss": 0.9286, + "step": 48391 + }, + { + "epoch": 0.05, + "learning_rate": 4.971278067097337e-05, + "loss": 1.1897, + "step": 48392 + }, + { + "epoch": 0.05, + "learning_rate": 4.971276879857049e-05, + "loss": 1.196, + "step": 48393 + }, + { + "epoch": 0.05, + "learning_rate": 4.971275692592364e-05, + "loss": 1.1402, + "step": 48394 + }, + { + "epoch": 0.05, + "learning_rate": 4.971274505303285e-05, + "loss": 0.7911, + "step": 48395 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712733179898095e-05, + "loss": 1.1035, + "step": 48396 + }, + { + "epoch": 0.05, + "learning_rate": 4.97127213065194e-05, + "loss": 1.1298, + "step": 48397 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712709432896734e-05, + "loss": 1.1184, + "step": 48398 + }, + { + "epoch": 0.05, + "learning_rate": 4.971269755903012e-05, + "loss": 0.939, + "step": 48399 + }, + { + "epoch": 0.05, + "learning_rate": 4.971268568491956e-05, + "loss": 1.0835, + "step": 48400 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712673810565044e-05, + "loss": 0.6795, + "step": 48401 + }, + { + "epoch": 0.05, + "learning_rate": 4.971266193596657e-05, + "loss": 0.3941, + "step": 48402 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712650061124146e-05, + "loss": 1.2219, + "step": 48403 + }, + { + "epoch": 0.05, + "learning_rate": 4.971263818603777e-05, + "loss": 1.2746, + "step": 48404 + }, + { + "epoch": 0.05, + "learning_rate": 4.971262631070745e-05, + "loss": 1.0386, + "step": 48405 + }, + { + "epoch": 0.05, + "learning_rate": 4.971261443513316e-05, + "loss": 0.9362, + "step": 48406 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712602559314925e-05, + "loss": 0.7915, + "step": 48407 + }, + { + "epoch": 0.05, + "learning_rate": 4.971259068325274e-05, + "loss": 1.1762, + "step": 48408 + }, + { + "epoch": 0.05, + "learning_rate": 4.97125788069466e-05, + "loss": 1.0496, + "step": 48409 + }, + { + "epoch": 0.05, + "learning_rate": 4.971256693039651e-05, + "loss": 1.3837, + "step": 48410 + }, + { + "epoch": 0.05, + "learning_rate": 4.971255505360246e-05, + "loss": 1.1336, + "step": 48411 + }, + { + "epoch": 0.05, + "learning_rate": 4.971254317656447e-05, + "loss": 1.1903, + "step": 48412 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712531299282524e-05, + "loss": 1.3855, + "step": 48413 + }, + { + "epoch": 0.05, + "learning_rate": 4.971251942175662e-05, + "loss": 0.9151, + "step": 48414 + }, + { + "epoch": 0.05, + "learning_rate": 4.971250754398677e-05, + "loss": 1.0028, + "step": 48415 + }, + { + "epoch": 0.05, + "learning_rate": 4.971249566597297e-05, + "loss": 0.9208, + "step": 48416 + }, + { + "epoch": 0.05, + "learning_rate": 4.971248378771521e-05, + "loss": 1.1445, + "step": 48417 + }, + { + "epoch": 0.05, + "learning_rate": 4.971247190921351e-05, + "loss": 1.1349, + "step": 48418 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712460030467856e-05, + "loss": 1.0918, + "step": 48419 + }, + { + "epoch": 0.05, + "learning_rate": 4.971244815147825e-05, + "loss": 1.2467, + "step": 48420 + }, + { + "epoch": 0.05, + "learning_rate": 4.971243627224469e-05, + "loss": 1.1351, + "step": 48421 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712424392767186e-05, + "loss": 1.2098, + "step": 48422 + }, + { + "epoch": 0.05, + "learning_rate": 4.971241251304572e-05, + "loss": 1.2855, + "step": 48423 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712400633080314e-05, + "loss": 1.5861, + "step": 48424 + }, + { + "epoch": 0.05, + "learning_rate": 4.971238875287095e-05, + "loss": 0.7613, + "step": 48425 + }, + { + "epoch": 0.05, + "learning_rate": 4.971237687241764e-05, + "loss": 0.8408, + "step": 48426 + }, + { + "epoch": 0.05, + "learning_rate": 4.971236499172038e-05, + "loss": 0.785, + "step": 48427 + }, + { + "epoch": 0.05, + "learning_rate": 4.971235311077918e-05, + "loss": 1.0255, + "step": 48428 + }, + { + "epoch": 0.05, + "learning_rate": 4.971234122959402e-05, + "loss": 0.8368, + "step": 48429 + }, + { + "epoch": 0.05, + "learning_rate": 4.971232934816491e-05, + "loss": 0.9744, + "step": 48430 + }, + { + "epoch": 0.05, + "learning_rate": 4.971231746649185e-05, + "loss": 0.9773, + "step": 48431 + }, + { + "epoch": 0.05, + "learning_rate": 4.971230558457484e-05, + "loss": 0.9586, + "step": 48432 + }, + { + "epoch": 0.05, + "learning_rate": 4.971229370241388e-05, + "loss": 1.1662, + "step": 48433 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712281820008974e-05, + "loss": 1.0403, + "step": 48434 + }, + { + "epoch": 0.05, + "learning_rate": 4.971226993736011e-05, + "loss": 0.8054, + "step": 48435 + }, + { + "epoch": 0.05, + "learning_rate": 4.971225805446731e-05, + "loss": 1.076, + "step": 48436 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712246171330554e-05, + "loss": 1.154, + "step": 48437 + }, + { + "epoch": 0.05, + "learning_rate": 4.971223428794984e-05, + "loss": 1.2392, + "step": 48438 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712222404325196e-05, + "loss": 1.0915, + "step": 48439 + }, + { + "epoch": 0.05, + "learning_rate": 4.971221052045659e-05, + "loss": 1.0251, + "step": 48440 + }, + { + "epoch": 0.05, + "learning_rate": 4.971219863634404e-05, + "loss": 1.0397, + "step": 48441 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712186751987546e-05, + "loss": 1.2179, + "step": 48442 + }, + { + "epoch": 0.05, + "learning_rate": 4.97121748673871e-05, + "loss": 0.8959, + "step": 48443 + }, + { + "epoch": 0.05, + "learning_rate": 4.97121629825427e-05, + "loss": 1.1417, + "step": 48444 + }, + { + "epoch": 0.05, + "learning_rate": 4.971215109745436e-05, + "loss": 1.0406, + "step": 48445 + }, + { + "epoch": 0.05, + "learning_rate": 4.971213921212207e-05, + "loss": 1.3462, + "step": 48446 + }, + { + "epoch": 0.05, + "learning_rate": 4.971212732654583e-05, + "loss": 0.7458, + "step": 48447 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712115440725644e-05, + "loss": 0.9853, + "step": 48448 + }, + { + "epoch": 0.05, + "learning_rate": 4.9712103554661504e-05, + "loss": 0.7882, + "step": 48449 + }, + { + "epoch": 0.05, + "learning_rate": 4.971209166835343e-05, + "loss": 0.8259, + "step": 48450 + }, + { + "epoch": 0.05, + "learning_rate": 4.97120797818014e-05, + "loss": 1.0924, + "step": 48451 + }, + { + "epoch": 0.05, + "learning_rate": 4.971206789500542e-05, + "loss": 0.9681, + "step": 48452 + }, + { + "epoch": 0.05, + "learning_rate": 4.97120560079655e-05, + "loss": 0.8508, + "step": 48453 + }, + { + "epoch": 0.05, + "learning_rate": 4.971204412068163e-05, + "loss": 1.006, + "step": 48454 + }, + { + "epoch": 0.05, + "learning_rate": 4.971203223315381e-05, + "loss": 1.1811, + "step": 48455 + }, + { + "epoch": 0.05, + "learning_rate": 4.971202034538205e-05, + "loss": 0.8637, + "step": 48456 + }, + { + "epoch": 0.05, + "learning_rate": 4.971200845736633e-05, + "loss": 1.0028, + "step": 48457 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711996569106675e-05, + "loss": 1.4697, + "step": 48458 + }, + { + "epoch": 0.05, + "learning_rate": 4.971198468060307e-05, + "loss": 1.2962, + "step": 48459 + }, + { + "epoch": 0.05, + "learning_rate": 4.971197279185552e-05, + "loss": 1.7505, + "step": 48460 + }, + { + "epoch": 0.05, + "learning_rate": 4.971196090286403e-05, + "loss": 0.9286, + "step": 48461 + }, + { + "epoch": 0.05, + "learning_rate": 4.971194901362859e-05, + "loss": 1.0945, + "step": 48462 + }, + { + "epoch": 0.05, + "learning_rate": 4.97119371241492e-05, + "loss": 0.8155, + "step": 48463 + }, + { + "epoch": 0.05, + "learning_rate": 4.971192523442586e-05, + "loss": 0.9138, + "step": 48464 + }, + { + "epoch": 0.05, + "learning_rate": 4.971191334445858e-05, + "loss": 0.9189, + "step": 48465 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711901454247354e-05, + "loss": 1.1983, + "step": 48466 + }, + { + "epoch": 0.05, + "learning_rate": 4.971188956379219e-05, + "loss": 1.3873, + "step": 48467 + }, + { + "epoch": 0.05, + "learning_rate": 4.971187767309307e-05, + "loss": 0.7155, + "step": 48468 + }, + { + "epoch": 0.05, + "learning_rate": 4.971186578215001e-05, + "loss": 0.8616, + "step": 48469 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711853890963e-05, + "loss": 0.8488, + "step": 48470 + }, + { + "epoch": 0.05, + "learning_rate": 4.971184199953205e-05, + "loss": 1.1662, + "step": 48471 + }, + { + "epoch": 0.05, + "learning_rate": 4.971183010785715e-05, + "loss": 1.3557, + "step": 48472 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711818215938314e-05, + "loss": 0.9451, + "step": 48473 + }, + { + "epoch": 0.05, + "learning_rate": 4.971180632377553e-05, + "loss": 0.8799, + "step": 48474 + }, + { + "epoch": 0.05, + "learning_rate": 4.97117944313688e-05, + "loss": 0.9581, + "step": 48475 + }, + { + "epoch": 0.05, + "learning_rate": 4.971178253871812e-05, + "loss": 0.7858, + "step": 48476 + }, + { + "epoch": 0.05, + "learning_rate": 4.971177064582351e-05, + "loss": 1.0742, + "step": 48477 + }, + { + "epoch": 0.05, + "learning_rate": 4.971175875268495e-05, + "loss": 1.1656, + "step": 48478 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711746859302436e-05, + "loss": 1.0546, + "step": 48479 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711734965675985e-05, + "loss": 0.8366, + "step": 48480 + }, + { + "epoch": 0.05, + "learning_rate": 4.971172307180559e-05, + "loss": 0.7361, + "step": 48481 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711711177691246e-05, + "loss": 0.9439, + "step": 48482 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711699283332966e-05, + "loss": 0.9002, + "step": 48483 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711687388730746e-05, + "loss": 0.8928, + "step": 48484 + }, + { + "epoch": 0.05, + "learning_rate": 4.971167549388457e-05, + "loss": 1.5427, + "step": 48485 + }, + { + "epoch": 0.05, + "learning_rate": 4.971166359879446e-05, + "loss": 1.132, + "step": 48486 + }, + { + "epoch": 0.05, + "learning_rate": 4.97116517034604e-05, + "loss": 1.4194, + "step": 48487 + }, + { + "epoch": 0.05, + "learning_rate": 4.971163980788241e-05, + "loss": 1.0049, + "step": 48488 + }, + { + "epoch": 0.05, + "learning_rate": 4.971162791206047e-05, + "loss": 1.3117, + "step": 48489 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711616015994575e-05, + "loss": 1.1272, + "step": 48490 + }, + { + "epoch": 0.05, + "learning_rate": 4.971160411968475e-05, + "loss": 1.0984, + "step": 48491 + }, + { + "epoch": 0.05, + "learning_rate": 4.971159222313098e-05, + "loss": 1.1024, + "step": 48492 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711580326333265e-05, + "loss": 0.9341, + "step": 48493 + }, + { + "epoch": 0.05, + "learning_rate": 4.971156842929162e-05, + "loss": 1.235, + "step": 48494 + }, + { + "epoch": 0.05, + "learning_rate": 4.971155653200602e-05, + "loss": 1.3937, + "step": 48495 + }, + { + "epoch": 0.05, + "learning_rate": 4.971154463447648e-05, + "loss": 1.1003, + "step": 48496 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711532736703e-05, + "loss": 0.8194, + "step": 48497 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711520838685574e-05, + "loss": 1.0338, + "step": 48498 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711508940424213e-05, + "loss": 0.8924, + "step": 48499 + }, + { + "epoch": 0.05, + "learning_rate": 4.971149704191891e-05, + "loss": 0.5674, + "step": 48500 + }, + { + "epoch": 0.05, + "eval_loss": 1.074501872062683, + "eval_runtime": 128.5455, + "eval_samples_per_second": 10.774, + "eval_steps_per_second": 5.391, + "step": 48500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711485143169655e-05, + "loss": 1.0985, + "step": 48501 + }, + { + "epoch": 0.05, + "learning_rate": 4.971147324417647e-05, + "loss": 0.8689, + "step": 48502 + }, + { + "epoch": 0.05, + "learning_rate": 4.971146134493934e-05, + "loss": 0.9656, + "step": 48503 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711449445458266e-05, + "loss": 0.8307, + "step": 48504 + }, + { + "epoch": 0.05, + "learning_rate": 4.971143754573326e-05, + "loss": 1.132, + "step": 48505 + }, + { + "epoch": 0.05, + "learning_rate": 4.971142564576431e-05, + "loss": 1.1588, + "step": 48506 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711413745551416e-05, + "loss": 1.1469, + "step": 48507 + }, + { + "epoch": 0.05, + "learning_rate": 4.971140184509457e-05, + "loss": 0.9461, + "step": 48508 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711389944393804e-05, + "loss": 0.7063, + "step": 48509 + }, + { + "epoch": 0.05, + "learning_rate": 4.971137804344908e-05, + "loss": 1.4273, + "step": 48510 + }, + { + "epoch": 0.05, + "learning_rate": 4.971136614226043e-05, + "loss": 0.9513, + "step": 48511 + }, + { + "epoch": 0.05, + "learning_rate": 4.971135424082783e-05, + "loss": 0.8435, + "step": 48512 + }, + { + "epoch": 0.05, + "learning_rate": 4.97113423391513e-05, + "loss": 0.6638, + "step": 48513 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711330437230826e-05, + "loss": 1.3537, + "step": 48514 + }, + { + "epoch": 0.05, + "learning_rate": 4.971131853506641e-05, + "loss": 1.0951, + "step": 48515 + }, + { + "epoch": 0.05, + "learning_rate": 4.971130663265805e-05, + "loss": 0.9695, + "step": 48516 + }, + { + "epoch": 0.05, + "learning_rate": 4.971129473000576e-05, + "loss": 0.8266, + "step": 48517 + }, + { + "epoch": 0.05, + "learning_rate": 4.971128282710952e-05, + "loss": 0.8944, + "step": 48518 + }, + { + "epoch": 0.05, + "learning_rate": 4.971127092396935e-05, + "loss": 1.1164, + "step": 48519 + }, + { + "epoch": 0.05, + "learning_rate": 4.971125902058524e-05, + "loss": 1.4517, + "step": 48520 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711247116957186e-05, + "loss": 1.452, + "step": 48521 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711235213085194e-05, + "loss": 1.8431, + "step": 48522 + }, + { + "epoch": 0.05, + "learning_rate": 4.971122330896927e-05, + "loss": 1.6762, + "step": 48523 + }, + { + "epoch": 0.05, + "learning_rate": 4.971121140460939e-05, + "loss": 1.3176, + "step": 48524 + }, + { + "epoch": 0.05, + "learning_rate": 4.971119950000559e-05, + "loss": 1.2963, + "step": 48525 + }, + { + "epoch": 0.05, + "learning_rate": 4.971118759515784e-05, + "loss": 0.6657, + "step": 48526 + }, + { + "epoch": 0.05, + "learning_rate": 4.971117569006616e-05, + "loss": 0.852, + "step": 48527 + }, + { + "epoch": 0.05, + "learning_rate": 4.971116378473053e-05, + "loss": 1.1301, + "step": 48528 + }, + { + "epoch": 0.05, + "learning_rate": 4.971115187915097e-05, + "loss": 1.1912, + "step": 48529 + }, + { + "epoch": 0.05, + "learning_rate": 4.971113997332747e-05, + "loss": 1.0978, + "step": 48530 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711128067260036e-05, + "loss": 0.8317, + "step": 48531 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711116160948663e-05, + "loss": 0.9525, + "step": 48532 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711104254393346e-05, + "loss": 0.8434, + "step": 48533 + }, + { + "epoch": 0.05, + "learning_rate": 4.97110923475941e-05, + "loss": 1.1512, + "step": 48534 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711080440550914e-05, + "loss": 1.1356, + "step": 48535 + }, + { + "epoch": 0.05, + "learning_rate": 4.971106853326378e-05, + "loss": 1.1633, + "step": 48536 + }, + { + "epoch": 0.05, + "learning_rate": 4.971105662573272e-05, + "loss": 0.8429, + "step": 48537 + }, + { + "epoch": 0.05, + "learning_rate": 4.971104471795772e-05, + "loss": 0.78, + "step": 48538 + }, + { + "epoch": 0.05, + "learning_rate": 4.9711032809938786e-05, + "loss": 0.9746, + "step": 48539 + }, + { + "epoch": 0.05, + "learning_rate": 4.971102090167592e-05, + "loss": 1.0974, + "step": 48540 + }, + { + "epoch": 0.05, + "learning_rate": 4.97110089931691e-05, + "loss": 0.9833, + "step": 48541 + }, + { + "epoch": 0.05, + "learning_rate": 4.971099708441836e-05, + "loss": 1.1188, + "step": 48542 + }, + { + "epoch": 0.05, + "learning_rate": 4.971098517542367e-05, + "loss": 1.1522, + "step": 48543 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710973266185055e-05, + "loss": 1.0231, + "step": 48544 + }, + { + "epoch": 0.05, + "learning_rate": 4.971096135670249e-05, + "loss": 1.1584, + "step": 48545 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710949446976006e-05, + "loss": 1.1219, + "step": 48546 + }, + { + "epoch": 0.05, + "learning_rate": 4.971093753700557e-05, + "loss": 1.1912, + "step": 48547 + }, + { + "epoch": 0.05, + "learning_rate": 4.971092562679121e-05, + "loss": 1.2502, + "step": 48548 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710913716332905e-05, + "loss": 1.046, + "step": 48549 + }, + { + "epoch": 0.05, + "learning_rate": 4.971090180563067e-05, + "loss": 1.0619, + "step": 48550 + }, + { + "epoch": 0.05, + "learning_rate": 4.97108898946845e-05, + "loss": 0.9154, + "step": 48551 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710877983494396e-05, + "loss": 0.7999, + "step": 48552 + }, + { + "epoch": 0.05, + "learning_rate": 4.971086607206035e-05, + "loss": 0.969, + "step": 48553 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710854160382374e-05, + "loss": 1.0181, + "step": 48554 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710842248460466e-05, + "loss": 1.1769, + "step": 48555 + }, + { + "epoch": 0.05, + "learning_rate": 4.971083033629461e-05, + "loss": 1.2381, + "step": 48556 + }, + { + "epoch": 0.05, + "learning_rate": 4.971081842388483e-05, + "loss": 0.8589, + "step": 48557 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710806511231114e-05, + "loss": 0.7489, + "step": 48558 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710794598333464e-05, + "loss": 0.9899, + "step": 48559 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710782685191876e-05, + "loss": 0.7666, + "step": 48560 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710770771806355e-05, + "loss": 0.7765, + "step": 48561 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710758858176896e-05, + "loss": 1.1508, + "step": 48562 + }, + { + "epoch": 0.05, + "learning_rate": 4.971074694430351e-05, + "loss": 1.4693, + "step": 48563 + }, + { + "epoch": 0.05, + "learning_rate": 4.971073503018618e-05, + "loss": 0.4549, + "step": 48564 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710723115824926e-05, + "loss": 0.1327, + "step": 48565 + }, + { + "epoch": 0.05, + "learning_rate": 4.971071120121973e-05, + "loss": 0.0451, + "step": 48566 + }, + { + "epoch": 0.05, + "learning_rate": 4.971069928637061e-05, + "loss": 0.0699, + "step": 48567 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710687371277556e-05, + "loss": 0.0417, + "step": 48568 + }, + { + "epoch": 0.05, + "learning_rate": 4.971067545594056e-05, + "loss": 0.0364, + "step": 48569 + }, + { + "epoch": 0.05, + "learning_rate": 4.971066354035963e-05, + "loss": 0.0233, + "step": 48570 + }, + { + "epoch": 0.05, + "learning_rate": 4.971065162453478e-05, + "loss": 0.0183, + "step": 48571 + }, + { + "epoch": 0.05, + "learning_rate": 4.971063970846598e-05, + "loss": 0.0286, + "step": 48572 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710627792153254e-05, + "loss": 0.0233, + "step": 48573 + }, + { + "epoch": 0.05, + "learning_rate": 4.97106158755966e-05, + "loss": 0.0251, + "step": 48574 + }, + { + "epoch": 0.05, + "learning_rate": 4.971060395879601e-05, + "loss": 0.0198, + "step": 48575 + }, + { + "epoch": 0.05, + "learning_rate": 4.971059204175148e-05, + "loss": 0.1375, + "step": 48576 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710580124463025e-05, + "loss": 0.1925, + "step": 48577 + }, + { + "epoch": 0.05, + "learning_rate": 4.971056820693064e-05, + "loss": 0.1795, + "step": 48578 + }, + { + "epoch": 0.05, + "learning_rate": 4.971055628915432e-05, + "loss": 0.1588, + "step": 48579 + }, + { + "epoch": 0.05, + "learning_rate": 4.971054437113406e-05, + "loss": 0.1162, + "step": 48580 + }, + { + "epoch": 0.05, + "learning_rate": 4.971053245286988e-05, + "loss": 0.1405, + "step": 48581 + }, + { + "epoch": 0.05, + "learning_rate": 4.971052053436176e-05, + "loss": 0.7638, + "step": 48582 + }, + { + "epoch": 0.05, + "learning_rate": 4.971050861560971e-05, + "loss": 0.9154, + "step": 48583 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710496696613726e-05, + "loss": 0.9835, + "step": 48584 + }, + { + "epoch": 0.05, + "learning_rate": 4.971048477737382e-05, + "loss": 0.9598, + "step": 48585 + }, + { + "epoch": 0.05, + "learning_rate": 4.971047285788998e-05, + "loss": 0.8985, + "step": 48586 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710460938162195e-05, + "loss": 1.1339, + "step": 48587 + }, + { + "epoch": 0.05, + "learning_rate": 4.971044901819049e-05, + "loss": 1.1102, + "step": 48588 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710437097974856e-05, + "loss": 1.0664, + "step": 48589 + }, + { + "epoch": 0.05, + "learning_rate": 4.971042517751528e-05, + "loss": 1.0093, + "step": 48590 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710413256811784e-05, + "loss": 1.6101, + "step": 48591 + }, + { + "epoch": 0.05, + "learning_rate": 4.971040133586435e-05, + "loss": 1.7868, + "step": 48592 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710389414672997e-05, + "loss": 1.4672, + "step": 48593 + }, + { + "epoch": 0.05, + "learning_rate": 4.97103774932377e-05, + "loss": 1.094, + "step": 48594 + }, + { + "epoch": 0.05, + "learning_rate": 4.971036557155848e-05, + "loss": 1.0568, + "step": 48595 + }, + { + "epoch": 0.05, + "learning_rate": 4.971035364963533e-05, + "loss": 1.2639, + "step": 48596 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710341727468246e-05, + "loss": 1.0992, + "step": 48597 + }, + { + "epoch": 0.05, + "learning_rate": 4.971032980505723e-05, + "loss": 1.0732, + "step": 48598 + }, + { + "epoch": 0.05, + "learning_rate": 4.971031788240229e-05, + "loss": 1.1305, + "step": 48599 + }, + { + "epoch": 0.05, + "learning_rate": 4.971030595950342e-05, + "loss": 0.9223, + "step": 48600 + }, + { + "epoch": 0.05, + "learning_rate": 4.971029403636062e-05, + "loss": 0.4391, + "step": 48601 + }, + { + "epoch": 0.05, + "learning_rate": 4.971028211297388e-05, + "loss": 0.4054, + "step": 48602 + }, + { + "epoch": 0.05, + "learning_rate": 4.971027018934322e-05, + "loss": 0.357, + "step": 48603 + }, + { + "epoch": 0.05, + "learning_rate": 4.971025826546863e-05, + "loss": 1.0632, + "step": 48604 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710246341350113e-05, + "loss": 1.1577, + "step": 48605 + }, + { + "epoch": 0.05, + "learning_rate": 4.971023441698767e-05, + "loss": 1.0025, + "step": 48606 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710222492381286e-05, + "loss": 1.275, + "step": 48607 + }, + { + "epoch": 0.05, + "learning_rate": 4.971021056753098e-05, + "loss": 1.2043, + "step": 48608 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710198642436745e-05, + "loss": 0.8859, + "step": 48609 + }, + { + "epoch": 0.05, + "learning_rate": 4.971018671709858e-05, + "loss": 0.9674, + "step": 48610 + }, + { + "epoch": 0.05, + "learning_rate": 4.971017479151648e-05, + "loss": 1.1341, + "step": 48611 + }, + { + "epoch": 0.05, + "learning_rate": 4.971016286569046e-05, + "loss": 0.97, + "step": 48612 + }, + { + "epoch": 0.05, + "learning_rate": 4.971015093962051e-05, + "loss": 1.211, + "step": 48613 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710139013306634e-05, + "loss": 0.8105, + "step": 48614 + }, + { + "epoch": 0.05, + "learning_rate": 4.971012708674883e-05, + "loss": 0.6126, + "step": 48615 + }, + { + "epoch": 0.05, + "learning_rate": 4.971011515994709e-05, + "loss": 0.8988, + "step": 48616 + }, + { + "epoch": 0.05, + "learning_rate": 4.971010323290143e-05, + "loss": 1.0996, + "step": 48617 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710091305611845e-05, + "loss": 0.9781, + "step": 48618 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710079378078326e-05, + "loss": 1.0819, + "step": 48619 + }, + { + "epoch": 0.05, + "learning_rate": 4.971006745030088e-05, + "loss": 0.9925, + "step": 48620 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710055522279506e-05, + "loss": 0.7779, + "step": 48621 + }, + { + "epoch": 0.05, + "learning_rate": 4.971004359401421e-05, + "loss": 0.5821, + "step": 48622 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710031665504986e-05, + "loss": 1.2952, + "step": 48623 + }, + { + "epoch": 0.05, + "learning_rate": 4.9710019736751835e-05, + "loss": 0.9787, + "step": 48624 + }, + { + "epoch": 0.05, + "learning_rate": 4.971000780775476e-05, + "loss": 1.0996, + "step": 48625 + }, + { + "epoch": 0.05, + "learning_rate": 4.970999587851374e-05, + "loss": 1.2526, + "step": 48626 + }, + { + "epoch": 0.05, + "learning_rate": 4.970998394902881e-05, + "loss": 1.1917, + "step": 48627 + }, + { + "epoch": 0.05, + "learning_rate": 4.970997201929995e-05, + "loss": 1.0371, + "step": 48628 + }, + { + "epoch": 0.05, + "learning_rate": 4.970996008932716e-05, + "loss": 0.8176, + "step": 48629 + }, + { + "epoch": 0.05, + "learning_rate": 4.970994815911045e-05, + "loss": 0.7958, + "step": 48630 + }, + { + "epoch": 0.05, + "learning_rate": 4.970993622864981e-05, + "loss": 1.0039, + "step": 48631 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709924297945244e-05, + "loss": 0.9131, + "step": 48632 + }, + { + "epoch": 0.05, + "learning_rate": 4.970991236699676e-05, + "loss": 0.6566, + "step": 48633 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709900435804336e-05, + "loss": 0.9387, + "step": 48634 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709888504367995e-05, + "loss": 1.227, + "step": 48635 + }, + { + "epoch": 0.05, + "learning_rate": 4.970987657268772e-05, + "loss": 1.2886, + "step": 48636 + }, + { + "epoch": 0.05, + "learning_rate": 4.970986464076353e-05, + "loss": 1.094, + "step": 48637 + }, + { + "epoch": 0.05, + "learning_rate": 4.970985270859541e-05, + "loss": 1.1316, + "step": 48638 + }, + { + "epoch": 0.05, + "learning_rate": 4.970984077618337e-05, + "loss": 0.9193, + "step": 48639 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709828843527396e-05, + "loss": 1.2476, + "step": 48640 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709816910627503e-05, + "loss": 1.0932, + "step": 48641 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709804977483686e-05, + "loss": 0.7058, + "step": 48642 + }, + { + "epoch": 0.05, + "learning_rate": 4.970979304409594e-05, + "loss": 0.6703, + "step": 48643 + }, + { + "epoch": 0.05, + "learning_rate": 4.970978111046427e-05, + "loss": 0.7303, + "step": 48644 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709769176588675e-05, + "loss": 1.0653, + "step": 48645 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709757242469156e-05, + "loss": 1.1113, + "step": 48646 + }, + { + "epoch": 0.05, + "learning_rate": 4.970974530810571e-05, + "loss": 1.2638, + "step": 48647 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709733373498344e-05, + "loss": 1.0748, + "step": 48648 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709721438647064e-05, + "loss": 0.7166, + "step": 48649 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709709503551845e-05, + "loss": 1.259, + "step": 48650 + }, + { + "epoch": 0.05, + "learning_rate": 4.97096975682127e-05, + "loss": 1.3125, + "step": 48651 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709685632629645e-05, + "loss": 0.7832, + "step": 48652 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709673696802664e-05, + "loss": 0.8633, + "step": 48653 + }, + { + "epoch": 0.05, + "learning_rate": 4.970966176073175e-05, + "loss": 0.871, + "step": 48654 + }, + { + "epoch": 0.05, + "learning_rate": 4.970964982441691e-05, + "loss": 0.9248, + "step": 48655 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709637887858164e-05, + "loss": 1.0523, + "step": 48656 + }, + { + "epoch": 0.05, + "learning_rate": 4.970962595105548e-05, + "loss": 0.9449, + "step": 48657 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709614014008875e-05, + "loss": 0.8806, + "step": 48658 + }, + { + "epoch": 0.05, + "learning_rate": 4.970960207671835e-05, + "loss": 0.9649, + "step": 48659 + }, + { + "epoch": 0.05, + "learning_rate": 4.970959013918391e-05, + "loss": 0.7673, + "step": 48660 + }, + { + "epoch": 0.05, + "learning_rate": 4.970957820140554e-05, + "loss": 1.0282, + "step": 48661 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709566263383244e-05, + "loss": 0.6295, + "step": 48662 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709554325117026e-05, + "loss": 1.0447, + "step": 48663 + }, + { + "epoch": 0.05, + "learning_rate": 4.970954238660689e-05, + "loss": 1.1168, + "step": 48664 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709530447852826e-05, + "loss": 1.0573, + "step": 48665 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709518508854845e-05, + "loss": 1.2017, + "step": 48666 + }, + { + "epoch": 0.05, + "learning_rate": 4.970950656961294e-05, + "loss": 1.2446, + "step": 48667 + }, + { + "epoch": 0.05, + "learning_rate": 4.970949463012712e-05, + "loss": 1.0422, + "step": 48668 + }, + { + "epoch": 0.05, + "learning_rate": 4.970948269039737e-05, + "loss": 1.1188, + "step": 48669 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709470750423703e-05, + "loss": 1.1279, + "step": 48670 + }, + { + "epoch": 0.05, + "learning_rate": 4.970945881020611e-05, + "loss": 1.0411, + "step": 48671 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709446869744606e-05, + "loss": 0.8527, + "step": 48672 + }, + { + "epoch": 0.05, + "learning_rate": 4.970943492903917e-05, + "loss": 1.1378, + "step": 48673 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709422988089814e-05, + "loss": 1.1491, + "step": 48674 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709411046896534e-05, + "loss": 1.5815, + "step": 48675 + }, + { + "epoch": 0.05, + "learning_rate": 4.970939910545934e-05, + "loss": 1.6658, + "step": 48676 + }, + { + "epoch": 0.05, + "learning_rate": 4.970938716377822e-05, + "loss": 1.2894, + "step": 48677 + }, + { + "epoch": 0.05, + "learning_rate": 4.970937522185318e-05, + "loss": 0.9253, + "step": 48678 + }, + { + "epoch": 0.05, + "learning_rate": 4.970936327968423e-05, + "loss": 1.0955, + "step": 48679 + }, + { + "epoch": 0.05, + "learning_rate": 4.970935133727135e-05, + "loss": 1.1455, + "step": 48680 + }, + { + "epoch": 0.05, + "learning_rate": 4.970933939461455e-05, + "loss": 1.0009, + "step": 48681 + }, + { + "epoch": 0.05, + "learning_rate": 4.970932745171383e-05, + "loss": 1.4958, + "step": 48682 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709315508569196e-05, + "loss": 1.2797, + "step": 48683 + }, + { + "epoch": 0.05, + "learning_rate": 4.970930356518063e-05, + "loss": 1.2096, + "step": 48684 + }, + { + "epoch": 0.05, + "learning_rate": 4.970929162154815e-05, + "loss": 0.948, + "step": 48685 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709279677671757e-05, + "loss": 0.9585, + "step": 48686 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709267733551435e-05, + "loss": 1.1379, + "step": 48687 + }, + { + "epoch": 0.05, + "learning_rate": 4.97092557891872e-05, + "loss": 0.9783, + "step": 48688 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709243844579045e-05, + "loss": 0.9667, + "step": 48689 + }, + { + "epoch": 0.05, + "learning_rate": 4.970923189972697e-05, + "loss": 0.9763, + "step": 48690 + }, + { + "epoch": 0.05, + "learning_rate": 4.970921995463097e-05, + "loss": 0.489, + "step": 48691 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709208009291054e-05, + "loss": 1.238, + "step": 48692 + }, + { + "epoch": 0.05, + "learning_rate": 4.970919606370722e-05, + "loss": 0.956, + "step": 48693 + }, + { + "epoch": 0.05, + "learning_rate": 4.970918411787947e-05, + "loss": 1.1327, + "step": 48694 + }, + { + "epoch": 0.05, + "learning_rate": 4.97091721718078e-05, + "loss": 1.1954, + "step": 48695 + }, + { + "epoch": 0.05, + "learning_rate": 4.970916022549221e-05, + "loss": 0.5737, + "step": 48696 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709148278932706e-05, + "loss": 0.1381, + "step": 48697 + }, + { + "epoch": 0.05, + "learning_rate": 4.970913633212928e-05, + "loss": 0.3937, + "step": 48698 + }, + { + "epoch": 0.05, + "learning_rate": 4.970912438508193e-05, + "loss": 0.7505, + "step": 48699 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709112437790674e-05, + "loss": 0.7276, + "step": 48700 + }, + { + "epoch": 0.05, + "learning_rate": 4.970910049025549e-05, + "loss": 1.1574, + "step": 48701 + }, + { + "epoch": 0.05, + "learning_rate": 4.970908854247639e-05, + "loss": 0.8436, + "step": 48702 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709076594453376e-05, + "loss": 0.6668, + "step": 48703 + }, + { + "epoch": 0.05, + "learning_rate": 4.970906464618644e-05, + "loss": 0.5546, + "step": 48704 + }, + { + "epoch": 0.05, + "learning_rate": 4.97090526976756e-05, + "loss": 0.7311, + "step": 48705 + }, + { + "epoch": 0.05, + "learning_rate": 4.970904074892082e-05, + "loss": 0.8949, + "step": 48706 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709028799922145e-05, + "loss": 0.5243, + "step": 48707 + }, + { + "epoch": 0.05, + "learning_rate": 4.970901685067954e-05, + "loss": 0.4313, + "step": 48708 + }, + { + "epoch": 0.05, + "learning_rate": 4.970900490119302e-05, + "loss": 0.6317, + "step": 48709 + }, + { + "epoch": 0.05, + "learning_rate": 4.970899295146258e-05, + "loss": 0.9806, + "step": 48710 + }, + { + "epoch": 0.05, + "learning_rate": 4.970898100148823e-05, + "loss": 1.0431, + "step": 48711 + }, + { + "epoch": 0.05, + "learning_rate": 4.970896905126996e-05, + "loss": 1.0497, + "step": 48712 + }, + { + "epoch": 0.05, + "learning_rate": 4.970895710080777e-05, + "loss": 0.8684, + "step": 48713 + }, + { + "epoch": 0.05, + "learning_rate": 4.970894515010167e-05, + "loss": 0.4871, + "step": 48714 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708933199151654e-05, + "loss": 1.1874, + "step": 48715 + }, + { + "epoch": 0.05, + "learning_rate": 4.970892124795771e-05, + "loss": 1.2583, + "step": 48716 + }, + { + "epoch": 0.05, + "learning_rate": 4.970890929651986e-05, + "loss": 0.833, + "step": 48717 + }, + { + "epoch": 0.05, + "learning_rate": 4.97088973448381e-05, + "loss": 0.5428, + "step": 48718 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708885392912414e-05, + "loss": 1.0427, + "step": 48719 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708873440742814e-05, + "loss": 1.1316, + "step": 48720 + }, + { + "epoch": 0.05, + "learning_rate": 4.97088614883293e-05, + "loss": 0.6187, + "step": 48721 + }, + { + "epoch": 0.05, + "learning_rate": 4.970884953567187e-05, + "loss": 0.8796, + "step": 48722 + }, + { + "epoch": 0.05, + "learning_rate": 4.970883758277052e-05, + "loss": 0.8132, + "step": 48723 + }, + { + "epoch": 0.05, + "learning_rate": 4.970882562962527e-05, + "loss": 1.3017, + "step": 48724 + }, + { + "epoch": 0.05, + "learning_rate": 4.970881367623609e-05, + "loss": 1.2895, + "step": 48725 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708801722603e-05, + "loss": 0.7943, + "step": 48726 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708789768725995e-05, + "loss": 1.2766, + "step": 48727 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708777814605075e-05, + "loss": 0.8321, + "step": 48728 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708765860240236e-05, + "loss": 1.4865, + "step": 48729 + }, + { + "epoch": 0.05, + "learning_rate": 4.970875390563149e-05, + "loss": 1.3065, + "step": 48730 + }, + { + "epoch": 0.05, + "learning_rate": 4.970874195077882e-05, + "loss": 1.3997, + "step": 48731 + }, + { + "epoch": 0.05, + "learning_rate": 4.970872999568225e-05, + "loss": 1.0683, + "step": 48732 + }, + { + "epoch": 0.05, + "learning_rate": 4.970871804034175e-05, + "loss": 1.2086, + "step": 48733 + }, + { + "epoch": 0.05, + "learning_rate": 4.970870608475735e-05, + "loss": 1.2193, + "step": 48734 + }, + { + "epoch": 0.05, + "learning_rate": 4.970869412892903e-05, + "loss": 1.5248, + "step": 48735 + }, + { + "epoch": 0.05, + "learning_rate": 4.970868217285679e-05, + "loss": 1.1836, + "step": 48736 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708670216540644e-05, + "loss": 1.3239, + "step": 48737 + }, + { + "epoch": 0.05, + "learning_rate": 4.970865825998059e-05, + "loss": 1.0486, + "step": 48738 + }, + { + "epoch": 0.05, + "learning_rate": 4.970864630317661e-05, + "loss": 1.0706, + "step": 48739 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708634346128725e-05, + "loss": 1.3415, + "step": 48740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708622388836926e-05, + "loss": 0.5004, + "step": 48741 + }, + { + "epoch": 0.05, + "learning_rate": 4.970861043130121e-05, + "loss": 1.0752, + "step": 48742 + }, + { + "epoch": 0.05, + "learning_rate": 4.970859847352158e-05, + "loss": 0.5179, + "step": 48743 + }, + { + "epoch": 0.05, + "learning_rate": 4.970858651549803e-05, + "loss": 0.3393, + "step": 48744 + }, + { + "epoch": 0.05, + "learning_rate": 4.970857455723059e-05, + "loss": 1.1981, + "step": 48745 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708562598719225e-05, + "loss": 1.1075, + "step": 48746 + }, + { + "epoch": 0.05, + "learning_rate": 4.970855063996394e-05, + "loss": 0.6883, + "step": 48747 + }, + { + "epoch": 0.05, + "learning_rate": 4.970853868096475e-05, + "loss": 0.81, + "step": 48748 + }, + { + "epoch": 0.05, + "learning_rate": 4.970852672172165e-05, + "loss": 1.3649, + "step": 48749 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708514762234634e-05, + "loss": 1.1171, + "step": 48750 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708502802503706e-05, + "loss": 0.9786, + "step": 48751 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708490842528866e-05, + "loss": 1.0468, + "step": 48752 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708478882310114e-05, + "loss": 0.9997, + "step": 48753 + }, + { + "epoch": 0.05, + "learning_rate": 4.970846692184745e-05, + "loss": 1.0619, + "step": 48754 + }, + { + "epoch": 0.05, + "learning_rate": 4.970845496114088e-05, + "loss": 0.9817, + "step": 48755 + }, + { + "epoch": 0.05, + "learning_rate": 4.97084430001904e-05, + "loss": 1.0637, + "step": 48756 + }, + { + "epoch": 0.05, + "learning_rate": 4.970843103899599e-05, + "loss": 0.9284, + "step": 48757 + }, + { + "epoch": 0.05, + "learning_rate": 4.970841907755769e-05, + "loss": 0.8128, + "step": 48758 + }, + { + "epoch": 0.05, + "learning_rate": 4.970840711587547e-05, + "loss": 0.8734, + "step": 48759 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708395153949336e-05, + "loss": 1.0599, + "step": 48760 + }, + { + "epoch": 0.05, + "learning_rate": 4.97083831917793e-05, + "loss": 0.5982, + "step": 48761 + }, + { + "epoch": 0.05, + "learning_rate": 4.970837122936535e-05, + "loss": 0.8138, + "step": 48762 + }, + { + "epoch": 0.05, + "learning_rate": 4.970835926670748e-05, + "loss": 0.9829, + "step": 48763 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708347303805715e-05, + "loss": 1.115, + "step": 48764 + }, + { + "epoch": 0.05, + "learning_rate": 4.970833534066003e-05, + "loss": 1.0961, + "step": 48765 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708323377270436e-05, + "loss": 1.0431, + "step": 48766 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708311413636935e-05, + "loss": 1.1264, + "step": 48767 + }, + { + "epoch": 0.05, + "learning_rate": 4.970829944975952e-05, + "loss": 0.6619, + "step": 48768 + }, + { + "epoch": 0.05, + "learning_rate": 4.970828748563819e-05, + "loss": 0.8765, + "step": 48769 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708275521272964e-05, + "loss": 1.4363, + "step": 48770 + }, + { + "epoch": 0.05, + "learning_rate": 4.970826355666382e-05, + "loss": 1.4191, + "step": 48771 + }, + { + "epoch": 0.05, + "learning_rate": 4.970825159181076e-05, + "loss": 1.1133, + "step": 48772 + }, + { + "epoch": 0.05, + "learning_rate": 4.97082396267138e-05, + "loss": 1.12, + "step": 48773 + }, + { + "epoch": 0.05, + "learning_rate": 4.970822766137293e-05, + "loss": 1.4987, + "step": 48774 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708215695788154e-05, + "loss": 1.0512, + "step": 48775 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708203729959456e-05, + "loss": 0.817, + "step": 48776 + }, + { + "epoch": 0.05, + "learning_rate": 4.970819176388686e-05, + "loss": 1.2466, + "step": 48777 + }, + { + "epoch": 0.05, + "learning_rate": 4.970817979757035e-05, + "loss": 1.1203, + "step": 48778 + }, + { + "epoch": 0.05, + "learning_rate": 4.970816783100993e-05, + "loss": 0.566, + "step": 48779 + }, + { + "epoch": 0.05, + "learning_rate": 4.970815586420561e-05, + "loss": 0.8466, + "step": 48780 + }, + { + "epoch": 0.05, + "learning_rate": 4.970814389715738e-05, + "loss": 1.1126, + "step": 48781 + }, + { + "epoch": 0.05, + "learning_rate": 4.970813192986524e-05, + "loss": 1.3535, + "step": 48782 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708119962329186e-05, + "loss": 1.221, + "step": 48783 + }, + { + "epoch": 0.05, + "learning_rate": 4.970810799454922e-05, + "loss": 0.9901, + "step": 48784 + }, + { + "epoch": 0.05, + "learning_rate": 4.970809602652537e-05, + "loss": 1.0588, + "step": 48785 + }, + { + "epoch": 0.05, + "learning_rate": 4.970808405825759e-05, + "loss": 0.6668, + "step": 48786 + }, + { + "epoch": 0.05, + "learning_rate": 4.970807208974591e-05, + "loss": 0.9854, + "step": 48787 + }, + { + "epoch": 0.05, + "learning_rate": 4.970806012099032e-05, + "loss": 1.1171, + "step": 48788 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708048151990817e-05, + "loss": 1.3048, + "step": 48789 + }, + { + "epoch": 0.05, + "learning_rate": 4.970803618274742e-05, + "loss": 1.1632, + "step": 48790 + }, + { + "epoch": 0.05, + "learning_rate": 4.97080242132601e-05, + "loss": 0.8332, + "step": 48791 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708012243528876e-05, + "loss": 0.9156, + "step": 48792 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708000273553755e-05, + "loss": 1.4457, + "step": 48793 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707988303334716e-05, + "loss": 1.0472, + "step": 48794 + }, + { + "epoch": 0.05, + "learning_rate": 4.970797633287178e-05, + "loss": 1.1135, + "step": 48795 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707964362164936e-05, + "loss": 0.9879, + "step": 48796 + }, + { + "epoch": 0.05, + "learning_rate": 4.970795239121418e-05, + "loss": 1.0649, + "step": 48797 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707940420019523e-05, + "loss": 1.1258, + "step": 48798 + }, + { + "epoch": 0.05, + "learning_rate": 4.970792844858095e-05, + "loss": 0.8285, + "step": 48799 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707916476898485e-05, + "loss": 1.0021, + "step": 48800 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707904504972105e-05, + "loss": 1.158, + "step": 48801 + }, + { + "epoch": 0.05, + "learning_rate": 4.970789253280182e-05, + "loss": 1.0914, + "step": 48802 + }, + { + "epoch": 0.05, + "learning_rate": 4.970788056038763e-05, + "loss": 0.9984, + "step": 48803 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707868587729536e-05, + "loss": 1.186, + "step": 48804 + }, + { + "epoch": 0.05, + "learning_rate": 4.970785661482753e-05, + "loss": 1.0367, + "step": 48805 + }, + { + "epoch": 0.05, + "learning_rate": 4.970784464168162e-05, + "loss": 1.3083, + "step": 48806 + }, + { + "epoch": 0.05, + "learning_rate": 4.970783266829181e-05, + "loss": 1.0801, + "step": 48807 + }, + { + "epoch": 0.05, + "learning_rate": 4.970782069465809e-05, + "loss": 0.8859, + "step": 48808 + }, + { + "epoch": 0.05, + "learning_rate": 4.970780872078047e-05, + "loss": 1.4637, + "step": 48809 + }, + { + "epoch": 0.05, + "learning_rate": 4.970779674665894e-05, + "loss": 0.9626, + "step": 48810 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707784772293506e-05, + "loss": 1.6242, + "step": 48811 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707772797684166e-05, + "loss": 1.1582, + "step": 48812 + }, + { + "epoch": 0.05, + "learning_rate": 4.970776082283092e-05, + "loss": 0.7457, + "step": 48813 + }, + { + "epoch": 0.05, + "learning_rate": 4.970774884773377e-05, + "loss": 0.7334, + "step": 48814 + }, + { + "epoch": 0.05, + "learning_rate": 4.970773687239272e-05, + "loss": 0.8774, + "step": 48815 + }, + { + "epoch": 0.05, + "learning_rate": 4.970772489680776e-05, + "loss": 0.6984, + "step": 48816 + }, + { + "epoch": 0.05, + "learning_rate": 4.970771292097891e-05, + "loss": 1.2685, + "step": 48817 + }, + { + "epoch": 0.05, + "learning_rate": 4.970770094490614e-05, + "loss": 0.7042, + "step": 48818 + }, + { + "epoch": 0.05, + "learning_rate": 4.970768896858947e-05, + "loss": 0.6971, + "step": 48819 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707676992028894e-05, + "loss": 1.3597, + "step": 48820 + }, + { + "epoch": 0.05, + "learning_rate": 4.970766501522442e-05, + "loss": 1.0832, + "step": 48821 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707653038176036e-05, + "loss": 0.8112, + "step": 48822 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707641060883756e-05, + "loss": 1.2639, + "step": 48823 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707629083347565e-05, + "loss": 0.9796, + "step": 48824 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707617105567476e-05, + "loss": 0.9362, + "step": 48825 + }, + { + "epoch": 0.05, + "learning_rate": 4.970760512754348e-05, + "loss": 0.8757, + "step": 48826 + }, + { + "epoch": 0.05, + "learning_rate": 4.970759314927559e-05, + "loss": 0.9655, + "step": 48827 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707581170763787e-05, + "loss": 0.8697, + "step": 48828 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707569192008085e-05, + "loss": 1.475, + "step": 48829 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707557213008485e-05, + "loss": 0.844, + "step": 48830 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707545233764974e-05, + "loss": 0.9929, + "step": 48831 + }, + { + "epoch": 0.05, + "learning_rate": 4.970753325427756e-05, + "loss": 0.7935, + "step": 48832 + }, + { + "epoch": 0.05, + "learning_rate": 4.970752127454625e-05, + "loss": 1.0025, + "step": 48833 + }, + { + "epoch": 0.05, + "learning_rate": 4.970750929457103e-05, + "loss": 1.1664, + "step": 48834 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707497314351914e-05, + "loss": 1.6204, + "step": 48835 + }, + { + "epoch": 0.05, + "learning_rate": 4.970748533388889e-05, + "loss": 1.2172, + "step": 48836 + }, + { + "epoch": 0.05, + "learning_rate": 4.970747335318198e-05, + "loss": 0.9492, + "step": 48837 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707461372231154e-05, + "loss": 0.6088, + "step": 48838 + }, + { + "epoch": 0.05, + "learning_rate": 4.970744939103643e-05, + "loss": 0.7855, + "step": 48839 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707437409597804e-05, + "loss": 0.816, + "step": 48840 + }, + { + "epoch": 0.05, + "learning_rate": 4.970742542791528e-05, + "loss": 0.8971, + "step": 48841 + }, + { + "epoch": 0.05, + "learning_rate": 4.970741344598885e-05, + "loss": 1.0335, + "step": 48842 + }, + { + "epoch": 0.05, + "learning_rate": 4.970740146381851e-05, + "loss": 1.0359, + "step": 48843 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707389481404285e-05, + "loss": 1.3481, + "step": 48844 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707377498746154e-05, + "loss": 1.235, + "step": 48845 + }, + { + "epoch": 0.05, + "learning_rate": 4.970736551584412e-05, + "loss": 0.8103, + "step": 48846 + }, + { + "epoch": 0.05, + "learning_rate": 4.970735353269819e-05, + "loss": 1.2373, + "step": 48847 + }, + { + "epoch": 0.05, + "learning_rate": 4.970734154930835e-05, + "loss": 1.4778, + "step": 48848 + }, + { + "epoch": 0.05, + "learning_rate": 4.970732956567462e-05, + "loss": 0.859, + "step": 48849 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707317581796985e-05, + "loss": 0.9391, + "step": 48850 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707305597675444e-05, + "loss": 0.5783, + "step": 48851 + }, + { + "epoch": 0.05, + "learning_rate": 4.970729361331001e-05, + "loss": 0.6488, + "step": 48852 + }, + { + "epoch": 0.05, + "learning_rate": 4.970728162870068e-05, + "loss": 0.7163, + "step": 48853 + }, + { + "epoch": 0.05, + "learning_rate": 4.970726964384744e-05, + "loss": 0.6772, + "step": 48854 + }, + { + "epoch": 0.05, + "learning_rate": 4.97072576587503e-05, + "loss": 0.8496, + "step": 48855 + }, + { + "epoch": 0.05, + "learning_rate": 4.970724567340927e-05, + "loss": 1.1143, + "step": 48856 + }, + { + "epoch": 0.05, + "learning_rate": 4.970723368782434e-05, + "loss": 0.874, + "step": 48857 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707221701995504e-05, + "loss": 0.8586, + "step": 48858 + }, + { + "epoch": 0.05, + "learning_rate": 4.970720971592277e-05, + "loss": 1.1579, + "step": 48859 + }, + { + "epoch": 0.05, + "learning_rate": 4.970719772960614e-05, + "loss": 1.1395, + "step": 48860 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707185743045616e-05, + "loss": 0.9035, + "step": 48861 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707173756241177e-05, + "loss": 0.8315, + "step": 48862 + }, + { + "epoch": 0.05, + "learning_rate": 4.970716176919285e-05, + "loss": 0.9866, + "step": 48863 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707149781900624e-05, + "loss": 1.2194, + "step": 48864 + }, + { + "epoch": 0.05, + "learning_rate": 4.97071377943645e-05, + "loss": 0.8548, + "step": 48865 + }, + { + "epoch": 0.05, + "learning_rate": 4.970712580658447e-05, + "loss": 1.0399, + "step": 48866 + }, + { + "epoch": 0.05, + "learning_rate": 4.970711381856055e-05, + "loss": 0.8203, + "step": 48867 + }, + { + "epoch": 0.05, + "learning_rate": 4.970710183029274e-05, + "loss": 0.906, + "step": 48868 + }, + { + "epoch": 0.05, + "learning_rate": 4.970708984178101e-05, + "loss": 1.022, + "step": 48869 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707077853025395e-05, + "loss": 0.8754, + "step": 48870 + }, + { + "epoch": 0.05, + "learning_rate": 4.970706586402588e-05, + "loss": 1.0018, + "step": 48871 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707053874782474e-05, + "loss": 0.9014, + "step": 48872 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707041885295156e-05, + "loss": 3.5718, + "step": 48873 + }, + { + "epoch": 0.05, + "learning_rate": 4.970702989556395e-05, + "loss": 1.6903, + "step": 48874 + }, + { + "epoch": 0.05, + "learning_rate": 4.970701790558885e-05, + "loss": 1.1004, + "step": 48875 + }, + { + "epoch": 0.05, + "learning_rate": 4.970700591536984e-05, + "loss": 0.8495, + "step": 48876 + }, + { + "epoch": 0.05, + "learning_rate": 4.970699392490694e-05, + "loss": 1.0099, + "step": 48877 + }, + { + "epoch": 0.05, + "learning_rate": 4.970698193420015e-05, + "loss": 1.324, + "step": 48878 + }, + { + "epoch": 0.05, + "learning_rate": 4.970696994324945e-05, + "loss": 0.99, + "step": 48879 + }, + { + "epoch": 0.05, + "learning_rate": 4.970695795205487e-05, + "loss": 1.4168, + "step": 48880 + }, + { + "epoch": 0.05, + "learning_rate": 4.970694596061638e-05, + "loss": 1.1406, + "step": 48881 + }, + { + "epoch": 0.05, + "learning_rate": 4.970693396893399e-05, + "loss": 1.4104, + "step": 48882 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706921977007714e-05, + "loss": 2.2266, + "step": 48883 + }, + { + "epoch": 0.05, + "learning_rate": 4.970690998483754e-05, + "loss": 0.9847, + "step": 48884 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706897992423464e-05, + "loss": 1.1163, + "step": 48885 + }, + { + "epoch": 0.05, + "learning_rate": 4.97068859997655e-05, + "loss": 1.1095, + "step": 48886 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706874006863636e-05, + "loss": 0.4774, + "step": 48887 + }, + { + "epoch": 0.05, + "learning_rate": 4.970686201371787e-05, + "loss": 0.9867, + "step": 48888 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706850020328215e-05, + "loss": 1.073, + "step": 48889 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706838026694664e-05, + "loss": 1.064, + "step": 48890 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706826032817215e-05, + "loss": 1.0941, + "step": 48891 + }, + { + "epoch": 0.05, + "learning_rate": 4.970681403869587e-05, + "loss": 1.0615, + "step": 48892 + }, + { + "epoch": 0.05, + "learning_rate": 4.970680204433064e-05, + "loss": 1.3993, + "step": 48893 + }, + { + "epoch": 0.05, + "learning_rate": 4.97067900497215e-05, + "loss": 1.0149, + "step": 48894 + }, + { + "epoch": 0.05, + "learning_rate": 4.970677805486847e-05, + "loss": 0.9792, + "step": 48895 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706766059771554e-05, + "loss": 0.8642, + "step": 48896 + }, + { + "epoch": 0.05, + "learning_rate": 4.970675406443074e-05, + "loss": 1.4902, + "step": 48897 + }, + { + "epoch": 0.05, + "learning_rate": 4.970674206884602e-05, + "loss": 0.9006, + "step": 48898 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706730073017416e-05, + "loss": 0.4831, + "step": 48899 + }, + { + "epoch": 0.05, + "learning_rate": 4.970671807694491e-05, + "loss": 0.2548, + "step": 48900 + }, + { + "epoch": 0.05, + "learning_rate": 4.970670608062852e-05, + "loss": 0.7109, + "step": 48901 + }, + { + "epoch": 0.05, + "learning_rate": 4.970669408406823e-05, + "loss": 1.0759, + "step": 48902 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706682087264045e-05, + "loss": 1.8798, + "step": 48903 + }, + { + "epoch": 0.05, + "learning_rate": 4.970667009021597e-05, + "loss": 1.3365, + "step": 48904 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706658092923995e-05, + "loss": 1.3408, + "step": 48905 + }, + { + "epoch": 0.05, + "learning_rate": 4.970664609538813e-05, + "loss": 1.0033, + "step": 48906 + }, + { + "epoch": 0.05, + "learning_rate": 4.970663409760837e-05, + "loss": 0.8405, + "step": 48907 + }, + { + "epoch": 0.05, + "learning_rate": 4.970662209958472e-05, + "loss": 0.8386, + "step": 48908 + }, + { + "epoch": 0.05, + "learning_rate": 4.970661010131717e-05, + "loss": 1.0547, + "step": 48909 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706598102805735e-05, + "loss": 0.8196, + "step": 48910 + }, + { + "epoch": 0.05, + "learning_rate": 4.97065861040504e-05, + "loss": 1.0369, + "step": 48911 + }, + { + "epoch": 0.05, + "learning_rate": 4.970657410505117e-05, + "loss": 1.2297, + "step": 48912 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706562105808055e-05, + "loss": 0.8946, + "step": 48913 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706550106321045e-05, + "loss": 0.7122, + "step": 48914 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706538106590145e-05, + "loss": 0.7324, + "step": 48915 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706526106615346e-05, + "loss": 1.0593, + "step": 48916 + }, + { + "epoch": 0.05, + "learning_rate": 4.970651410639666e-05, + "loss": 1.4309, + "step": 48917 + }, + { + "epoch": 0.05, + "learning_rate": 4.970650210593408e-05, + "loss": 1.1627, + "step": 48918 + }, + { + "epoch": 0.05, + "learning_rate": 4.970649010522761e-05, + "loss": 0.503, + "step": 48919 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706478104277246e-05, + "loss": 0.7023, + "step": 48920 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706466103082984e-05, + "loss": 0.9769, + "step": 48921 + }, + { + "epoch": 0.05, + "learning_rate": 4.970645410164484e-05, + "loss": 0.8109, + "step": 48922 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706442099962794e-05, + "loss": 0.8526, + "step": 48923 + }, + { + "epoch": 0.05, + "learning_rate": 4.970643009803686e-05, + "loss": 0.613, + "step": 48924 + }, + { + "epoch": 0.05, + "learning_rate": 4.970641809586704e-05, + "loss": 0.999, + "step": 48925 + }, + { + "epoch": 0.05, + "learning_rate": 4.970640609345333e-05, + "loss": 1.5394, + "step": 48926 + }, + { + "epoch": 0.05, + "learning_rate": 4.970639409079572e-05, + "loss": 1.5779, + "step": 48927 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706382087894223e-05, + "loss": 1.1776, + "step": 48928 + }, + { + "epoch": 0.05, + "learning_rate": 4.970637008474883e-05, + "loss": 1.3234, + "step": 48929 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706358081359555e-05, + "loss": 1.0539, + "step": 48930 + }, + { + "epoch": 0.05, + "learning_rate": 4.970634607772639e-05, + "loss": 1.0945, + "step": 48931 + }, + { + "epoch": 0.05, + "learning_rate": 4.970633407384933e-05, + "loss": 1.0724, + "step": 48932 + }, + { + "epoch": 0.05, + "learning_rate": 4.970632206972837e-05, + "loss": 0.4494, + "step": 48933 + }, + { + "epoch": 0.05, + "learning_rate": 4.970631006536353e-05, + "loss": 0.09, + "step": 48934 + }, + { + "epoch": 0.05, + "learning_rate": 4.97062980607548e-05, + "loss": 0.0514, + "step": 48935 + }, + { + "epoch": 0.05, + "learning_rate": 4.970628605590218e-05, + "loss": 0.3605, + "step": 48936 + }, + { + "epoch": 0.05, + "learning_rate": 4.970627405080567e-05, + "loss": 1.3806, + "step": 48937 + }, + { + "epoch": 0.05, + "learning_rate": 4.970626204546527e-05, + "loss": 1.1691, + "step": 48938 + }, + { + "epoch": 0.05, + "learning_rate": 4.970625003988097e-05, + "loss": 0.7802, + "step": 48939 + }, + { + "epoch": 0.05, + "learning_rate": 4.970623803405279e-05, + "loss": 0.8985, + "step": 48940 + }, + { + "epoch": 0.05, + "learning_rate": 4.970622602798072e-05, + "loss": 0.4886, + "step": 48941 + }, + { + "epoch": 0.05, + "learning_rate": 4.970621402166476e-05, + "loss": 1.2462, + "step": 48942 + }, + { + "epoch": 0.05, + "learning_rate": 4.970620201510491e-05, + "loss": 1.3443, + "step": 48943 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706190008301175e-05, + "loss": 0.9377, + "step": 48944 + }, + { + "epoch": 0.05, + "learning_rate": 4.970617800125354e-05, + "loss": 0.8466, + "step": 48945 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706165993962026e-05, + "loss": 0.8978, + "step": 48946 + }, + { + "epoch": 0.05, + "learning_rate": 4.970615398642662e-05, + "loss": 0.903, + "step": 48947 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706141978647326e-05, + "loss": 1.0849, + "step": 48948 + }, + { + "epoch": 0.05, + "learning_rate": 4.970612997062414e-05, + "loss": 1.0853, + "step": 48949 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706117962357074e-05, + "loss": 1.1075, + "step": 48950 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706105953846115e-05, + "loss": 1.2274, + "step": 48951 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706093945091264e-05, + "loss": 0.9885, + "step": 48952 + }, + { + "epoch": 0.05, + "learning_rate": 4.970608193609253e-05, + "loss": 0.9868, + "step": 48953 + }, + { + "epoch": 0.05, + "learning_rate": 4.97060699268499e-05, + "loss": 0.9946, + "step": 48954 + }, + { + "epoch": 0.05, + "learning_rate": 4.970605791736339e-05, + "loss": 1.0814, + "step": 48955 + }, + { + "epoch": 0.05, + "learning_rate": 4.970604590763299e-05, + "loss": 1.2041, + "step": 48956 + }, + { + "epoch": 0.05, + "learning_rate": 4.97060338976587e-05, + "loss": 1.1891, + "step": 48957 + }, + { + "epoch": 0.05, + "learning_rate": 4.9706021887440524e-05, + "loss": 1.1241, + "step": 48958 + }, + { + "epoch": 0.05, + "learning_rate": 4.970600987697847e-05, + "loss": 1.0261, + "step": 48959 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705997866272515e-05, + "loss": 1.1307, + "step": 48960 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705985855322676e-05, + "loss": 0.9569, + "step": 48961 + }, + { + "epoch": 0.05, + "learning_rate": 4.970597384412895e-05, + "loss": 1.2506, + "step": 48962 + }, + { + "epoch": 0.05, + "learning_rate": 4.970596183269134e-05, + "loss": 1.0214, + "step": 48963 + }, + { + "epoch": 0.05, + "learning_rate": 4.970594982100985e-05, + "loss": 0.9108, + "step": 48964 + }, + { + "epoch": 0.05, + "learning_rate": 4.970593780908446e-05, + "loss": 0.875, + "step": 48965 + }, + { + "epoch": 0.05, + "learning_rate": 4.970592579691519e-05, + "loss": 0.8331, + "step": 48966 + }, + { + "epoch": 0.05, + "learning_rate": 4.970591378450203e-05, + "loss": 1.1666, + "step": 48967 + }, + { + "epoch": 0.05, + "learning_rate": 4.970590177184499e-05, + "loss": 0.9976, + "step": 48968 + }, + { + "epoch": 0.05, + "learning_rate": 4.970588975894405e-05, + "loss": 1.1223, + "step": 48969 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705877745799234e-05, + "loss": 1.4969, + "step": 48970 + }, + { + "epoch": 0.05, + "learning_rate": 4.970586573241054e-05, + "loss": 1.3035, + "step": 48971 + }, + { + "epoch": 0.05, + "learning_rate": 4.970585371877794e-05, + "loss": 1.3049, + "step": 48972 + }, + { + "epoch": 0.05, + "learning_rate": 4.970584170490147e-05, + "loss": 1.0364, + "step": 48973 + }, + { + "epoch": 0.05, + "learning_rate": 4.970582969078111e-05, + "loss": 0.9351, + "step": 48974 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705817676416863e-05, + "loss": 1.4206, + "step": 48975 + }, + { + "epoch": 0.05, + "learning_rate": 4.970580566180874e-05, + "loss": 1.0781, + "step": 48976 + }, + { + "epoch": 0.05, + "learning_rate": 4.970579364695672e-05, + "loss": 0.8913, + "step": 48977 + }, + { + "epoch": 0.05, + "learning_rate": 4.970578163186082e-05, + "loss": 0.9942, + "step": 48978 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705769616521024e-05, + "loss": 1.0861, + "step": 48979 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705757600937353e-05, + "loss": 1.2575, + "step": 48980 + }, + { + "epoch": 0.05, + "learning_rate": 4.97057455851098e-05, + "loss": 0.9042, + "step": 48981 + }, + { + "epoch": 0.05, + "learning_rate": 4.970573356903836e-05, + "loss": 0.8147, + "step": 48982 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705721552723033e-05, + "loss": 1.1221, + "step": 48983 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705709536163824e-05, + "loss": 1.2505, + "step": 48984 + }, + { + "epoch": 0.05, + "learning_rate": 4.970569751936073e-05, + "loss": 0.6904, + "step": 48985 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705685502313746e-05, + "loss": 0.9995, + "step": 48986 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705673485022884e-05, + "loss": 1.0201, + "step": 48987 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705661467488143e-05, + "loss": 1.3103, + "step": 48988 + }, + { + "epoch": 0.05, + "learning_rate": 4.970564944970951e-05, + "loss": 1.3457, + "step": 48989 + }, + { + "epoch": 0.05, + "learning_rate": 4.970563743168699e-05, + "loss": 1.1403, + "step": 48990 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705625413420595e-05, + "loss": 1.1423, + "step": 48991 + }, + { + "epoch": 0.05, + "learning_rate": 4.970561339491031e-05, + "loss": 1.1039, + "step": 48992 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705601376156154e-05, + "loss": 1.0456, + "step": 48993 + }, + { + "epoch": 0.05, + "learning_rate": 4.97055893571581e-05, + "loss": 1.3295, + "step": 48994 + }, + { + "epoch": 0.05, + "learning_rate": 4.970557733791616e-05, + "loss": 1.2445, + "step": 48995 + }, + { + "epoch": 0.05, + "learning_rate": 4.970556531843036e-05, + "loss": 0.9055, + "step": 48996 + }, + { + "epoch": 0.05, + "learning_rate": 4.970555329870066e-05, + "loss": 1.0812, + "step": 48997 + }, + { + "epoch": 0.05, + "learning_rate": 4.970554127872707e-05, + "loss": 0.9255, + "step": 48998 + }, + { + "epoch": 0.05, + "learning_rate": 4.970552925850962e-05, + "loss": 1.116, + "step": 48999 + }, + { + "epoch": 0.05, + "learning_rate": 4.970551723804827e-05, + "loss": 1.0922, + "step": 49000 + }, + { + "epoch": 0.05, + "eval_loss": 1.0718774795532227, + "eval_runtime": 125.6122, + "eval_samples_per_second": 11.026, + "eval_steps_per_second": 5.517, + "step": 49000 + }, + { + "epoch": 0.05, + "learning_rate": 4.970550521734304e-05, + "loss": 0.9225, + "step": 49001 + }, + { + "epoch": 0.05, + "learning_rate": 4.970549319639393e-05, + "loss": 1.263, + "step": 49002 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705481175200934e-05, + "loss": 1.0193, + "step": 49003 + }, + { + "epoch": 0.05, + "learning_rate": 4.970546915376406e-05, + "loss": 0.9589, + "step": 49004 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705457132083305e-05, + "loss": 0.9834, + "step": 49005 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705445110158664e-05, + "loss": 0.8971, + "step": 49006 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705433087990146e-05, + "loss": 0.7668, + "step": 49007 + }, + { + "epoch": 0.05, + "learning_rate": 4.970542106557774e-05, + "loss": 0.9955, + "step": 49008 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705409042921455e-05, + "loss": 0.9534, + "step": 49009 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705397020021296e-05, + "loss": 1.1577, + "step": 49010 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705384996877246e-05, + "loss": 1.0056, + "step": 49011 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705372973489325e-05, + "loss": 0.9174, + "step": 49012 + }, + { + "epoch": 0.05, + "learning_rate": 4.970536094985751e-05, + "loss": 1.0104, + "step": 49013 + }, + { + "epoch": 0.05, + "learning_rate": 4.970534892598182e-05, + "loss": 1.0427, + "step": 49014 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705336901862256e-05, + "loss": 0.941, + "step": 49015 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705324877498804e-05, + "loss": 0.7982, + "step": 49016 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705312852891474e-05, + "loss": 0.9185, + "step": 49017 + }, + { + "epoch": 0.05, + "learning_rate": 4.970530082804026e-05, + "loss": 1.2151, + "step": 49018 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705288802945174e-05, + "loss": 1.0527, + "step": 49019 + }, + { + "epoch": 0.05, + "learning_rate": 4.97052767776062e-05, + "loss": 0.8504, + "step": 49020 + }, + { + "epoch": 0.05, + "learning_rate": 4.970526475202334e-05, + "loss": 0.9415, + "step": 49021 + }, + { + "epoch": 0.05, + "learning_rate": 4.970525272619661e-05, + "loss": 0.9535, + "step": 49022 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705240700126e-05, + "loss": 0.8174, + "step": 49023 + }, + { + "epoch": 0.05, + "learning_rate": 4.970522867381151e-05, + "loss": 0.7424, + "step": 49024 + }, + { + "epoch": 0.05, + "learning_rate": 4.970521664725314e-05, + "loss": 1.2314, + "step": 49025 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705204620450886e-05, + "loss": 1.1007, + "step": 49026 + }, + { + "epoch": 0.05, + "learning_rate": 4.970519259340475e-05, + "loss": 0.9628, + "step": 49027 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705180566114746e-05, + "loss": 1.8821, + "step": 49028 + }, + { + "epoch": 0.05, + "learning_rate": 4.970516853858086e-05, + "loss": 0.9577, + "step": 49029 + }, + { + "epoch": 0.05, + "learning_rate": 4.970515651080308e-05, + "loss": 0.6846, + "step": 49030 + }, + { + "epoch": 0.05, + "learning_rate": 4.970514448278144e-05, + "loss": 1.1478, + "step": 49031 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705132454515915e-05, + "loss": 1.1534, + "step": 49032 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705120426006514e-05, + "loss": 0.8412, + "step": 49033 + }, + { + "epoch": 0.05, + "learning_rate": 4.970510839725323e-05, + "loss": 1.2867, + "step": 49034 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705096368256066e-05, + "loss": 1.1493, + "step": 49035 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705084339015026e-05, + "loss": 1.289, + "step": 49036 + }, + { + "epoch": 0.05, + "learning_rate": 4.970507230953011e-05, + "loss": 1.2544, + "step": 49037 + }, + { + "epoch": 0.05, + "learning_rate": 4.970506027980132e-05, + "loss": 0.995, + "step": 49038 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705048249828645e-05, + "loss": 1.2466, + "step": 49039 + }, + { + "epoch": 0.05, + "learning_rate": 4.970503621961209e-05, + "loss": 0.7899, + "step": 49040 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705024189151664e-05, + "loss": 1.3663, + "step": 49041 + }, + { + "epoch": 0.05, + "learning_rate": 4.970501215844736e-05, + "loss": 0.7427, + "step": 49042 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705000127499166e-05, + "loss": 0.9482, + "step": 49043 + }, + { + "epoch": 0.05, + "learning_rate": 4.970498809630712e-05, + "loss": 0.7633, + "step": 49044 + }, + { + "epoch": 0.05, + "learning_rate": 4.970497606487117e-05, + "loss": 0.915, + "step": 49045 + }, + { + "epoch": 0.05, + "learning_rate": 4.970496403319136e-05, + "loss": 0.8745, + "step": 49046 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704952001267664e-05, + "loss": 1.1871, + "step": 49047 + }, + { + "epoch": 0.05, + "learning_rate": 4.97049399691001e-05, + "loss": 0.847, + "step": 49048 + }, + { + "epoch": 0.05, + "learning_rate": 4.970492793668865e-05, + "loss": 0.9823, + "step": 49049 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704915904033325e-05, + "loss": 0.676, + "step": 49050 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704903871134125e-05, + "loss": 1.0746, + "step": 49051 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704891837991055e-05, + "loss": 1.0121, + "step": 49052 + }, + { + "epoch": 0.05, + "learning_rate": 4.97048798046041e-05, + "loss": 0.944, + "step": 49053 + }, + { + "epoch": 0.05, + "learning_rate": 4.970486777097327e-05, + "loss": 1.3412, + "step": 49054 + }, + { + "epoch": 0.05, + "learning_rate": 4.970485573709856e-05, + "loss": 0.8623, + "step": 49055 + }, + { + "epoch": 0.05, + "learning_rate": 4.970484370297999e-05, + "loss": 0.874, + "step": 49056 + }, + { + "epoch": 0.05, + "learning_rate": 4.970483166861752e-05, + "loss": 0.899, + "step": 49057 + }, + { + "epoch": 0.05, + "learning_rate": 4.97048196340112e-05, + "loss": 1.224, + "step": 49058 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704807599160984e-05, + "loss": 0.5762, + "step": 49059 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704795564066906e-05, + "loss": 0.6094, + "step": 49060 + }, + { + "epoch": 0.05, + "learning_rate": 4.970478352872895e-05, + "loss": 0.5228, + "step": 49061 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704771493147115e-05, + "loss": 0.696, + "step": 49062 + }, + { + "epoch": 0.05, + "learning_rate": 4.97047594573214e-05, + "loss": 0.6848, + "step": 49063 + }, + { + "epoch": 0.05, + "learning_rate": 4.970474742125182e-05, + "loss": 0.4994, + "step": 49064 + }, + { + "epoch": 0.05, + "learning_rate": 4.970473538493836e-05, + "loss": 0.5694, + "step": 49065 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704723348381034e-05, + "loss": 0.5883, + "step": 49066 + }, + { + "epoch": 0.05, + "learning_rate": 4.970471131157982e-05, + "loss": 0.6094, + "step": 49067 + }, + { + "epoch": 0.05, + "learning_rate": 4.970469927453474e-05, + "loss": 0.501, + "step": 49068 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704687237245787e-05, + "loss": 0.6425, + "step": 49069 + }, + { + "epoch": 0.05, + "learning_rate": 4.970467519971295e-05, + "loss": 0.699, + "step": 49070 + }, + { + "epoch": 0.05, + "learning_rate": 4.970466316193625e-05, + "loss": 0.5859, + "step": 49071 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704651123915666e-05, + "loss": 0.6325, + "step": 49072 + }, + { + "epoch": 0.05, + "learning_rate": 4.970463908565122e-05, + "loss": 0.633, + "step": 49073 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704627047142885e-05, + "loss": 0.5955, + "step": 49074 + }, + { + "epoch": 0.05, + "learning_rate": 4.970461500839069e-05, + "loss": 0.8627, + "step": 49075 + }, + { + "epoch": 0.05, + "learning_rate": 4.970460296939461e-05, + "loss": 0.8267, + "step": 49076 + }, + { + "epoch": 0.05, + "learning_rate": 4.970459093015467e-05, + "loss": 0.7594, + "step": 49077 + }, + { + "epoch": 0.05, + "learning_rate": 4.970457889067085e-05, + "loss": 1.0365, + "step": 49078 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704566850943155e-05, + "loss": 1.088, + "step": 49079 + }, + { + "epoch": 0.05, + "learning_rate": 4.970455481097158e-05, + "loss": 1.1838, + "step": 49080 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704542770756146e-05, + "loss": 1.1609, + "step": 49081 + }, + { + "epoch": 0.05, + "learning_rate": 4.970453073029683e-05, + "loss": 1.1923, + "step": 49082 + }, + { + "epoch": 0.05, + "learning_rate": 4.970451868959365e-05, + "loss": 1.2772, + "step": 49083 + }, + { + "epoch": 0.05, + "learning_rate": 4.970450664864659e-05, + "loss": 0.9725, + "step": 49084 + }, + { + "epoch": 0.05, + "learning_rate": 4.970449460745566e-05, + "loss": 1.2527, + "step": 49085 + }, + { + "epoch": 0.05, + "learning_rate": 4.970448256602085e-05, + "loss": 1.0655, + "step": 49086 + }, + { + "epoch": 0.05, + "learning_rate": 4.970447052434218e-05, + "loss": 1.0104, + "step": 49087 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704458482419636e-05, + "loss": 1.1331, + "step": 49088 + }, + { + "epoch": 0.05, + "learning_rate": 4.970444644025322e-05, + "loss": 1.0112, + "step": 49089 + }, + { + "epoch": 0.05, + "learning_rate": 4.970443439784292e-05, + "loss": 0.8151, + "step": 49090 + }, + { + "epoch": 0.05, + "learning_rate": 4.970442235518876e-05, + "loss": 1.1967, + "step": 49091 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704410312290726e-05, + "loss": 1.6702, + "step": 49092 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704398269148825e-05, + "loss": 1.3195, + "step": 49093 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704386225763045e-05, + "loss": 1.3374, + "step": 49094 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704374182133395e-05, + "loss": 0.941, + "step": 49095 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704362138259874e-05, + "loss": 0.9783, + "step": 49096 + }, + { + "epoch": 0.05, + "learning_rate": 4.970435009414248e-05, + "loss": 0.8569, + "step": 49097 + }, + { + "epoch": 0.05, + "learning_rate": 4.970433804978122e-05, + "loss": 1.1719, + "step": 49098 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704326005176085e-05, + "loss": 1.2234, + "step": 49099 + }, + { + "epoch": 0.05, + "learning_rate": 4.970431396032709e-05, + "loss": 1.4857, + "step": 49100 + }, + { + "epoch": 0.05, + "learning_rate": 4.970430191523421e-05, + "loss": 1.1595, + "step": 49101 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704289869897464e-05, + "loss": 1.1469, + "step": 49102 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704277824316847e-05, + "loss": 0.8056, + "step": 49103 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704265778492365e-05, + "loss": 0.9138, + "step": 49104 + }, + { + "epoch": 0.05, + "learning_rate": 4.970425373242401e-05, + "loss": 1.3034, + "step": 49105 + }, + { + "epoch": 0.05, + "learning_rate": 4.970424168611179e-05, + "loss": 1.0007, + "step": 49106 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704229639555686e-05, + "loss": 1.0522, + "step": 49107 + }, + { + "epoch": 0.05, + "learning_rate": 4.970421759275573e-05, + "loss": 0.9415, + "step": 49108 + }, + { + "epoch": 0.05, + "learning_rate": 4.970420554571189e-05, + "loss": 0.9291, + "step": 49109 + }, + { + "epoch": 0.05, + "learning_rate": 4.970419349842419e-05, + "loss": 0.9965, + "step": 49110 + }, + { + "epoch": 0.05, + "learning_rate": 4.970418145089262e-05, + "loss": 1.228, + "step": 49111 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704169403117175e-05, + "loss": 1.151, + "step": 49112 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704157355097855e-05, + "loss": 1.0583, + "step": 49113 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704145306834684e-05, + "loss": 0.8792, + "step": 49114 + }, + { + "epoch": 0.05, + "learning_rate": 4.970413325832763e-05, + "loss": 1.1194, + "step": 49115 + }, + { + "epoch": 0.05, + "learning_rate": 4.970412120957671e-05, + "loss": 0.8482, + "step": 49116 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704109160581924e-05, + "loss": 1.0055, + "step": 49117 + }, + { + "epoch": 0.05, + "learning_rate": 4.970409711134327e-05, + "loss": 0.9937, + "step": 49118 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704085061860744e-05, + "loss": 0.7969, + "step": 49119 + }, + { + "epoch": 0.05, + "learning_rate": 4.970407301213435e-05, + "loss": 0.9367, + "step": 49120 + }, + { + "epoch": 0.05, + "learning_rate": 4.970406096216409e-05, + "loss": 1.1268, + "step": 49121 + }, + { + "epoch": 0.05, + "learning_rate": 4.970404891194996e-05, + "loss": 1.1483, + "step": 49122 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704036861491965e-05, + "loss": 0.7075, + "step": 49123 + }, + { + "epoch": 0.05, + "learning_rate": 4.97040248107901e-05, + "loss": 0.9617, + "step": 49124 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704012759844366e-05, + "loss": 1.1791, + "step": 49125 + }, + { + "epoch": 0.05, + "learning_rate": 4.970400070865477e-05, + "loss": 0.7995, + "step": 49126 + }, + { + "epoch": 0.05, + "learning_rate": 4.97039886572213e-05, + "loss": 0.9303, + "step": 49127 + }, + { + "epoch": 0.05, + "learning_rate": 4.970397660554397e-05, + "loss": 0.9439, + "step": 49128 + }, + { + "epoch": 0.05, + "learning_rate": 4.970396455362276e-05, + "loss": 1.0105, + "step": 49129 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703952501457693e-05, + "loss": 1.0824, + "step": 49130 + }, + { + "epoch": 0.05, + "learning_rate": 4.970394044904876e-05, + "loss": 1.0056, + "step": 49131 + }, + { + "epoch": 0.05, + "learning_rate": 4.970392839639596e-05, + "loss": 1.1438, + "step": 49132 + }, + { + "epoch": 0.05, + "learning_rate": 4.970391634349929e-05, + "loss": 1.0926, + "step": 49133 + }, + { + "epoch": 0.05, + "learning_rate": 4.970390429035875e-05, + "loss": 1.1894, + "step": 49134 + }, + { + "epoch": 0.05, + "learning_rate": 4.970389223697435e-05, + "loss": 1.0072, + "step": 49135 + }, + { + "epoch": 0.05, + "learning_rate": 4.970388018334608e-05, + "loss": 0.9159, + "step": 49136 + }, + { + "epoch": 0.05, + "learning_rate": 4.970386812947394e-05, + "loss": 0.9507, + "step": 49137 + }, + { + "epoch": 0.05, + "learning_rate": 4.970385607535794e-05, + "loss": 1.0092, + "step": 49138 + }, + { + "epoch": 0.05, + "learning_rate": 4.970384402099807e-05, + "loss": 0.5426, + "step": 49139 + }, + { + "epoch": 0.05, + "learning_rate": 4.970383196639434e-05, + "loss": 1.1667, + "step": 49140 + }, + { + "epoch": 0.05, + "learning_rate": 4.970381991154674e-05, + "loss": 1.1706, + "step": 49141 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703807856455277e-05, + "loss": 1.1304, + "step": 49142 + }, + { + "epoch": 0.05, + "learning_rate": 4.970379580111994e-05, + "loss": 0.8809, + "step": 49143 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703783745540745e-05, + "loss": 1.1021, + "step": 49144 + }, + { + "epoch": 0.05, + "learning_rate": 4.970377168971768e-05, + "loss": 1.5809, + "step": 49145 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703759633650756e-05, + "loss": 1.2629, + "step": 49146 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703747577339966e-05, + "loss": 0.9902, + "step": 49147 + }, + { + "epoch": 0.05, + "learning_rate": 4.970373552078531e-05, + "loss": 1.1831, + "step": 49148 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703723463986786e-05, + "loss": 1.1929, + "step": 49149 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703711406944396e-05, + "loss": 0.8317, + "step": 49150 + }, + { + "epoch": 0.05, + "learning_rate": 4.970369934965815e-05, + "loss": 0.758, + "step": 49151 + }, + { + "epoch": 0.05, + "learning_rate": 4.970368729212803e-05, + "loss": 0.8953, + "step": 49152 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703675234354055e-05, + "loss": 0.8258, + "step": 49153 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703663176336215e-05, + "loss": 0.9047, + "step": 49154 + }, + { + "epoch": 0.05, + "learning_rate": 4.97036511180745e-05, + "loss": 1.1103, + "step": 49155 + }, + { + "epoch": 0.05, + "learning_rate": 4.970363905956893e-05, + "loss": 1.3213, + "step": 49156 + }, + { + "epoch": 0.05, + "learning_rate": 4.970362700081949e-05, + "loss": 0.9242, + "step": 49157 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703614941826195e-05, + "loss": 1.0202, + "step": 49158 + }, + { + "epoch": 0.05, + "learning_rate": 4.970360288258903e-05, + "loss": 0.5289, + "step": 49159 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703590823107996e-05, + "loss": 0.6044, + "step": 49160 + }, + { + "epoch": 0.05, + "learning_rate": 4.970357876338311e-05, + "loss": 0.4397, + "step": 49161 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703566703414354e-05, + "loss": 0.4052, + "step": 49162 + }, + { + "epoch": 0.05, + "learning_rate": 4.970355464320173e-05, + "loss": 0.4156, + "step": 49163 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703542582745255e-05, + "loss": 0.9121, + "step": 49164 + }, + { + "epoch": 0.05, + "learning_rate": 4.970353052204491e-05, + "loss": 0.969, + "step": 49165 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703518461100706e-05, + "loss": 1.12, + "step": 49166 + }, + { + "epoch": 0.05, + "learning_rate": 4.970350639991264e-05, + "loss": 0.7309, + "step": 49167 + }, + { + "epoch": 0.05, + "learning_rate": 4.970349433848071e-05, + "loss": 1.0151, + "step": 49168 + }, + { + "epoch": 0.05, + "learning_rate": 4.970348227680491e-05, + "loss": 1.0263, + "step": 49169 + }, + { + "epoch": 0.05, + "learning_rate": 4.970347021488525e-05, + "loss": 1.1288, + "step": 49170 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703458152721736e-05, + "loss": 0.3055, + "step": 49171 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703446090314354e-05, + "loss": 0.2432, + "step": 49172 + }, + { + "epoch": 0.05, + "learning_rate": 4.970343402766311e-05, + "loss": 0.7339, + "step": 49173 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703421964768006e-05, + "loss": 0.9211, + "step": 49174 + }, + { + "epoch": 0.05, + "learning_rate": 4.970340990162904e-05, + "loss": 0.6416, + "step": 49175 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703397838246215e-05, + "loss": 1.0328, + "step": 49176 + }, + { + "epoch": 0.05, + "learning_rate": 4.970338577461952e-05, + "loss": 0.9996, + "step": 49177 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703373710748967e-05, + "loss": 1.2433, + "step": 49178 + }, + { + "epoch": 0.05, + "learning_rate": 4.970336164663456e-05, + "loss": 1.2472, + "step": 49179 + }, + { + "epoch": 0.05, + "learning_rate": 4.970334958227628e-05, + "loss": 1.3543, + "step": 49180 + }, + { + "epoch": 0.05, + "learning_rate": 4.970333751767415e-05, + "loss": 1.1434, + "step": 49181 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703325452828154e-05, + "loss": 1.075, + "step": 49182 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703313387738294e-05, + "loss": 0.9395, + "step": 49183 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703301322404584e-05, + "loss": 1.009, + "step": 49184 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703289256827e-05, + "loss": 1.1244, + "step": 49185 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703277191005556e-05, + "loss": 1.1026, + "step": 49186 + }, + { + "epoch": 0.05, + "learning_rate": 4.970326512494027e-05, + "loss": 0.9147, + "step": 49187 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703253058631106e-05, + "loss": 0.8856, + "step": 49188 + }, + { + "epoch": 0.05, + "learning_rate": 4.970324099207809e-05, + "loss": 0.8799, + "step": 49189 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703228925281206e-05, + "loss": 1.0212, + "step": 49190 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703216858240466e-05, + "loss": 1.1555, + "step": 49191 + }, + { + "epoch": 0.05, + "learning_rate": 4.970320479095587e-05, + "loss": 1.1913, + "step": 49192 + }, + { + "epoch": 0.05, + "learning_rate": 4.970319272342741e-05, + "loss": 0.644, + "step": 49193 + }, + { + "epoch": 0.05, + "learning_rate": 4.970318065565509e-05, + "loss": 0.5525, + "step": 49194 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703168587638914e-05, + "loss": 1.0745, + "step": 49195 + }, + { + "epoch": 0.05, + "learning_rate": 4.970315651937888e-05, + "loss": 0.9688, + "step": 49196 + }, + { + "epoch": 0.05, + "learning_rate": 4.970314445087498e-05, + "loss": 1.0551, + "step": 49197 + }, + { + "epoch": 0.05, + "learning_rate": 4.970313238212723e-05, + "loss": 1.2486, + "step": 49198 + }, + { + "epoch": 0.05, + "learning_rate": 4.970312031313562e-05, + "loss": 0.8232, + "step": 49199 + }, + { + "epoch": 0.05, + "learning_rate": 4.9703108243900146e-05, + "loss": 0.8392, + "step": 49200 + }, + { + "epoch": 0.05, + "learning_rate": 4.970309617442082e-05, + "loss": 1.2476, + "step": 49201 + }, + { + "epoch": 0.05, + "learning_rate": 4.970308410469763e-05, + "loss": 1.1765, + "step": 49202 + }, + { + "epoch": 0.05, + "learning_rate": 4.970307203473058e-05, + "loss": 1.0508, + "step": 49203 + }, + { + "epoch": 0.05, + "learning_rate": 4.970305996451967e-05, + "loss": 0.8849, + "step": 49204 + }, + { + "epoch": 0.05, + "learning_rate": 4.97030478940649e-05, + "loss": 0.6046, + "step": 49205 + }, + { + "epoch": 0.05, + "learning_rate": 4.970303582336628e-05, + "loss": 0.656, + "step": 49206 + }, + { + "epoch": 0.05, + "learning_rate": 4.97030237524238e-05, + "loss": 1.0301, + "step": 49207 + }, + { + "epoch": 0.05, + "learning_rate": 4.970301168123747e-05, + "loss": 1.4154, + "step": 49208 + }, + { + "epoch": 0.05, + "learning_rate": 4.970299960980727e-05, + "loss": 1.0479, + "step": 49209 + }, + { + "epoch": 0.05, + "learning_rate": 4.970298753813322e-05, + "loss": 0.9415, + "step": 49210 + }, + { + "epoch": 0.05, + "learning_rate": 4.970297546621531e-05, + "loss": 1.0544, + "step": 49211 + }, + { + "epoch": 0.05, + "learning_rate": 4.970296339405354e-05, + "loss": 1.1165, + "step": 49212 + }, + { + "epoch": 0.05, + "learning_rate": 4.970295132164791e-05, + "loss": 1.0928, + "step": 49213 + }, + { + "epoch": 0.05, + "learning_rate": 4.970293924899843e-05, + "loss": 0.9414, + "step": 49214 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702927176105084e-05, + "loss": 1.0549, + "step": 49215 + }, + { + "epoch": 0.05, + "learning_rate": 4.970291510296789e-05, + "loss": 0.4215, + "step": 49216 + }, + { + "epoch": 0.05, + "learning_rate": 4.970290302958684e-05, + "loss": 0.7109, + "step": 49217 + }, + { + "epoch": 0.05, + "learning_rate": 4.970289095596192e-05, + "loss": 1.0233, + "step": 49218 + }, + { + "epoch": 0.05, + "learning_rate": 4.970287888209316e-05, + "loss": 1.1946, + "step": 49219 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702866807980536e-05, + "loss": 0.6813, + "step": 49220 + }, + { + "epoch": 0.05, + "learning_rate": 4.970285473362406e-05, + "loss": 0.9359, + "step": 49221 + }, + { + "epoch": 0.05, + "learning_rate": 4.970284265902372e-05, + "loss": 0.9165, + "step": 49222 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702830584179525e-05, + "loss": 0.8366, + "step": 49223 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702818509091484e-05, + "loss": 0.7746, + "step": 49224 + }, + { + "epoch": 0.05, + "learning_rate": 4.970280643375958e-05, + "loss": 0.8766, + "step": 49225 + }, + { + "epoch": 0.05, + "learning_rate": 4.970279435818382e-05, + "loss": 0.98, + "step": 49226 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702782282364205e-05, + "loss": 1.1382, + "step": 49227 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702770206300735e-05, + "loss": 0.504, + "step": 49228 + }, + { + "epoch": 0.05, + "learning_rate": 4.970275812999341e-05, + "loss": 0.6278, + "step": 49229 + }, + { + "epoch": 0.05, + "learning_rate": 4.970274605344223e-05, + "loss": 0.921, + "step": 49230 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702733976647194e-05, + "loss": 0.865, + "step": 49231 + }, + { + "epoch": 0.05, + "learning_rate": 4.97027218996083e-05, + "loss": 1.2186, + "step": 49232 + }, + { + "epoch": 0.05, + "learning_rate": 4.970270982232556e-05, + "loss": 1.156, + "step": 49233 + }, + { + "epoch": 0.05, + "learning_rate": 4.970269774479896e-05, + "loss": 1.1751, + "step": 49234 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702685667028505e-05, + "loss": 0.9222, + "step": 49235 + }, + { + "epoch": 0.05, + "learning_rate": 4.970267358901419e-05, + "loss": 0.7707, + "step": 49236 + }, + { + "epoch": 0.05, + "learning_rate": 4.970266151075603e-05, + "loss": 1.1664, + "step": 49237 + }, + { + "epoch": 0.05, + "learning_rate": 4.970264943225401e-05, + "loss": 0.6423, + "step": 49238 + }, + { + "epoch": 0.05, + "learning_rate": 4.970263735350814e-05, + "loss": 0.9882, + "step": 49239 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702625274518414e-05, + "loss": 1.2393, + "step": 49240 + }, + { + "epoch": 0.05, + "learning_rate": 4.970261319528483e-05, + "loss": 1.4499, + "step": 49241 + }, + { + "epoch": 0.05, + "learning_rate": 4.97026011158074e-05, + "loss": 1.2296, + "step": 49242 + }, + { + "epoch": 0.05, + "learning_rate": 4.970258903608611e-05, + "loss": 0.9282, + "step": 49243 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702576956120974e-05, + "loss": 0.8888, + "step": 49244 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702564875911976e-05, + "loss": 1.5979, + "step": 49245 + }, + { + "epoch": 0.05, + "learning_rate": 4.970255279545912e-05, + "loss": 1.8558, + "step": 49246 + }, + { + "epoch": 0.05, + "learning_rate": 4.970254071476243e-05, + "loss": 0.8785, + "step": 49247 + }, + { + "epoch": 0.05, + "learning_rate": 4.970252863382187e-05, + "loss": 0.4065, + "step": 49248 + }, + { + "epoch": 0.05, + "learning_rate": 4.970251655263746e-05, + "loss": 0.4805, + "step": 49249 + }, + { + "epoch": 0.05, + "learning_rate": 4.97025044712092e-05, + "loss": 0.8593, + "step": 49250 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702492389537095e-05, + "loss": 0.9746, + "step": 49251 + }, + { + "epoch": 0.05, + "learning_rate": 4.970248030762112e-05, + "loss": 0.9178, + "step": 49252 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702468225461305e-05, + "loss": 1.0239, + "step": 49253 + }, + { + "epoch": 0.05, + "learning_rate": 4.970245614305764e-05, + "loss": 0.6192, + "step": 49254 + }, + { + "epoch": 0.05, + "learning_rate": 4.970244406041011e-05, + "loss": 0.7455, + "step": 49255 + }, + { + "epoch": 0.05, + "learning_rate": 4.970243197751874e-05, + "loss": 1.0458, + "step": 49256 + }, + { + "epoch": 0.05, + "learning_rate": 4.970241989438351e-05, + "loss": 0.9976, + "step": 49257 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702407811004434e-05, + "loss": 0.8518, + "step": 49258 + }, + { + "epoch": 0.05, + "learning_rate": 4.97023957273815e-05, + "loss": 0.7962, + "step": 49259 + }, + { + "epoch": 0.05, + "learning_rate": 4.970238364351473e-05, + "loss": 1.018, + "step": 49260 + }, + { + "epoch": 0.05, + "learning_rate": 4.970237155940409e-05, + "loss": 1.123, + "step": 49261 + }, + { + "epoch": 0.05, + "learning_rate": 4.970235947504961e-05, + "loss": 0.8241, + "step": 49262 + }, + { + "epoch": 0.05, + "learning_rate": 4.970234739045127e-05, + "loss": 0.9632, + "step": 49263 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702335305609083e-05, + "loss": 1.0769, + "step": 49264 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702323220523045e-05, + "loss": 1.2423, + "step": 49265 + }, + { + "epoch": 0.05, + "learning_rate": 4.970231113519316e-05, + "loss": 0.6706, + "step": 49266 + }, + { + "epoch": 0.05, + "learning_rate": 4.970229904961942e-05, + "loss": 1.0386, + "step": 49267 + }, + { + "epoch": 0.05, + "learning_rate": 4.970228696380183e-05, + "loss": 1.2034, + "step": 49268 + }, + { + "epoch": 0.05, + "learning_rate": 4.970227487774039e-05, + "loss": 0.7983, + "step": 49269 + }, + { + "epoch": 0.05, + "learning_rate": 4.97022627914351e-05, + "loss": 0.9121, + "step": 49270 + }, + { + "epoch": 0.05, + "learning_rate": 4.970225070488595e-05, + "loss": 1.0182, + "step": 49271 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702238618092956e-05, + "loss": 0.9931, + "step": 49272 + }, + { + "epoch": 0.05, + "learning_rate": 4.970222653105612e-05, + "loss": 0.5544, + "step": 49273 + }, + { + "epoch": 0.05, + "learning_rate": 4.970221444377542e-05, + "loss": 0.9423, + "step": 49274 + }, + { + "epoch": 0.05, + "learning_rate": 4.970220235625088e-05, + "loss": 0.8785, + "step": 49275 + }, + { + "epoch": 0.05, + "learning_rate": 4.970219026848249e-05, + "loss": 1.0599, + "step": 49276 + }, + { + "epoch": 0.05, + "learning_rate": 4.970217818047025e-05, + "loss": 1.0101, + "step": 49277 + }, + { + "epoch": 0.05, + "learning_rate": 4.970216609221415e-05, + "loss": 0.9442, + "step": 49278 + }, + { + "epoch": 0.05, + "learning_rate": 4.970215400371422e-05, + "loss": 1.0618, + "step": 49279 + }, + { + "epoch": 0.05, + "learning_rate": 4.970214191497042e-05, + "loss": 1.1712, + "step": 49280 + }, + { + "epoch": 0.05, + "learning_rate": 4.970212982598279e-05, + "loss": 1.1162, + "step": 49281 + }, + { + "epoch": 0.05, + "learning_rate": 4.97021177367513e-05, + "loss": 1.012, + "step": 49282 + }, + { + "epoch": 0.05, + "learning_rate": 4.970210564727597e-05, + "loss": 0.7131, + "step": 49283 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702093557556775e-05, + "loss": 0.2382, + "step": 49284 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702081467593745e-05, + "loss": 0.2728, + "step": 49285 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702069377386864e-05, + "loss": 0.1948, + "step": 49286 + }, + { + "epoch": 0.05, + "learning_rate": 4.970205728693613e-05, + "loss": 0.0659, + "step": 49287 + }, + { + "epoch": 0.05, + "learning_rate": 4.970204519624155e-05, + "loss": 0.6515, + "step": 49288 + }, + { + "epoch": 0.05, + "learning_rate": 4.9702033105303126e-05, + "loss": 0.8122, + "step": 49289 + }, + { + "epoch": 0.05, + "learning_rate": 4.970202101412085e-05, + "loss": 1.0526, + "step": 49290 + }, + { + "epoch": 0.05, + "learning_rate": 4.970200892269472e-05, + "loss": 0.9306, + "step": 49291 + }, + { + "epoch": 0.05, + "learning_rate": 4.970199683102476e-05, + "loss": 1.1721, + "step": 49292 + }, + { + "epoch": 0.05, + "learning_rate": 4.970198473911094e-05, + "loss": 0.9256, + "step": 49293 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701972646953265e-05, + "loss": 0.7625, + "step": 49294 + }, + { + "epoch": 0.05, + "learning_rate": 4.970196055455175e-05, + "loss": 0.2986, + "step": 49295 + }, + { + "epoch": 0.05, + "learning_rate": 4.970194846190639e-05, + "loss": 0.9155, + "step": 49296 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701936369017186e-05, + "loss": 1.0114, + "step": 49297 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701924275884125e-05, + "loss": 0.9612, + "step": 49298 + }, + { + "epoch": 0.05, + "learning_rate": 4.970191218250723e-05, + "loss": 0.8865, + "step": 49299 + }, + { + "epoch": 0.05, + "learning_rate": 4.970190008888648e-05, + "loss": 1.0928, + "step": 49300 + }, + { + "epoch": 0.05, + "learning_rate": 4.970188799502188e-05, + "loss": 0.9871, + "step": 49301 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701875900913436e-05, + "loss": 0.7684, + "step": 49302 + }, + { + "epoch": 0.05, + "learning_rate": 4.970186380656114e-05, + "loss": 0.8558, + "step": 49303 + }, + { + "epoch": 0.05, + "learning_rate": 4.970185171196501e-05, + "loss": 1.1148, + "step": 49304 + }, + { + "epoch": 0.05, + "learning_rate": 4.970183961712503e-05, + "loss": 1.0677, + "step": 49305 + }, + { + "epoch": 0.05, + "learning_rate": 4.97018275220412e-05, + "loss": 1.0732, + "step": 49306 + }, + { + "epoch": 0.05, + "learning_rate": 4.970181542671353e-05, + "loss": 1.2291, + "step": 49307 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701803331142e-05, + "loss": 1.1861, + "step": 49308 + }, + { + "epoch": 0.05, + "learning_rate": 4.970179123532664e-05, + "loss": 1.352, + "step": 49309 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701779139267424e-05, + "loss": 0.8551, + "step": 49310 + }, + { + "epoch": 0.05, + "learning_rate": 4.970176704296437e-05, + "loss": 0.631, + "step": 49311 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701754946417464e-05, + "loss": 0.9091, + "step": 49312 + }, + { + "epoch": 0.05, + "learning_rate": 4.970174284962672e-05, + "loss": 0.8498, + "step": 49313 + }, + { + "epoch": 0.05, + "learning_rate": 4.970173075259212e-05, + "loss": 1.1056, + "step": 49314 + }, + { + "epoch": 0.05, + "learning_rate": 4.970171865531369e-05, + "loss": 1.0499, + "step": 49315 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701706557791405e-05, + "loss": 0.9585, + "step": 49316 + }, + { + "epoch": 0.05, + "learning_rate": 4.970169446002528e-05, + "loss": 1.6578, + "step": 49317 + }, + { + "epoch": 0.05, + "learning_rate": 4.97016823620153e-05, + "loss": 1.0333, + "step": 49318 + }, + { + "epoch": 0.05, + "learning_rate": 4.970167026376148e-05, + "loss": 1.0478, + "step": 49319 + }, + { + "epoch": 0.05, + "learning_rate": 4.970165816526382e-05, + "loss": 0.7092, + "step": 49320 + }, + { + "epoch": 0.05, + "learning_rate": 4.970164606652232e-05, + "loss": 1.0351, + "step": 49321 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701633967536966e-05, + "loss": 1.1692, + "step": 49322 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701621868307775e-05, + "loss": 1.1672, + "step": 49323 + }, + { + "epoch": 0.05, + "learning_rate": 4.970160976883473e-05, + "loss": 1.025, + "step": 49324 + }, + { + "epoch": 0.05, + "learning_rate": 4.970159766911785e-05, + "loss": 0.1514, + "step": 49325 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701585569157125e-05, + "loss": 1.1686, + "step": 49326 + }, + { + "epoch": 0.05, + "learning_rate": 4.970157346895256e-05, + "loss": 0.6867, + "step": 49327 + }, + { + "epoch": 0.05, + "learning_rate": 4.970156136850415e-05, + "loss": 1.1729, + "step": 49328 + }, + { + "epoch": 0.05, + "learning_rate": 4.970154926781189e-05, + "loss": 1.0695, + "step": 49329 + }, + { + "epoch": 0.05, + "learning_rate": 4.970153716687579e-05, + "loss": 1.0858, + "step": 49330 + }, + { + "epoch": 0.05, + "learning_rate": 4.970152506569584e-05, + "loss": 0.6815, + "step": 49331 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701512964272056e-05, + "loss": 0.9542, + "step": 49332 + }, + { + "epoch": 0.05, + "learning_rate": 4.970150086260442e-05, + "loss": 0.6903, + "step": 49333 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701488760692953e-05, + "loss": 0.7983, + "step": 49334 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701476658537636e-05, + "loss": 0.8928, + "step": 49335 + }, + { + "epoch": 0.05, + "learning_rate": 4.970146455613848e-05, + "loss": 0.8243, + "step": 49336 + }, + { + "epoch": 0.05, + "learning_rate": 4.970145245349548e-05, + "loss": 0.2803, + "step": 49337 + }, + { + "epoch": 0.05, + "learning_rate": 4.970144035060864e-05, + "loss": 0.1855, + "step": 49338 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701428247477957e-05, + "loss": 0.1977, + "step": 49339 + }, + { + "epoch": 0.05, + "learning_rate": 4.970141614410343e-05, + "loss": 0.2214, + "step": 49340 + }, + { + "epoch": 0.05, + "learning_rate": 4.970140404048506e-05, + "loss": 0.1916, + "step": 49341 + }, + { + "epoch": 0.05, + "learning_rate": 4.970139193662286e-05, + "loss": 0.3204, + "step": 49342 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701379832516796e-05, + "loss": 0.8594, + "step": 49343 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701367728166904e-05, + "loss": 0.9501, + "step": 49344 + }, + { + "epoch": 0.05, + "learning_rate": 4.970135562357317e-05, + "loss": 0.828, + "step": 49345 + }, + { + "epoch": 0.05, + "learning_rate": 4.97013435187356e-05, + "loss": 0.784, + "step": 49346 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701331413654174e-05, + "loss": 0.6562, + "step": 49347 + }, + { + "epoch": 0.05, + "learning_rate": 4.970131930832892e-05, + "loss": 0.9191, + "step": 49348 + }, + { + "epoch": 0.05, + "learning_rate": 4.970130720275982e-05, + "loss": 1.0661, + "step": 49349 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701295096946876e-05, + "loss": 1.1562, + "step": 49350 + }, + { + "epoch": 0.05, + "learning_rate": 4.97012829908901e-05, + "loss": 0.7336, + "step": 49351 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701270884589476e-05, + "loss": 1.0653, + "step": 49352 + }, + { + "epoch": 0.05, + "learning_rate": 4.970125877804501e-05, + "loss": 0.8774, + "step": 49353 + }, + { + "epoch": 0.05, + "learning_rate": 4.970124667125671e-05, + "loss": 1.0351, + "step": 49354 + }, + { + "epoch": 0.05, + "learning_rate": 4.970123456422456e-05, + "loss": 1.1957, + "step": 49355 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701222456948584e-05, + "loss": 0.6754, + "step": 49356 + }, + { + "epoch": 0.05, + "learning_rate": 4.970121034942876e-05, + "loss": 0.693, + "step": 49357 + }, + { + "epoch": 0.05, + "learning_rate": 4.970119824166509e-05, + "loss": 1.5728, + "step": 49358 + }, + { + "epoch": 0.05, + "learning_rate": 4.970118613365759e-05, + "loss": 1.3361, + "step": 49359 + }, + { + "epoch": 0.05, + "learning_rate": 4.970117402540625e-05, + "loss": 0.9865, + "step": 49360 + }, + { + "epoch": 0.05, + "learning_rate": 4.970116191691106e-05, + "loss": 0.4171, + "step": 49361 + }, + { + "epoch": 0.05, + "learning_rate": 4.970114980817204e-05, + "loss": 0.2866, + "step": 49362 + }, + { + "epoch": 0.05, + "learning_rate": 4.970113769918918e-05, + "loss": 0.7806, + "step": 49363 + }, + { + "epoch": 0.05, + "learning_rate": 4.970112558996248e-05, + "loss": 1.3016, + "step": 49364 + }, + { + "epoch": 0.05, + "learning_rate": 4.970111348049194e-05, + "loss": 1.1949, + "step": 49365 + }, + { + "epoch": 0.05, + "learning_rate": 4.970110137077756e-05, + "loss": 1.1182, + "step": 49366 + }, + { + "epoch": 0.05, + "learning_rate": 4.970108926081934e-05, + "loss": 0.8209, + "step": 49367 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701077150617286e-05, + "loss": 0.7123, + "step": 49368 + }, + { + "epoch": 0.05, + "learning_rate": 4.970106504017139e-05, + "loss": 0.4967, + "step": 49369 + }, + { + "epoch": 0.05, + "learning_rate": 4.970105292948166e-05, + "loss": 0.3679, + "step": 49370 + }, + { + "epoch": 0.05, + "learning_rate": 4.970104081854808e-05, + "loss": 0.5399, + "step": 49371 + }, + { + "epoch": 0.05, + "learning_rate": 4.970102870737068e-05, + "loss": 0.4392, + "step": 49372 + }, + { + "epoch": 0.05, + "learning_rate": 4.9701016595949425e-05, + "loss": 0.5869, + "step": 49373 + }, + { + "epoch": 0.05, + "learning_rate": 4.970100448428434e-05, + "loss": 1.0535, + "step": 49374 + }, + { + "epoch": 0.05, + "learning_rate": 4.970099237237542e-05, + "loss": 0.8251, + "step": 49375 + }, + { + "epoch": 0.05, + "learning_rate": 4.970098026022265e-05, + "loss": 1.3761, + "step": 49376 + }, + { + "epoch": 0.05, + "learning_rate": 4.970096814782606e-05, + "loss": 1.0517, + "step": 49377 + }, + { + "epoch": 0.05, + "learning_rate": 4.970095603518562e-05, + "loss": 1.1177, + "step": 49378 + }, + { + "epoch": 0.05, + "learning_rate": 4.970094392230134e-05, + "loss": 1.5543, + "step": 49379 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700931809173226e-05, + "loss": 1.6791, + "step": 49380 + }, + { + "epoch": 0.05, + "learning_rate": 4.970091969580128e-05, + "loss": 1.7187, + "step": 49381 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700907582185494e-05, + "loss": 1.2126, + "step": 49382 + }, + { + "epoch": 0.05, + "learning_rate": 4.970089546832587e-05, + "loss": 1.1443, + "step": 49383 + }, + { + "epoch": 0.05, + "learning_rate": 4.970088335422241e-05, + "loss": 0.9018, + "step": 49384 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700871239875114e-05, + "loss": 1.2226, + "step": 49385 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700859125283984e-05, + "loss": 1.0516, + "step": 49386 + }, + { + "epoch": 0.05, + "learning_rate": 4.970084701044902e-05, + "loss": 1.1128, + "step": 49387 + }, + { + "epoch": 0.05, + "learning_rate": 4.970083489537021e-05, + "loss": 0.8111, + "step": 49388 + }, + { + "epoch": 0.05, + "learning_rate": 4.970082278004756e-05, + "loss": 1.1613, + "step": 49389 + }, + { + "epoch": 0.05, + "learning_rate": 4.970081066448108e-05, + "loss": 1.0762, + "step": 49390 + }, + { + "epoch": 0.05, + "learning_rate": 4.970079854867077e-05, + "loss": 0.9629, + "step": 49391 + }, + { + "epoch": 0.05, + "learning_rate": 4.970078643261662e-05, + "loss": 1.0423, + "step": 49392 + }, + { + "epoch": 0.05, + "learning_rate": 4.970077431631863e-05, + "loss": 1.1219, + "step": 49393 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700762199776806e-05, + "loss": 0.9233, + "step": 49394 + }, + { + "epoch": 0.05, + "learning_rate": 4.970075008299116e-05, + "loss": 0.9822, + "step": 49395 + }, + { + "epoch": 0.05, + "learning_rate": 4.970073796596166e-05, + "loss": 1.0145, + "step": 49396 + }, + { + "epoch": 0.05, + "learning_rate": 4.970072584868833e-05, + "loss": 0.9013, + "step": 49397 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700713731171165e-05, + "loss": 0.7693, + "step": 49398 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700701613410174e-05, + "loss": 1.0783, + "step": 49399 + }, + { + "epoch": 0.05, + "learning_rate": 4.970068949540534e-05, + "loss": 0.0041, + "step": 49400 + }, + { + "epoch": 0.05, + "learning_rate": 4.970067737715667e-05, + "loss": 0.0022, + "step": 49401 + }, + { + "epoch": 0.05, + "learning_rate": 4.970066525866417e-05, + "loss": 0.0014, + "step": 49402 + }, + { + "epoch": 0.05, + "learning_rate": 4.970065313992783e-05, + "loss": 0.7213, + "step": 49403 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700641020947655e-05, + "loss": 0.5455, + "step": 49404 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700628901723655e-05, + "loss": 0.0007, + "step": 49405 + }, + { + "epoch": 0.05, + "learning_rate": 4.970061678225582e-05, + "loss": 0.0005, + "step": 49406 + }, + { + "epoch": 0.05, + "learning_rate": 4.970060466254414e-05, + "loss": 0.0005, + "step": 49407 + }, + { + "epoch": 0.05, + "learning_rate": 4.970059254258863e-05, + "loss": 0.7466, + "step": 49408 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700580422389285e-05, + "loss": 1.0046, + "step": 49409 + }, + { + "epoch": 0.05, + "learning_rate": 4.970056830194611e-05, + "loss": 0.9007, + "step": 49410 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700556181259104e-05, + "loss": 0.953, + "step": 49411 + }, + { + "epoch": 0.05, + "learning_rate": 4.970054406032826e-05, + "loss": 0.4956, + "step": 49412 + }, + { + "epoch": 0.05, + "learning_rate": 4.970053193915358e-05, + "loss": 1.1025, + "step": 49413 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700519817735075e-05, + "loss": 1.1426, + "step": 49414 + }, + { + "epoch": 0.05, + "learning_rate": 4.970050769607273e-05, + "loss": 0.8417, + "step": 49415 + }, + { + "epoch": 0.05, + "learning_rate": 4.970049557416656e-05, + "loss": 1.1481, + "step": 49416 + }, + { + "epoch": 0.05, + "learning_rate": 4.970048345201655e-05, + "loss": 0.9196, + "step": 49417 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700471329622706e-05, + "loss": 0.8762, + "step": 49418 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700459206985034e-05, + "loss": 0.8657, + "step": 49419 + }, + { + "epoch": 0.05, + "learning_rate": 4.970044708410353e-05, + "loss": 0.8328, + "step": 49420 + }, + { + "epoch": 0.05, + "learning_rate": 4.970043496097819e-05, + "loss": 1.3251, + "step": 49421 + }, + { + "epoch": 0.05, + "learning_rate": 4.970042283760902e-05, + "loss": 1.3648, + "step": 49422 + }, + { + "epoch": 0.05, + "learning_rate": 4.970041071399602e-05, + "loss": 1.1356, + "step": 49423 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700398590139175e-05, + "loss": 1.0187, + "step": 49424 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700386466038515e-05, + "loss": 0.6391, + "step": 49425 + }, + { + "epoch": 0.05, + "learning_rate": 4.970037434169401e-05, + "loss": 0.4627, + "step": 49426 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700362217105677e-05, + "loss": 0.9848, + "step": 49427 + }, + { + "epoch": 0.05, + "learning_rate": 4.970035009227352e-05, + "loss": 0.9061, + "step": 49428 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700337967197524e-05, + "loss": 0.8277, + "step": 49429 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700325841877706e-05, + "loss": 1.1171, + "step": 49430 + }, + { + "epoch": 0.05, + "learning_rate": 4.970031371631404e-05, + "loss": 1.3235, + "step": 49431 + }, + { + "epoch": 0.05, + "learning_rate": 4.970030159050656e-05, + "loss": 1.2504, + "step": 49432 + }, + { + "epoch": 0.05, + "learning_rate": 4.970028946445524e-05, + "loss": 0.9477, + "step": 49433 + }, + { + "epoch": 0.05, + "learning_rate": 4.970027733816009e-05, + "loss": 1.2332, + "step": 49434 + }, + { + "epoch": 0.05, + "learning_rate": 4.970026521162111e-05, + "loss": 1.347, + "step": 49435 + }, + { + "epoch": 0.05, + "learning_rate": 4.97002530848383e-05, + "loss": 1.0928, + "step": 49436 + }, + { + "epoch": 0.05, + "learning_rate": 4.970024095781166e-05, + "loss": 1.0312, + "step": 49437 + }, + { + "epoch": 0.05, + "learning_rate": 4.970022883054119e-05, + "loss": 1.2203, + "step": 49438 + }, + { + "epoch": 0.05, + "learning_rate": 4.970021670302688e-05, + "loss": 1.2077, + "step": 49439 + }, + { + "epoch": 0.05, + "learning_rate": 4.970020457526875e-05, + "loss": 0.8798, + "step": 49440 + }, + { + "epoch": 0.05, + "learning_rate": 4.970019244726679e-05, + "loss": 0.9077, + "step": 49441 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700180319021e-05, + "loss": 0.663, + "step": 49442 + }, + { + "epoch": 0.05, + "learning_rate": 4.970016819053138e-05, + "loss": 0.894, + "step": 49443 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700156061797927e-05, + "loss": 0.9391, + "step": 49444 + }, + { + "epoch": 0.05, + "learning_rate": 4.970014393282065e-05, + "loss": 1.0318, + "step": 49445 + }, + { + "epoch": 0.05, + "learning_rate": 4.970013180359954e-05, + "loss": 0.9967, + "step": 49446 + }, + { + "epoch": 0.05, + "learning_rate": 4.970011967413459e-05, + "loss": 0.4571, + "step": 49447 + }, + { + "epoch": 0.05, + "learning_rate": 4.970010754442583e-05, + "loss": 1.0297, + "step": 49448 + }, + { + "epoch": 0.05, + "learning_rate": 4.970009541447323e-05, + "loss": 1.4043, + "step": 49449 + }, + { + "epoch": 0.05, + "learning_rate": 4.97000832842768e-05, + "loss": 1.1019, + "step": 49450 + }, + { + "epoch": 0.05, + "learning_rate": 4.970007115383655e-05, + "loss": 0.7921, + "step": 49451 + }, + { + "epoch": 0.05, + "learning_rate": 4.970005902315247e-05, + "loss": 0.7592, + "step": 49452 + }, + { + "epoch": 0.05, + "learning_rate": 4.970004689222455e-05, + "loss": 0.897, + "step": 49453 + }, + { + "epoch": 0.05, + "learning_rate": 4.970003476105281e-05, + "loss": 1.1784, + "step": 49454 + }, + { + "epoch": 0.05, + "learning_rate": 4.970002262963724e-05, + "loss": 0.9718, + "step": 49455 + }, + { + "epoch": 0.05, + "learning_rate": 4.970001049797784e-05, + "loss": 1.0498, + "step": 49456 + }, + { + "epoch": 0.05, + "learning_rate": 4.969999836607462e-05, + "loss": 0.8403, + "step": 49457 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699986233927566e-05, + "loss": 0.6607, + "step": 49458 + }, + { + "epoch": 0.05, + "learning_rate": 4.969997410153668e-05, + "loss": 0.5229, + "step": 49459 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699961968901976e-05, + "loss": 0.5823, + "step": 49460 + }, + { + "epoch": 0.05, + "learning_rate": 4.969994983602344e-05, + "loss": 0.7571, + "step": 49461 + }, + { + "epoch": 0.05, + "learning_rate": 4.969993770290107e-05, + "loss": 1.1239, + "step": 49462 + }, + { + "epoch": 0.05, + "learning_rate": 4.969992556953488e-05, + "loss": 0.8606, + "step": 49463 + }, + { + "epoch": 0.05, + "learning_rate": 4.969991343592486e-05, + "loss": 0.6372, + "step": 49464 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699901302071015e-05, + "loss": 1.2361, + "step": 49465 + }, + { + "epoch": 0.05, + "learning_rate": 4.969988916797335e-05, + "loss": 0.9431, + "step": 49466 + }, + { + "epoch": 0.05, + "learning_rate": 4.969987703363185e-05, + "loss": 0.7066, + "step": 49467 + }, + { + "epoch": 0.05, + "learning_rate": 4.969986489904652e-05, + "loss": 1.1989, + "step": 49468 + }, + { + "epoch": 0.05, + "learning_rate": 4.969985276421736e-05, + "loss": 1.0192, + "step": 49469 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699840629144385e-05, + "loss": 1.0511, + "step": 49470 + }, + { + "epoch": 0.05, + "learning_rate": 4.969982849382758e-05, + "loss": 0.9123, + "step": 49471 + }, + { + "epoch": 0.05, + "learning_rate": 4.969981635826695e-05, + "loss": 1.1574, + "step": 49472 + }, + { + "epoch": 0.05, + "learning_rate": 4.969980422246249e-05, + "loss": 1.0224, + "step": 49473 + }, + { + "epoch": 0.05, + "learning_rate": 4.96997920864142e-05, + "loss": 1.5881, + "step": 49474 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699779950122085e-05, + "loss": 2.2991, + "step": 49475 + }, + { + "epoch": 0.05, + "learning_rate": 4.969976781358615e-05, + "loss": 2.1611, + "step": 49476 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699755676806394e-05, + "loss": 2.1603, + "step": 49477 + }, + { + "epoch": 0.05, + "learning_rate": 4.96997435397828e-05, + "loss": 2.0936, + "step": 49478 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699731402515396e-05, + "loss": 2.0773, + "step": 49479 + }, + { + "epoch": 0.05, + "learning_rate": 4.969971926500415e-05, + "loss": 2.0896, + "step": 49480 + }, + { + "epoch": 0.05, + "learning_rate": 4.969970712724908e-05, + "loss": 2.0648, + "step": 49481 + }, + { + "epoch": 0.05, + "learning_rate": 4.96996949892502e-05, + "loss": 2.0087, + "step": 49482 + }, + { + "epoch": 0.05, + "learning_rate": 4.969968285100748e-05, + "loss": 2.0301, + "step": 49483 + }, + { + "epoch": 0.05, + "learning_rate": 4.969967071252094e-05, + "loss": 1.9798, + "step": 49484 + }, + { + "epoch": 0.05, + "learning_rate": 4.969965857379058e-05, + "loss": 1.9174, + "step": 49485 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699646434816395e-05, + "loss": 1.9022, + "step": 49486 + }, + { + "epoch": 0.05, + "learning_rate": 4.969963429559838e-05, + "loss": 1.7967, + "step": 49487 + }, + { + "epoch": 0.05, + "learning_rate": 4.969962215613654e-05, + "loss": 1.718, + "step": 49488 + }, + { + "epoch": 0.05, + "learning_rate": 4.969961001643088e-05, + "loss": 1.6496, + "step": 49489 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699597876481396e-05, + "loss": 1.604, + "step": 49490 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699585736288085e-05, + "loss": 1.4073, + "step": 49491 + }, + { + "epoch": 0.05, + "learning_rate": 4.969957359585095e-05, + "loss": 1.4813, + "step": 49492 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699561455169994e-05, + "loss": 1.4679, + "step": 49493 + }, + { + "epoch": 0.05, + "learning_rate": 4.969954931424521e-05, + "loss": 1.4021, + "step": 49494 + }, + { + "epoch": 0.05, + "learning_rate": 4.969953717307661e-05, + "loss": 1.3697, + "step": 49495 + }, + { + "epoch": 0.05, + "learning_rate": 4.969952503166419e-05, + "loss": 1.4195, + "step": 49496 + }, + { + "epoch": 0.05, + "learning_rate": 4.969951289000793e-05, + "loss": 1.3195, + "step": 49497 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699500748107855e-05, + "loss": 1.3799, + "step": 49498 + }, + { + "epoch": 0.05, + "learning_rate": 4.969948860596396e-05, + "loss": 1.3484, + "step": 49499 + }, + { + "epoch": 0.05, + "learning_rate": 4.969947646357624e-05, + "loss": 1.3555, + "step": 49500 + }, + { + "epoch": 0.05, + "eval_loss": 1.0935750007629395, + "eval_runtime": 126.1201, + "eval_samples_per_second": 10.982, + "eval_steps_per_second": 5.495, + "step": 49500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699464320944696e-05, + "loss": 1.2435, + "step": 49501 + }, + { + "epoch": 0.05, + "learning_rate": 4.969945217806933e-05, + "loss": 1.2915, + "step": 49502 + }, + { + "epoch": 0.05, + "learning_rate": 4.969944003495014e-05, + "loss": 1.3409, + "step": 49503 + }, + { + "epoch": 0.05, + "learning_rate": 4.969942789158713e-05, + "loss": 1.2208, + "step": 49504 + }, + { + "epoch": 0.05, + "learning_rate": 4.969941574798029e-05, + "loss": 1.2062, + "step": 49505 + }, + { + "epoch": 0.05, + "learning_rate": 4.969940360412964e-05, + "loss": 1.2802, + "step": 49506 + }, + { + "epoch": 0.05, + "learning_rate": 4.969939146003516e-05, + "loss": 1.4211, + "step": 49507 + }, + { + "epoch": 0.05, + "learning_rate": 4.969937931569686e-05, + "loss": 1.346, + "step": 49508 + }, + { + "epoch": 0.05, + "learning_rate": 4.969936717111474e-05, + "loss": 1.267, + "step": 49509 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699355026288797e-05, + "loss": 1.2506, + "step": 49510 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699342881219036e-05, + "loss": 1.3876, + "step": 49511 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699330735905446e-05, + "loss": 1.3895, + "step": 49512 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699318590348045e-05, + "loss": 1.3027, + "step": 49513 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699306444546814e-05, + "loss": 1.2795, + "step": 49514 + }, + { + "epoch": 0.05, + "learning_rate": 4.969929429850176e-05, + "loss": 1.2022, + "step": 49515 + }, + { + "epoch": 0.05, + "learning_rate": 4.969928215221289e-05, + "loss": 1.1407, + "step": 49516 + }, + { + "epoch": 0.05, + "learning_rate": 4.96992700056802e-05, + "loss": 1.1968, + "step": 49517 + }, + { + "epoch": 0.05, + "learning_rate": 4.969925785890368e-05, + "loss": 1.1713, + "step": 49518 + }, + { + "epoch": 0.05, + "learning_rate": 4.969924571188335e-05, + "loss": 1.1054, + "step": 49519 + }, + { + "epoch": 0.05, + "learning_rate": 4.96992335646192e-05, + "loss": 1.1878, + "step": 49520 + }, + { + "epoch": 0.05, + "learning_rate": 4.969922141711122e-05, + "loss": 1.177, + "step": 49521 + }, + { + "epoch": 0.05, + "learning_rate": 4.969920926935942e-05, + "loss": 1.1719, + "step": 49522 + }, + { + "epoch": 0.05, + "learning_rate": 4.969919712136381e-05, + "loss": 1.013, + "step": 49523 + }, + { + "epoch": 0.05, + "learning_rate": 4.969918497312438e-05, + "loss": 1.3259, + "step": 49524 + }, + { + "epoch": 0.05, + "learning_rate": 4.969917282464112e-05, + "loss": 0.9604, + "step": 49525 + }, + { + "epoch": 0.05, + "learning_rate": 4.969916067591405e-05, + "loss": 1.1188, + "step": 49526 + }, + { + "epoch": 0.05, + "learning_rate": 4.969914852694315e-05, + "loss": 0.6523, + "step": 49527 + }, + { + "epoch": 0.05, + "learning_rate": 4.969913637772844e-05, + "loss": 0.6287, + "step": 49528 + }, + { + "epoch": 0.05, + "learning_rate": 4.96991242282699e-05, + "loss": 0.9732, + "step": 49529 + }, + { + "epoch": 0.05, + "learning_rate": 4.969911207856755e-05, + "loss": 0.8476, + "step": 49530 + }, + { + "epoch": 0.05, + "learning_rate": 4.969909992862138e-05, + "loss": 0.8036, + "step": 49531 + }, + { + "epoch": 0.05, + "learning_rate": 4.969908777843139e-05, + "loss": 0.8076, + "step": 49532 + }, + { + "epoch": 0.05, + "learning_rate": 4.969907562799758e-05, + "loss": 1.3709, + "step": 49533 + }, + { + "epoch": 0.05, + "learning_rate": 4.969906347731995e-05, + "loss": 0.9924, + "step": 49534 + }, + { + "epoch": 0.05, + "learning_rate": 4.96990513263985e-05, + "loss": 1.0036, + "step": 49535 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699039175233234e-05, + "loss": 0.9827, + "step": 49536 + }, + { + "epoch": 0.05, + "learning_rate": 4.969902702382415e-05, + "loss": 0.681, + "step": 49537 + }, + { + "epoch": 0.05, + "learning_rate": 4.969901487217124e-05, + "loss": 0.7802, + "step": 49538 + }, + { + "epoch": 0.05, + "learning_rate": 4.9699002720274526e-05, + "loss": 1.1471, + "step": 49539 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698990568133986e-05, + "loss": 1.2588, + "step": 49540 + }, + { + "epoch": 0.05, + "learning_rate": 4.969897841574963e-05, + "loss": 1.3532, + "step": 49541 + }, + { + "epoch": 0.05, + "learning_rate": 4.969896626312145e-05, + "loss": 1.1488, + "step": 49542 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698954110249457e-05, + "loss": 0.9893, + "step": 49543 + }, + { + "epoch": 0.05, + "learning_rate": 4.969894195713364e-05, + "loss": 0.8748, + "step": 49544 + }, + { + "epoch": 0.05, + "learning_rate": 4.969892980377402e-05, + "loss": 1.0352, + "step": 49545 + }, + { + "epoch": 0.05, + "learning_rate": 4.969891765017057e-05, + "loss": 1.1608, + "step": 49546 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698905496323306e-05, + "loss": 1.0458, + "step": 49547 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698893342232224e-05, + "loss": 1.0557, + "step": 49548 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698881187897326e-05, + "loss": 1.1132, + "step": 49549 + }, + { + "epoch": 0.05, + "learning_rate": 4.969886903331861e-05, + "loss": 1.5232, + "step": 49550 + }, + { + "epoch": 0.05, + "learning_rate": 4.969885687849608e-05, + "loss": 1.0261, + "step": 49551 + }, + { + "epoch": 0.05, + "learning_rate": 4.969884472342973e-05, + "loss": 0.9377, + "step": 49552 + }, + { + "epoch": 0.05, + "learning_rate": 4.969883256811957e-05, + "loss": 0.9147, + "step": 49553 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698820412565585e-05, + "loss": 1.0346, + "step": 49554 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698808256767793e-05, + "loss": 0.5517, + "step": 49555 + }, + { + "epoch": 0.05, + "learning_rate": 4.969879610072618e-05, + "loss": 0.425, + "step": 49556 + }, + { + "epoch": 0.05, + "learning_rate": 4.969878394444074e-05, + "loss": 0.5034, + "step": 49557 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698771787911504e-05, + "loss": 1.1721, + "step": 49558 + }, + { + "epoch": 0.05, + "learning_rate": 4.969875963113844e-05, + "loss": 1.1936, + "step": 49559 + }, + { + "epoch": 0.05, + "learning_rate": 4.969874747412156e-05, + "loss": 0.528, + "step": 49560 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698735316860865e-05, + "loss": 0.9329, + "step": 49561 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698723159356356e-05, + "loss": 1.1106, + "step": 49562 + }, + { + "epoch": 0.05, + "learning_rate": 4.969871100160804e-05, + "loss": 1.5094, + "step": 49563 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698698843615895e-05, + "loss": 1.0401, + "step": 49564 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698686685379936e-05, + "loss": 0.7505, + "step": 49565 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698674526900166e-05, + "loss": 1.1889, + "step": 49566 + }, + { + "epoch": 0.05, + "learning_rate": 4.969866236817659e-05, + "loss": 0.865, + "step": 49567 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698650209209185e-05, + "loss": 0.9643, + "step": 49568 + }, + { + "epoch": 0.05, + "learning_rate": 4.969863804999797e-05, + "loss": 1.1951, + "step": 49569 + }, + { + "epoch": 0.05, + "learning_rate": 4.969862589054294e-05, + "loss": 1.1032, + "step": 49570 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698613730844104e-05, + "loss": 1.1435, + "step": 49571 + }, + { + "epoch": 0.05, + "learning_rate": 4.969860157090144e-05, + "loss": 1.2125, + "step": 49572 + }, + { + "epoch": 0.05, + "learning_rate": 4.969858941071497e-05, + "loss": 1.2278, + "step": 49573 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698577250284686e-05, + "loss": 1.0812, + "step": 49574 + }, + { + "epoch": 0.05, + "learning_rate": 4.969856508961059e-05, + "loss": 0.7089, + "step": 49575 + }, + { + "epoch": 0.05, + "learning_rate": 4.969855292869268e-05, + "loss": 0.9869, + "step": 49576 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698540767530944e-05, + "loss": 1.1345, + "step": 49577 + }, + { + "epoch": 0.05, + "learning_rate": 4.969852860612541e-05, + "loss": 1.4024, + "step": 49578 + }, + { + "epoch": 0.05, + "learning_rate": 4.969851644447605e-05, + "loss": 1.3261, + "step": 49579 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698504282582886e-05, + "loss": 1.1558, + "step": 49580 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698492120445907e-05, + "loss": 1.1577, + "step": 49581 + }, + { + "epoch": 0.05, + "learning_rate": 4.969847995806512e-05, + "loss": 1.2197, + "step": 49582 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698467795440504e-05, + "loss": 0.961, + "step": 49583 + }, + { + "epoch": 0.05, + "learning_rate": 4.969845563257208e-05, + "loss": 0.543, + "step": 49584 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698443469459855e-05, + "loss": 1.2644, + "step": 49585 + }, + { + "epoch": 0.05, + "learning_rate": 4.969843130610381e-05, + "loss": 0.9875, + "step": 49586 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698419142503946e-05, + "loss": 1.0654, + "step": 49587 + }, + { + "epoch": 0.05, + "learning_rate": 4.969840697866028e-05, + "loss": 1.1196, + "step": 49588 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698394814572804e-05, + "loss": 1.2648, + "step": 49589 + }, + { + "epoch": 0.05, + "learning_rate": 4.969838265024151e-05, + "loss": 1.5457, + "step": 49590 + }, + { + "epoch": 0.05, + "learning_rate": 4.96983704856664e-05, + "loss": 1.0869, + "step": 49591 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698358320847485e-05, + "loss": 0.7752, + "step": 49592 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698346155784745e-05, + "loss": 1.1288, + "step": 49593 + }, + { + "epoch": 0.05, + "learning_rate": 4.969833399047821e-05, + "loss": 0.8041, + "step": 49594 + }, + { + "epoch": 0.05, + "learning_rate": 4.969832182492785e-05, + "loss": 1.443, + "step": 49595 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698309659133693e-05, + "loss": 1.3066, + "step": 49596 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698297493095714e-05, + "loss": 0.9796, + "step": 49597 + }, + { + "epoch": 0.05, + "learning_rate": 4.969828532681393e-05, + "loss": 0.4759, + "step": 49598 + }, + { + "epoch": 0.05, + "learning_rate": 4.969827316028833e-05, + "loss": 0.5464, + "step": 49599 + }, + { + "epoch": 0.05, + "learning_rate": 4.969826099351892e-05, + "loss": 0.86, + "step": 49600 + }, + { + "epoch": 0.05, + "learning_rate": 4.96982488265057e-05, + "loss": 1.992, + "step": 49601 + }, + { + "epoch": 0.05, + "learning_rate": 4.969823665924867e-05, + "loss": 0.6496, + "step": 49602 + }, + { + "epoch": 0.05, + "learning_rate": 4.969822449174783e-05, + "loss": 1.3965, + "step": 49603 + }, + { + "epoch": 0.05, + "learning_rate": 4.969821232400318e-05, + "loss": 0.9961, + "step": 49604 + }, + { + "epoch": 0.05, + "learning_rate": 4.969820015601472e-05, + "loss": 0.3599, + "step": 49605 + }, + { + "epoch": 0.05, + "learning_rate": 4.969818798778244e-05, + "loss": 0.2867, + "step": 49606 + }, + { + "epoch": 0.05, + "learning_rate": 4.969817581930636e-05, + "loss": 1.0636, + "step": 49607 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698163650586474e-05, + "loss": 0.9888, + "step": 49608 + }, + { + "epoch": 0.05, + "learning_rate": 4.969815148162277e-05, + "loss": 0.5891, + "step": 49609 + }, + { + "epoch": 0.05, + "learning_rate": 4.969813931241525e-05, + "loss": 0.3398, + "step": 49610 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698127142963933e-05, + "loss": 1.2536, + "step": 49611 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698114973268804e-05, + "loss": 1.3141, + "step": 49612 + }, + { + "epoch": 0.05, + "learning_rate": 4.969810280332986e-05, + "loss": 0.6533, + "step": 49613 + }, + { + "epoch": 0.05, + "learning_rate": 4.969809063314711e-05, + "loss": 0.412, + "step": 49614 + }, + { + "epoch": 0.05, + "learning_rate": 4.969807846272055e-05, + "loss": 0.4006, + "step": 49615 + }, + { + "epoch": 0.05, + "learning_rate": 4.969806629205018e-05, + "loss": 1.0651, + "step": 49616 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698054121136e-05, + "loss": 0.684, + "step": 49617 + }, + { + "epoch": 0.05, + "learning_rate": 4.969804194997803e-05, + "loss": 0.525, + "step": 49618 + }, + { + "epoch": 0.05, + "learning_rate": 4.969802977857623e-05, + "loss": 0.9319, + "step": 49619 + }, + { + "epoch": 0.05, + "learning_rate": 4.969801760693063e-05, + "loss": 0.7828, + "step": 49620 + }, + { + "epoch": 0.05, + "learning_rate": 4.9698005435041215e-05, + "loss": 1.0305, + "step": 49621 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697993262907993e-05, + "loss": 0.4524, + "step": 49622 + }, + { + "epoch": 0.05, + "learning_rate": 4.969798109053097e-05, + "loss": 0.882, + "step": 49623 + }, + { + "epoch": 0.05, + "learning_rate": 4.969796891791013e-05, + "loss": 1.0141, + "step": 49624 + }, + { + "epoch": 0.05, + "learning_rate": 4.969795674504549e-05, + "loss": 0.7707, + "step": 49625 + }, + { + "epoch": 0.05, + "learning_rate": 4.969794457193704e-05, + "loss": 1.0838, + "step": 49626 + }, + { + "epoch": 0.05, + "learning_rate": 4.969793239858478e-05, + "loss": 1.075, + "step": 49627 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697920224988714e-05, + "loss": 1.1766, + "step": 49628 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697908051148836e-05, + "loss": 0.8916, + "step": 49629 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697895877065155e-05, + "loss": 1.1048, + "step": 49630 + }, + { + "epoch": 0.05, + "learning_rate": 4.969788370273767e-05, + "loss": 1.28, + "step": 49631 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697871528166376e-05, + "loss": 0.9347, + "step": 49632 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697859353351265e-05, + "loss": 0.3774, + "step": 49633 + }, + { + "epoch": 0.05, + "learning_rate": 4.969784717829236e-05, + "loss": 0.4556, + "step": 49634 + }, + { + "epoch": 0.05, + "learning_rate": 4.969783500298964e-05, + "loss": 1.1809, + "step": 49635 + }, + { + "epoch": 0.05, + "learning_rate": 4.969782282744312e-05, + "loss": 0.9175, + "step": 49636 + }, + { + "epoch": 0.05, + "learning_rate": 4.969781065165279e-05, + "loss": 0.9305, + "step": 49637 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697798475618654e-05, + "loss": 1.0161, + "step": 49638 + }, + { + "epoch": 0.05, + "learning_rate": 4.969778629934071e-05, + "loss": 0.9165, + "step": 49639 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697774122818964e-05, + "loss": 1.0382, + "step": 49640 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697761946053407e-05, + "loss": 1.1913, + "step": 49641 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697749769044046e-05, + "loss": 1.1504, + "step": 49642 + }, + { + "epoch": 0.05, + "learning_rate": 4.969773759179088e-05, + "loss": 0.8729, + "step": 49643 + }, + { + "epoch": 0.05, + "learning_rate": 4.96977254142939e-05, + "loss": 0.1837, + "step": 49644 + }, + { + "epoch": 0.05, + "learning_rate": 4.969771323655313e-05, + "loss": 0.2127, + "step": 49645 + }, + { + "epoch": 0.05, + "learning_rate": 4.969770105856855e-05, + "loss": 0.9363, + "step": 49646 + }, + { + "epoch": 0.05, + "learning_rate": 4.969768888034016e-05, + "loss": 1.2882, + "step": 49647 + }, + { + "epoch": 0.05, + "learning_rate": 4.969767670186797e-05, + "loss": 1.1148, + "step": 49648 + }, + { + "epoch": 0.05, + "learning_rate": 4.969766452315197e-05, + "loss": 1.0263, + "step": 49649 + }, + { + "epoch": 0.05, + "learning_rate": 4.969765234419217e-05, + "loss": 1.034, + "step": 49650 + }, + { + "epoch": 0.05, + "learning_rate": 4.969764016498856e-05, + "loss": 0.8938, + "step": 49651 + }, + { + "epoch": 0.05, + "learning_rate": 4.969762798554115e-05, + "loss": 0.8941, + "step": 49652 + }, + { + "epoch": 0.05, + "learning_rate": 4.969761580584993e-05, + "loss": 0.7833, + "step": 49653 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697603625914906e-05, + "loss": 1.0886, + "step": 49654 + }, + { + "epoch": 0.05, + "learning_rate": 4.969759144573608e-05, + "loss": 0.9649, + "step": 49655 + }, + { + "epoch": 0.05, + "learning_rate": 4.969757926531345e-05, + "loss": 1.1394, + "step": 49656 + }, + { + "epoch": 0.05, + "learning_rate": 4.969756708464702e-05, + "loss": 0.7321, + "step": 49657 + }, + { + "epoch": 0.05, + "learning_rate": 4.969755490373678e-05, + "loss": 0.9447, + "step": 49658 + }, + { + "epoch": 0.05, + "learning_rate": 4.969754272258274e-05, + "loss": 1.8479, + "step": 49659 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697530541184886e-05, + "loss": 1.1168, + "step": 49660 + }, + { + "epoch": 0.05, + "learning_rate": 4.969751835954324e-05, + "loss": 0.9562, + "step": 49661 + }, + { + "epoch": 0.05, + "learning_rate": 4.969750617765779e-05, + "loss": 1.235, + "step": 49662 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697493995528533e-05, + "loss": 1.2884, + "step": 49663 + }, + { + "epoch": 0.05, + "learning_rate": 4.969748181315547e-05, + "loss": 1.054, + "step": 49664 + }, + { + "epoch": 0.05, + "learning_rate": 4.969746963053861e-05, + "loss": 1.2897, + "step": 49665 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697457447677945e-05, + "loss": 1.0387, + "step": 49666 + }, + { + "epoch": 0.05, + "learning_rate": 4.969744526457347e-05, + "loss": 1.0432, + "step": 49667 + }, + { + "epoch": 0.05, + "learning_rate": 4.96974330812252e-05, + "loss": 1.0432, + "step": 49668 + }, + { + "epoch": 0.05, + "learning_rate": 4.969742089763313e-05, + "loss": 1.007, + "step": 49669 + }, + { + "epoch": 0.05, + "learning_rate": 4.969740871379726e-05, + "loss": 1.0385, + "step": 49670 + }, + { + "epoch": 0.05, + "learning_rate": 4.969739652971758e-05, + "loss": 1.0894, + "step": 49671 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697384345394095e-05, + "loss": 0.8759, + "step": 49672 + }, + { + "epoch": 0.05, + "learning_rate": 4.96973721608268e-05, + "loss": 0.8847, + "step": 49673 + }, + { + "epoch": 0.05, + "learning_rate": 4.969735997601572e-05, + "loss": 1.0698, + "step": 49674 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697347790960834e-05, + "loss": 1.1048, + "step": 49675 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697335605662146e-05, + "loss": 0.9947, + "step": 49676 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697323420119654e-05, + "loss": 1.0487, + "step": 49677 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697311234333365e-05, + "loss": 0.3078, + "step": 49678 + }, + { + "epoch": 0.05, + "learning_rate": 4.969729904830327e-05, + "loss": 0.1891, + "step": 49679 + }, + { + "epoch": 0.05, + "learning_rate": 4.969728686202937e-05, + "loss": 0.1652, + "step": 49680 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697274675511674e-05, + "loss": 0.14, + "step": 49681 + }, + { + "epoch": 0.05, + "learning_rate": 4.969726248875018e-05, + "loss": 0.1433, + "step": 49682 + }, + { + "epoch": 0.05, + "learning_rate": 4.969725030174488e-05, + "loss": 0.1577, + "step": 49683 + }, + { + "epoch": 0.05, + "learning_rate": 4.969723811449578e-05, + "loss": 0.1872, + "step": 49684 + }, + { + "epoch": 0.05, + "learning_rate": 4.969722592700288e-05, + "loss": 0.2934, + "step": 49685 + }, + { + "epoch": 0.05, + "learning_rate": 4.969721373926618e-05, + "loss": 0.2357, + "step": 49686 + }, + { + "epoch": 0.05, + "learning_rate": 4.969720155128568e-05, + "loss": 0.4073, + "step": 49687 + }, + { + "epoch": 0.05, + "learning_rate": 4.969718936306138e-05, + "loss": 1.0075, + "step": 49688 + }, + { + "epoch": 0.05, + "learning_rate": 4.969717717459328e-05, + "loss": 1.1764, + "step": 49689 + }, + { + "epoch": 0.05, + "learning_rate": 4.969716498588137e-05, + "loss": 0.9801, + "step": 49690 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697152796925674e-05, + "loss": 1.0481, + "step": 49691 + }, + { + "epoch": 0.05, + "learning_rate": 4.969714060772617e-05, + "loss": 0.7534, + "step": 49692 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697128418282866e-05, + "loss": 0.7131, + "step": 49693 + }, + { + "epoch": 0.05, + "learning_rate": 4.969711622859576e-05, + "loss": 0.9923, + "step": 49694 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697104038664865e-05, + "loss": 0.8353, + "step": 49695 + }, + { + "epoch": 0.05, + "learning_rate": 4.969709184849016e-05, + "loss": 0.942, + "step": 49696 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697079658071665e-05, + "loss": 0.8323, + "step": 49697 + }, + { + "epoch": 0.05, + "learning_rate": 4.969706746740937e-05, + "loss": 0.8779, + "step": 49698 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697055276503266e-05, + "loss": 1.4664, + "step": 49699 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697043085353375e-05, + "loss": 0.6431, + "step": 49700 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697030893959674e-05, + "loss": 0.9939, + "step": 49701 + }, + { + "epoch": 0.05, + "learning_rate": 4.9697018702322184e-05, + "loss": 1.1597, + "step": 49702 + }, + { + "epoch": 0.05, + "learning_rate": 4.969700651044089e-05, + "loss": 0.8568, + "step": 49703 + }, + { + "epoch": 0.05, + "learning_rate": 4.96969943183158e-05, + "loss": 0.9661, + "step": 49704 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696982125946914e-05, + "loss": 1.0379, + "step": 49705 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696969933334224e-05, + "loss": 0.8297, + "step": 49706 + }, + { + "epoch": 0.05, + "learning_rate": 4.969695774047773e-05, + "loss": 0.6301, + "step": 49707 + }, + { + "epoch": 0.05, + "learning_rate": 4.969694554737745e-05, + "loss": 1.1226, + "step": 49708 + }, + { + "epoch": 0.05, + "learning_rate": 4.969693335403337e-05, + "loss": 0.936, + "step": 49709 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696921160445495e-05, + "loss": 1.0519, + "step": 49710 + }, + { + "epoch": 0.05, + "learning_rate": 4.969690896661382e-05, + "loss": 1.3315, + "step": 49711 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696896772538335e-05, + "loss": 1.2498, + "step": 49712 + }, + { + "epoch": 0.05, + "learning_rate": 4.969688457821907e-05, + "loss": 0.83, + "step": 49713 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696872383656e-05, + "loss": 0.7352, + "step": 49714 + }, + { + "epoch": 0.05, + "learning_rate": 4.969686018884914e-05, + "loss": 0.9151, + "step": 49715 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696847993798465e-05, + "loss": 0.8976, + "step": 49716 + }, + { + "epoch": 0.05, + "learning_rate": 4.969683579850401e-05, + "loss": 1.1012, + "step": 49717 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696823602965755e-05, + "loss": 1.2216, + "step": 49718 + }, + { + "epoch": 0.05, + "learning_rate": 4.96968114071837e-05, + "loss": 1.094, + "step": 49719 + }, + { + "epoch": 0.05, + "learning_rate": 4.969679921115785e-05, + "loss": 0.639, + "step": 49720 + }, + { + "epoch": 0.05, + "learning_rate": 4.96967870148882e-05, + "loss": 0.5589, + "step": 49721 + }, + { + "epoch": 0.05, + "learning_rate": 4.969677481837476e-05, + "loss": 1.0153, + "step": 49722 + }, + { + "epoch": 0.05, + "learning_rate": 4.969676262161752e-05, + "loss": 1.3416, + "step": 49723 + }, + { + "epoch": 0.05, + "learning_rate": 4.969675042461649e-05, + "loss": 0.9628, + "step": 49724 + }, + { + "epoch": 0.05, + "learning_rate": 4.969673822737166e-05, + "loss": 0.9555, + "step": 49725 + }, + { + "epoch": 0.05, + "learning_rate": 4.969672602988304e-05, + "loss": 1.11, + "step": 49726 + }, + { + "epoch": 0.05, + "learning_rate": 4.969671383215061e-05, + "loss": 0.9237, + "step": 49727 + }, + { + "epoch": 0.05, + "learning_rate": 4.969670163417439e-05, + "loss": 1.0586, + "step": 49728 + }, + { + "epoch": 0.05, + "learning_rate": 4.969668943595438e-05, + "loss": 1.1072, + "step": 49729 + }, + { + "epoch": 0.05, + "learning_rate": 4.969667723749057e-05, + "loss": 0.9803, + "step": 49730 + }, + { + "epoch": 0.05, + "learning_rate": 4.969666503878296e-05, + "loss": 0.8866, + "step": 49731 + }, + { + "epoch": 0.05, + "learning_rate": 4.969665283983157e-05, + "loss": 0.9125, + "step": 49732 + }, + { + "epoch": 0.05, + "learning_rate": 4.969664064063637e-05, + "loss": 0.8739, + "step": 49733 + }, + { + "epoch": 0.05, + "learning_rate": 4.969662844119738e-05, + "loss": 1.2365, + "step": 49734 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696616241514604e-05, + "loss": 1.0378, + "step": 49735 + }, + { + "epoch": 0.05, + "learning_rate": 4.969660404158802e-05, + "loss": 0.7777, + "step": 49736 + }, + { + "epoch": 0.05, + "learning_rate": 4.969659184141765e-05, + "loss": 0.9673, + "step": 49737 + }, + { + "epoch": 0.05, + "learning_rate": 4.969657964100348e-05, + "loss": 1.1977, + "step": 49738 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696567440345525e-05, + "loss": 1.1973, + "step": 49739 + }, + { + "epoch": 0.05, + "learning_rate": 4.969655523944377e-05, + "loss": 0.7573, + "step": 49740 + }, + { + "epoch": 0.05, + "learning_rate": 4.969654303829822e-05, + "loss": 1.9593, + "step": 49741 + }, + { + "epoch": 0.05, + "learning_rate": 4.969653083690888e-05, + "loss": 1.21, + "step": 49742 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696518635275744e-05, + "loss": 1.1018, + "step": 49743 + }, + { + "epoch": 0.05, + "learning_rate": 4.969650643339882e-05, + "loss": 1.1102, + "step": 49744 + }, + { + "epoch": 0.05, + "learning_rate": 4.969649423127809e-05, + "loss": 1.3353, + "step": 49745 + }, + { + "epoch": 0.05, + "learning_rate": 4.969648202891357e-05, + "loss": 1.1271, + "step": 49746 + }, + { + "epoch": 0.05, + "learning_rate": 4.969646982630526e-05, + "loss": 1.359, + "step": 49747 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696457623453156e-05, + "loss": 1.3025, + "step": 49748 + }, + { + "epoch": 0.05, + "learning_rate": 4.969644542035726e-05, + "loss": 1.4284, + "step": 49749 + }, + { + "epoch": 0.05, + "learning_rate": 4.969643321701757e-05, + "loss": 0.6043, + "step": 49750 + }, + { + "epoch": 0.05, + "learning_rate": 4.969642101343409e-05, + "loss": 0.2466, + "step": 49751 + }, + { + "epoch": 0.05, + "learning_rate": 4.969640880960681e-05, + "loss": 0.3081, + "step": 49752 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696396605535746e-05, + "loss": 0.2278, + "step": 49753 + }, + { + "epoch": 0.05, + "learning_rate": 4.969638440122088e-05, + "loss": 0.0813, + "step": 49754 + }, + { + "epoch": 0.05, + "learning_rate": 4.969637219666223e-05, + "loss": 0.1634, + "step": 49755 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696359991859795e-05, + "loss": 1.022, + "step": 49756 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696347786813556e-05, + "loss": 0.9686, + "step": 49757 + }, + { + "epoch": 0.05, + "learning_rate": 4.969633558152353e-05, + "loss": 1.0521, + "step": 49758 + }, + { + "epoch": 0.05, + "learning_rate": 4.969632337598971e-05, + "loss": 1.3703, + "step": 49759 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696311170212095e-05, + "loss": 0.8926, + "step": 49760 + }, + { + "epoch": 0.05, + "learning_rate": 4.969629896419069e-05, + "loss": 0.7261, + "step": 49761 + }, + { + "epoch": 0.05, + "learning_rate": 4.96962867579255e-05, + "loss": 0.5952, + "step": 49762 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696274551416513e-05, + "loss": 0.5858, + "step": 49763 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696262344663734e-05, + "loss": 0.4573, + "step": 49764 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696250137667164e-05, + "loss": 0.6311, + "step": 49765 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696237930426805e-05, + "loss": 0.8575, + "step": 49766 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696225722942656e-05, + "loss": 0.7521, + "step": 49767 + }, + { + "epoch": 0.05, + "learning_rate": 4.969621351521472e-05, + "loss": 1.2539, + "step": 49768 + }, + { + "epoch": 0.05, + "learning_rate": 4.969620130724298e-05, + "loss": 1.1484, + "step": 49769 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696189099027466e-05, + "loss": 1.0434, + "step": 49770 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696176890568145e-05, + "loss": 0.9848, + "step": 49771 + }, + { + "epoch": 0.05, + "learning_rate": 4.969616468186504e-05, + "loss": 1.086, + "step": 49772 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696152472918155e-05, + "loss": 0.9542, + "step": 49773 + }, + { + "epoch": 0.05, + "learning_rate": 4.969614026372748e-05, + "loss": 0.7507, + "step": 49774 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696128054293e-05, + "loss": 0.9552, + "step": 49775 + }, + { + "epoch": 0.05, + "learning_rate": 4.969611584461473e-05, + "loss": 1.1592, + "step": 49776 + }, + { + "epoch": 0.05, + "learning_rate": 4.969610363469268e-05, + "loss": 1.1277, + "step": 49777 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696091424526844e-05, + "loss": 0.7748, + "step": 49778 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696079214117206e-05, + "loss": 1.0627, + "step": 49779 + }, + { + "epoch": 0.05, + "learning_rate": 4.969606700346379e-05, + "loss": 0.9632, + "step": 49780 + }, + { + "epoch": 0.05, + "learning_rate": 4.969605479256658e-05, + "loss": 1.0756, + "step": 49781 + }, + { + "epoch": 0.05, + "learning_rate": 4.969604258142558e-05, + "loss": 1.0718, + "step": 49782 + }, + { + "epoch": 0.05, + "learning_rate": 4.969603037004079e-05, + "loss": 1.2339, + "step": 49783 + }, + { + "epoch": 0.05, + "learning_rate": 4.969601815841222e-05, + "loss": 2.156, + "step": 49784 + }, + { + "epoch": 0.05, + "learning_rate": 4.969600594653985e-05, + "loss": 1.696, + "step": 49785 + }, + { + "epoch": 0.05, + "learning_rate": 4.96959937344237e-05, + "loss": 1.4737, + "step": 49786 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695981522063764e-05, + "loss": 1.8363, + "step": 49787 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695969309460026e-05, + "loss": 1.5532, + "step": 49788 + }, + { + "epoch": 0.05, + "learning_rate": 4.969595709661251e-05, + "loss": 0.8044, + "step": 49789 + }, + { + "epoch": 0.05, + "learning_rate": 4.969594488352121e-05, + "loss": 1.0397, + "step": 49790 + }, + { + "epoch": 0.05, + "learning_rate": 4.969593267018611e-05, + "loss": 1.0725, + "step": 49791 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695920456607226e-05, + "loss": 1.0724, + "step": 49792 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695908242784554e-05, + "loss": 1.117, + "step": 49793 + }, + { + "epoch": 0.05, + "learning_rate": 4.96958960287181e-05, + "loss": 0.9318, + "step": 49794 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695883814407854e-05, + "loss": 1.3754, + "step": 49795 + }, + { + "epoch": 0.05, + "learning_rate": 4.969587159985382e-05, + "loss": 1.1107, + "step": 49796 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695859385056e-05, + "loss": 1.9901, + "step": 49797 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695847170014395e-05, + "loss": 1.0514, + "step": 49798 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695834954729005e-05, + "loss": 0.8561, + "step": 49799 + }, + { + "epoch": 0.05, + "learning_rate": 4.969582273919982e-05, + "loss": 2.4102, + "step": 49800 + }, + { + "epoch": 0.05, + "learning_rate": 4.969581052342685e-05, + "loss": 0.7532, + "step": 49801 + }, + { + "epoch": 0.05, + "learning_rate": 4.96957983074101e-05, + "loss": 1.2143, + "step": 49802 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695786091149555e-05, + "loss": 0.8929, + "step": 49803 + }, + { + "epoch": 0.05, + "learning_rate": 4.969577387464523e-05, + "loss": 0.8831, + "step": 49804 + }, + { + "epoch": 0.05, + "learning_rate": 4.969576165789712e-05, + "loss": 1.2123, + "step": 49805 + }, + { + "epoch": 0.05, + "learning_rate": 4.969574944090521e-05, + "loss": 1.3749, + "step": 49806 + }, + { + "epoch": 0.05, + "learning_rate": 4.969573722366953e-05, + "loss": 0.9374, + "step": 49807 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695725006190064e-05, + "loss": 0.8615, + "step": 49808 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695712788466804e-05, + "loss": 0.9641, + "step": 49809 + }, + { + "epoch": 0.05, + "learning_rate": 4.969570057049976e-05, + "loss": 0.9, + "step": 49810 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695688352288924e-05, + "loss": 0.5106, + "step": 49811 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695676133834316e-05, + "loss": 0.6764, + "step": 49812 + }, + { + "epoch": 0.05, + "learning_rate": 4.969566391513591e-05, + "loss": 0.9302, + "step": 49813 + }, + { + "epoch": 0.05, + "learning_rate": 4.969565169619373e-05, + "loss": 0.9687, + "step": 49814 + }, + { + "epoch": 0.05, + "learning_rate": 4.969563947700776e-05, + "loss": 1.0153, + "step": 49815 + }, + { + "epoch": 0.05, + "learning_rate": 4.969562725757801e-05, + "loss": 1.0214, + "step": 49816 + }, + { + "epoch": 0.05, + "learning_rate": 4.969561503790447e-05, + "loss": 1.0197, + "step": 49817 + }, + { + "epoch": 0.05, + "learning_rate": 4.969560281798715e-05, + "loss": 1.0999, + "step": 49818 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695590597826034e-05, + "loss": 1.0691, + "step": 49819 + }, + { + "epoch": 0.05, + "learning_rate": 4.969557837742115e-05, + "loss": 0.8658, + "step": 49820 + }, + { + "epoch": 0.05, + "learning_rate": 4.969556615677247e-05, + "loss": 1.2161, + "step": 49821 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695553935880006e-05, + "loss": 0.9085, + "step": 49822 + }, + { + "epoch": 0.05, + "learning_rate": 4.969554171474376e-05, + "loss": 1.0172, + "step": 49823 + }, + { + "epoch": 0.05, + "learning_rate": 4.969552949336373e-05, + "loss": 0.9024, + "step": 49824 + }, + { + "epoch": 0.05, + "learning_rate": 4.969551727173992e-05, + "loss": 1.0045, + "step": 49825 + }, + { + "epoch": 0.05, + "learning_rate": 4.969550504987232e-05, + "loss": 0.9776, + "step": 49826 + }, + { + "epoch": 0.05, + "learning_rate": 4.969549282776094e-05, + "loss": 1.1668, + "step": 49827 + }, + { + "epoch": 0.05, + "learning_rate": 4.969548060540578e-05, + "loss": 1.0248, + "step": 49828 + }, + { + "epoch": 0.05, + "learning_rate": 4.969546838280683e-05, + "loss": 0.8696, + "step": 49829 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695456159964095e-05, + "loss": 0.9332, + "step": 49830 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695443936877586e-05, + "loss": 1.1245, + "step": 49831 + }, + { + "epoch": 0.05, + "learning_rate": 4.969543171354729e-05, + "loss": 1.1271, + "step": 49832 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695419489973205e-05, + "loss": 0.9743, + "step": 49833 + }, + { + "epoch": 0.05, + "learning_rate": 4.969540726615534e-05, + "loss": 0.9047, + "step": 49834 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695395042093694e-05, + "loss": 1.1996, + "step": 49835 + }, + { + "epoch": 0.05, + "learning_rate": 4.969538281778827e-05, + "loss": 1.2299, + "step": 49836 + }, + { + "epoch": 0.05, + "learning_rate": 4.969537059323906e-05, + "loss": 1.096, + "step": 49837 + }, + { + "epoch": 0.05, + "learning_rate": 4.969535836844607e-05, + "loss": 1.0143, + "step": 49838 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695346143409296e-05, + "loss": 0.848, + "step": 49839 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695333918128734e-05, + "loss": 0.9036, + "step": 49840 + }, + { + "epoch": 0.05, + "learning_rate": 4.969532169260439e-05, + "loss": 1.1232, + "step": 49841 + }, + { + "epoch": 0.05, + "learning_rate": 4.969530946683627e-05, + "loss": 1.0361, + "step": 49842 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695297240824366e-05, + "loss": 1.008, + "step": 49843 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695285014568686e-05, + "loss": 1.0287, + "step": 49844 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695272788069224e-05, + "loss": 0.9913, + "step": 49845 + }, + { + "epoch": 0.05, + "learning_rate": 4.969526056132597e-05, + "loss": 1.0735, + "step": 49846 + }, + { + "epoch": 0.05, + "learning_rate": 4.969524833433895e-05, + "loss": 0.9346, + "step": 49847 + }, + { + "epoch": 0.05, + "learning_rate": 4.969523610710814e-05, + "loss": 0.8704, + "step": 49848 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695223879633546e-05, + "loss": 0.936, + "step": 49849 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695211651915176e-05, + "loss": 0.9723, + "step": 49850 + }, + { + "epoch": 0.05, + "learning_rate": 4.969519942395302e-05, + "loss": 0.909, + "step": 49851 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695187195747094e-05, + "loss": 1.122, + "step": 49852 + }, + { + "epoch": 0.05, + "learning_rate": 4.969517496729738e-05, + "loss": 0.9545, + "step": 49853 + }, + { + "epoch": 0.05, + "learning_rate": 4.969516273860388e-05, + "loss": 0.8646, + "step": 49854 + }, + { + "epoch": 0.05, + "learning_rate": 4.969515050966662e-05, + "loss": 1.073, + "step": 49855 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695138280485564e-05, + "loss": 1.1728, + "step": 49856 + }, + { + "epoch": 0.05, + "learning_rate": 4.969512605106073e-05, + "loss": 0.7787, + "step": 49857 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695113821392115e-05, + "loss": 1.2445, + "step": 49858 + }, + { + "epoch": 0.05, + "learning_rate": 4.969510159147972e-05, + "loss": 1.0143, + "step": 49859 + }, + { + "epoch": 0.05, + "learning_rate": 4.969508936132355e-05, + "loss": 1.0043, + "step": 49860 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695077130923594e-05, + "loss": 0.8929, + "step": 49861 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695064900279864e-05, + "loss": 0.9368, + "step": 49862 + }, + { + "epoch": 0.05, + "learning_rate": 4.969505266939236e-05, + "loss": 0.6908, + "step": 49863 + }, + { + "epoch": 0.05, + "learning_rate": 4.969504043826106e-05, + "loss": 0.9316, + "step": 49864 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695028206885996e-05, + "loss": 0.9747, + "step": 49865 + }, + { + "epoch": 0.05, + "learning_rate": 4.969501597526715e-05, + "loss": 1.0985, + "step": 49866 + }, + { + "epoch": 0.05, + "learning_rate": 4.9695003743404523e-05, + "loss": 1.1519, + "step": 49867 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694991511298116e-05, + "loss": 0.9294, + "step": 49868 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694979278947926e-05, + "loss": 1.4085, + "step": 49869 + }, + { + "epoch": 0.05, + "learning_rate": 4.969496704635397e-05, + "loss": 1.0229, + "step": 49870 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694954813516225e-05, + "loss": 0.6884, + "step": 49871 + }, + { + "epoch": 0.05, + "learning_rate": 4.969494258043471e-05, + "loss": 0.4123, + "step": 49872 + }, + { + "epoch": 0.05, + "learning_rate": 4.969493034710941e-05, + "loss": 0.3302, + "step": 49873 + }, + { + "epoch": 0.05, + "learning_rate": 4.969491811354034e-05, + "loss": 0.3047, + "step": 49874 + }, + { + "epoch": 0.05, + "learning_rate": 4.969490587972748e-05, + "loss": 0.3815, + "step": 49875 + }, + { + "epoch": 0.05, + "learning_rate": 4.969489364567086e-05, + "loss": 0.4077, + "step": 49876 + }, + { + "epoch": 0.05, + "learning_rate": 4.969488141137045e-05, + "loss": 0.3996, + "step": 49877 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694869176826265e-05, + "loss": 0.3707, + "step": 49878 + }, + { + "epoch": 0.05, + "learning_rate": 4.96948569420383e-05, + "loss": 0.2931, + "step": 49879 + }, + { + "epoch": 0.05, + "learning_rate": 4.969484470700656e-05, + "loss": 0.3841, + "step": 49880 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694832471731044e-05, + "loss": 0.3155, + "step": 49881 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694820236211745e-05, + "loss": 0.263, + "step": 49882 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694808000448676e-05, + "loss": 0.2912, + "step": 49883 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694795764441824e-05, + "loss": 0.2952, + "step": 49884 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694783528191203e-05, + "loss": 0.2818, + "step": 49885 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694771291696807e-05, + "loss": 0.3286, + "step": 49886 + }, + { + "epoch": 0.05, + "learning_rate": 4.969475905495863e-05, + "loss": 0.522, + "step": 49887 + }, + { + "epoch": 0.05, + "learning_rate": 4.969474681797667e-05, + "loss": 0.4233, + "step": 49888 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694734580750946e-05, + "loss": 0.3557, + "step": 49889 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694722343281444e-05, + "loss": 0.2749, + "step": 49890 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694710105568153e-05, + "loss": 0.1949, + "step": 49891 + }, + { + "epoch": 0.05, + "learning_rate": 4.96946978676111e-05, + "loss": 0.2091, + "step": 49892 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694685629410264e-05, + "loss": 0.2097, + "step": 49893 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694673390965665e-05, + "loss": 0.2099, + "step": 49894 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694661152277276e-05, + "loss": 0.2058, + "step": 49895 + }, + { + "epoch": 0.05, + "learning_rate": 4.969464891334512e-05, + "loss": 0.2694, + "step": 49896 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694636674169185e-05, + "loss": 0.2724, + "step": 49897 + }, + { + "epoch": 0.05, + "learning_rate": 4.969462443474947e-05, + "loss": 0.1675, + "step": 49898 + }, + { + "epoch": 0.05, + "learning_rate": 4.969461219508599e-05, + "loss": 0.2191, + "step": 49899 + }, + { + "epoch": 0.05, + "learning_rate": 4.969459995517873e-05, + "loss": 0.2088, + "step": 49900 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694587715027695e-05, + "loss": 0.1629, + "step": 49901 + }, + { + "epoch": 0.05, + "learning_rate": 4.969457547463289e-05, + "loss": 0.1767, + "step": 49902 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694563233994304e-05, + "loss": 0.2686, + "step": 49903 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694550993111945e-05, + "loss": 0.1919, + "step": 49904 + }, + { + "epoch": 0.05, + "learning_rate": 4.969453875198582e-05, + "loss": 0.1692, + "step": 49905 + }, + { + "epoch": 0.05, + "learning_rate": 4.969452651061591e-05, + "loss": 0.1675, + "step": 49906 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694514269002236e-05, + "loss": 0.2759, + "step": 49907 + }, + { + "epoch": 0.05, + "learning_rate": 4.969450202714478e-05, + "loss": 0.214, + "step": 49908 + }, + { + "epoch": 0.05, + "learning_rate": 4.969448978504355e-05, + "loss": 0.203, + "step": 49909 + }, + { + "epoch": 0.05, + "learning_rate": 4.969447754269855e-05, + "loss": 0.2164, + "step": 49910 + }, + { + "epoch": 0.05, + "learning_rate": 4.969446530010978e-05, + "loss": 0.2406, + "step": 49911 + }, + { + "epoch": 0.05, + "learning_rate": 4.969445305727723e-05, + "loss": 0.2969, + "step": 49912 + }, + { + "epoch": 0.05, + "learning_rate": 4.969444081420091e-05, + "loss": 0.6572, + "step": 49913 + }, + { + "epoch": 0.05, + "learning_rate": 4.969442857088081e-05, + "loss": 0.8024, + "step": 49914 + }, + { + "epoch": 0.05, + "learning_rate": 4.969441632731695e-05, + "loss": 1.1187, + "step": 49915 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694404083509304e-05, + "loss": 0.8753, + "step": 49916 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694391839457896e-05, + "loss": 0.6985, + "step": 49917 + }, + { + "epoch": 0.05, + "learning_rate": 4.969437959516271e-05, + "loss": 1.137, + "step": 49918 + }, + { + "epoch": 0.05, + "learning_rate": 4.969436735062375e-05, + "loss": 0.6411, + "step": 49919 + }, + { + "epoch": 0.05, + "learning_rate": 4.969435510584103e-05, + "loss": 0.9207, + "step": 49920 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694342860814523e-05, + "loss": 1.131, + "step": 49921 + }, + { + "epoch": 0.05, + "learning_rate": 4.969433061554425e-05, + "loss": 1.0892, + "step": 49922 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694318370030204e-05, + "loss": 1.3272, + "step": 49923 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694306124272384e-05, + "loss": 1.2183, + "step": 49924 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694293878270794e-05, + "loss": 0.9243, + "step": 49925 + }, + { + "epoch": 0.05, + "learning_rate": 4.969428163202543e-05, + "loss": 0.9693, + "step": 49926 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694269385536294e-05, + "loss": 0.7393, + "step": 49927 + }, + { + "epoch": 0.05, + "learning_rate": 4.969425713880339e-05, + "loss": 0.8623, + "step": 49928 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694244891826716e-05, + "loss": 1.4185, + "step": 49929 + }, + { + "epoch": 0.05, + "learning_rate": 4.969423264460627e-05, + "loss": 1.057, + "step": 49930 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694220397142054e-05, + "loss": 1.2412, + "step": 49931 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694208149434065e-05, + "loss": 1.0736, + "step": 49932 + }, + { + "epoch": 0.05, + "learning_rate": 4.969419590148231e-05, + "loss": 1.1882, + "step": 49933 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694183653286774e-05, + "loss": 0.7187, + "step": 49934 + }, + { + "epoch": 0.05, + "learning_rate": 4.969417140484747e-05, + "loss": 1.3146, + "step": 49935 + }, + { + "epoch": 0.05, + "learning_rate": 4.96941591561644e-05, + "loss": 0.8151, + "step": 49936 + }, + { + "epoch": 0.05, + "learning_rate": 4.969414690723756e-05, + "loss": 1.126, + "step": 49937 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694134658066946e-05, + "loss": 1.2011, + "step": 49938 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694122408652566e-05, + "loss": 1.0565, + "step": 49939 + }, + { + "epoch": 0.05, + "learning_rate": 4.969411015899441e-05, + "loss": 1.0275, + "step": 49940 + }, + { + "epoch": 0.05, + "learning_rate": 4.969409790909249e-05, + "loss": 0.7647, + "step": 49941 + }, + { + "epoch": 0.05, + "learning_rate": 4.9694085658946796e-05, + "loss": 0.9902, + "step": 49942 + }, + { + "epoch": 0.05, + "learning_rate": 4.969407340855734e-05, + "loss": 0.8775, + "step": 49943 + }, + { + "epoch": 0.05, + "learning_rate": 4.969406115792411e-05, + "loss": 0.8389, + "step": 49944 + }, + { + "epoch": 0.05, + "learning_rate": 4.969404890704711e-05, + "loss": 0.7035, + "step": 49945 + }, + { + "epoch": 0.05, + "learning_rate": 4.969403665592634e-05, + "loss": 1.0765, + "step": 49946 + }, + { + "epoch": 0.05, + "learning_rate": 4.96940244045618e-05, + "loss": 0.9198, + "step": 49947 + }, + { + "epoch": 0.05, + "learning_rate": 4.96940121529535e-05, + "loss": 0.9035, + "step": 49948 + }, + { + "epoch": 0.05, + "learning_rate": 4.969399990110142e-05, + "loss": 0.9188, + "step": 49949 + }, + { + "epoch": 0.05, + "learning_rate": 4.969398764900557e-05, + "loss": 0.9807, + "step": 49950 + }, + { + "epoch": 0.05, + "learning_rate": 4.969397539666596e-05, + "loss": 1.088, + "step": 49951 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693963144082575e-05, + "loss": 1.1111, + "step": 49952 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693950891255425e-05, + "loss": 1.0279, + "step": 49953 + }, + { + "epoch": 0.05, + "learning_rate": 4.96939386381845e-05, + "loss": 0.7702, + "step": 49954 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693926384869816e-05, + "loss": 1.1718, + "step": 49955 + }, + { + "epoch": 0.05, + "learning_rate": 4.969391413131136e-05, + "loss": 1.1134, + "step": 49956 + }, + { + "epoch": 0.05, + "learning_rate": 4.969390187750914e-05, + "loss": 1.3949, + "step": 49957 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693889623463146e-05, + "loss": 1.2134, + "step": 49958 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693877369173393e-05, + "loss": 0.9508, + "step": 49959 + }, + { + "epoch": 0.05, + "learning_rate": 4.969386511463986e-05, + "loss": 1.1351, + "step": 49960 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693852859862566e-05, + "loss": 1.1318, + "step": 49961 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693840604841506e-05, + "loss": 1.1111, + "step": 49962 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693828349576676e-05, + "loss": 1.0734, + "step": 49963 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693816094068076e-05, + "loss": 1.1371, + "step": 49964 + }, + { + "epoch": 0.05, + "learning_rate": 4.969380383831572e-05, + "loss": 1.2483, + "step": 49965 + }, + { + "epoch": 0.05, + "learning_rate": 4.969379158231959e-05, + "loss": 1.2332, + "step": 49966 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693779326079696e-05, + "loss": 0.9402, + "step": 49967 + }, + { + "epoch": 0.05, + "learning_rate": 4.969376706959603e-05, + "loss": 0.5064, + "step": 49968 + }, + { + "epoch": 0.05, + "learning_rate": 4.969375481286861e-05, + "loss": 0.4163, + "step": 49969 + }, + { + "epoch": 0.05, + "learning_rate": 4.969374255589741e-05, + "loss": 0.6387, + "step": 49970 + }, + { + "epoch": 0.05, + "learning_rate": 4.969373029868245e-05, + "loss": 1.0652, + "step": 49971 + }, + { + "epoch": 0.05, + "learning_rate": 4.969371804122372e-05, + "loss": 1.1418, + "step": 49972 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693705783521226e-05, + "loss": 0.8738, + "step": 49973 + }, + { + "epoch": 0.05, + "learning_rate": 4.969369352557497e-05, + "loss": 1.1051, + "step": 49974 + }, + { + "epoch": 0.05, + "learning_rate": 4.969368126738495e-05, + "loss": 1.2677, + "step": 49975 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693669008951156e-05, + "loss": 1.1446, + "step": 49976 + }, + { + "epoch": 0.05, + "learning_rate": 4.96936567502736e-05, + "loss": 1.0077, + "step": 49977 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693644491352274e-05, + "loss": 0.8826, + "step": 49978 + }, + { + "epoch": 0.05, + "learning_rate": 4.969363223218719e-05, + "loss": 0.6697, + "step": 49979 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693619972778336e-05, + "loss": 0.9065, + "step": 49980 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693607713125726e-05, + "loss": 0.9905, + "step": 49981 + }, + { + "epoch": 0.05, + "learning_rate": 4.969359545322933e-05, + "loss": 1.0534, + "step": 49982 + }, + { + "epoch": 0.05, + "learning_rate": 4.969358319308919e-05, + "loss": 0.9926, + "step": 49983 + }, + { + "epoch": 0.05, + "learning_rate": 4.969357093270528e-05, + "loss": 1.5424, + "step": 49984 + }, + { + "epoch": 0.05, + "learning_rate": 4.969355867207761e-05, + "loss": 0.9209, + "step": 49985 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693546411206165e-05, + "loss": 1.2825, + "step": 49986 + }, + { + "epoch": 0.05, + "learning_rate": 4.969353415009096e-05, + "loss": 0.9465, + "step": 49987 + }, + { + "epoch": 0.05, + "learning_rate": 4.969352188873199e-05, + "loss": 0.7661, + "step": 49988 + }, + { + "epoch": 0.05, + "learning_rate": 4.969350962712926e-05, + "loss": 1.2285, + "step": 49989 + }, + { + "epoch": 0.05, + "learning_rate": 4.969349736528277e-05, + "loss": 1.1446, + "step": 49990 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693485103192506e-05, + "loss": 1.2737, + "step": 49991 + }, + { + "epoch": 0.05, + "learning_rate": 4.969347284085848e-05, + "loss": 1.0717, + "step": 49992 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693460578280694e-05, + "loss": 1.2913, + "step": 49993 + }, + { + "epoch": 0.05, + "learning_rate": 4.969344831545914e-05, + "loss": 1.2155, + "step": 49994 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693436052393825e-05, + "loss": 1.3321, + "step": 49995 + }, + { + "epoch": 0.05, + "learning_rate": 4.969342378908475e-05, + "loss": 1.537, + "step": 49996 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693411525531907e-05, + "loss": 1.6327, + "step": 49997 + }, + { + "epoch": 0.05, + "learning_rate": 4.969339926173531e-05, + "loss": 0.9082, + "step": 49998 + }, + { + "epoch": 0.05, + "learning_rate": 4.969338699769494e-05, + "loss": 0.882, + "step": 49999 + }, + { + "epoch": 0.05, + "learning_rate": 4.969337473341081e-05, + "loss": 0.8207, + "step": 50000 + }, + { + "epoch": 0.05, + "eval_loss": 1.0787854194641113, + "eval_runtime": 126.0457, + "eval_samples_per_second": 10.988, + "eval_steps_per_second": 5.498, + "step": 50000 + }, + { + "epoch": 0.05, + "learning_rate": 4.969336246888292e-05, + "loss": 0.9265, + "step": 50001 + }, + { + "epoch": 0.05, + "learning_rate": 4.969335020411127e-05, + "loss": 0.9232, + "step": 50002 + }, + { + "epoch": 0.05, + "learning_rate": 4.969333793909585e-05, + "loss": 1.3453, + "step": 50003 + }, + { + "epoch": 0.05, + "learning_rate": 4.969332567383667e-05, + "loss": 1.2305, + "step": 50004 + }, + { + "epoch": 0.05, + "learning_rate": 4.969331340833373e-05, + "loss": 1.3203, + "step": 50005 + }, + { + "epoch": 0.05, + "learning_rate": 4.969330114258702e-05, + "loss": 1.4891, + "step": 50006 + }, + { + "epoch": 0.05, + "learning_rate": 4.969328887659656e-05, + "loss": 1.4522, + "step": 50007 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693276610362335e-05, + "loss": 1.308, + "step": 50008 + }, + { + "epoch": 0.05, + "learning_rate": 4.969326434388435e-05, + "loss": 1.072, + "step": 50009 + }, + { + "epoch": 0.05, + "learning_rate": 4.96932520771626e-05, + "loss": 1.0883, + "step": 50010 + }, + { + "epoch": 0.05, + "learning_rate": 4.969323981019709e-05, + "loss": 1.1798, + "step": 50011 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693227542987816e-05, + "loss": 0.9228, + "step": 50012 + }, + { + "epoch": 0.05, + "learning_rate": 4.969321527553478e-05, + "loss": 0.725, + "step": 50013 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693203007837984e-05, + "loss": 0.8173, + "step": 50014 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693190739897435e-05, + "loss": 1.3859, + "step": 50015 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693178471713116e-05, + "loss": 1.0869, + "step": 50016 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693166203285035e-05, + "loss": 1.1415, + "step": 50017 + }, + { + "epoch": 0.05, + "learning_rate": 4.96931539346132e-05, + "loss": 1.3767, + "step": 50018 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693141665697605e-05, + "loss": 0.907, + "step": 50019 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693129396538243e-05, + "loss": 1.2152, + "step": 50020 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693117127135126e-05, + "loss": 1.0973, + "step": 50021 + }, + { + "epoch": 0.05, + "learning_rate": 4.9693104857488245e-05, + "loss": 1.3511, + "step": 50022 + }, + { + "epoch": 0.05, + "learning_rate": 4.969309258759761e-05, + "loss": 1.3359, + "step": 50023 + }, + { + "epoch": 0.05, + "learning_rate": 4.969308031746321e-05, + "loss": 1.1643, + "step": 50024 + }, + { + "epoch": 0.05, + "learning_rate": 4.969306804708506e-05, + "loss": 1.4927, + "step": 50025 + }, + { + "epoch": 0.05, + "learning_rate": 4.969305577646314e-05, + "loss": 1.3373, + "step": 50026 + }, + { + "epoch": 0.05, + "learning_rate": 4.969304350559747e-05, + "loss": 1.0642, + "step": 50027 + }, + { + "epoch": 0.05, + "learning_rate": 4.969303123448803e-05, + "loss": 1.0547, + "step": 50028 + }, + { + "epoch": 0.05, + "learning_rate": 4.969301896313483e-05, + "loss": 1.0169, + "step": 50029 + }, + { + "epoch": 0.05, + "learning_rate": 4.969300669153788e-05, + "loss": 1.2037, + "step": 50030 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692994419697165e-05, + "loss": 1.0391, + "step": 50031 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692982147612693e-05, + "loss": 1.1902, + "step": 50032 + }, + { + "epoch": 0.05, + "learning_rate": 4.969296987528446e-05, + "loss": 1.1499, + "step": 50033 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692957602712476e-05, + "loss": 1.4417, + "step": 50034 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692945329896724e-05, + "loss": 0.9829, + "step": 50035 + }, + { + "epoch": 0.05, + "learning_rate": 4.969293305683722e-05, + "loss": 0.9936, + "step": 50036 + }, + { + "epoch": 0.05, + "learning_rate": 4.969292078353396e-05, + "loss": 1.0131, + "step": 50037 + }, + { + "epoch": 0.05, + "learning_rate": 4.969290850998694e-05, + "loss": 1.1305, + "step": 50038 + }, + { + "epoch": 0.05, + "learning_rate": 4.969289623619615e-05, + "loss": 1.2216, + "step": 50039 + }, + { + "epoch": 0.05, + "learning_rate": 4.969288396216162e-05, + "loss": 1.1691, + "step": 50040 + }, + { + "epoch": 0.05, + "learning_rate": 4.969287168788332e-05, + "loss": 1.045, + "step": 50041 + }, + { + "epoch": 0.05, + "learning_rate": 4.969285941336127e-05, + "loss": 1.262, + "step": 50042 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692847138595456e-05, + "loss": 1.1367, + "step": 50043 + }, + { + "epoch": 0.05, + "learning_rate": 4.969283486358589e-05, + "loss": 0.9626, + "step": 50044 + }, + { + "epoch": 0.05, + "learning_rate": 4.969282258833256e-05, + "loss": 1.176, + "step": 50045 + }, + { + "epoch": 0.05, + "learning_rate": 4.969281031283548e-05, + "loss": 1.0801, + "step": 50046 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692798037094636e-05, + "loss": 1.2267, + "step": 50047 + }, + { + "epoch": 0.05, + "learning_rate": 4.969278576111004e-05, + "loss": 1.2711, + "step": 50048 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692773484881686e-05, + "loss": 1.1643, + "step": 50049 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692761208409576e-05, + "loss": 1.0452, + "step": 50050 + }, + { + "epoch": 0.05, + "learning_rate": 4.969274893169371e-05, + "loss": 0.8559, + "step": 50051 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692736654734084e-05, + "loss": 1.0576, + "step": 50052 + }, + { + "epoch": 0.05, + "learning_rate": 4.969272437753071e-05, + "loss": 0.9572, + "step": 50053 + }, + { + "epoch": 0.05, + "learning_rate": 4.969271210008357e-05, + "loss": 0.3703, + "step": 50054 + }, + { + "epoch": 0.05, + "learning_rate": 4.969269982239268e-05, + "loss": 1.0661, + "step": 50055 + }, + { + "epoch": 0.05, + "learning_rate": 4.969268754445803e-05, + "loss": 0.6895, + "step": 50056 + }, + { + "epoch": 0.05, + "learning_rate": 4.969267526627963e-05, + "loss": 0.8749, + "step": 50057 + }, + { + "epoch": 0.05, + "learning_rate": 4.969266298785747e-05, + "loss": 1.1254, + "step": 50058 + }, + { + "epoch": 0.05, + "learning_rate": 4.969265070919156e-05, + "loss": 1.4458, + "step": 50059 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692638430281885e-05, + "loss": 1.3262, + "step": 50060 + }, + { + "epoch": 0.05, + "learning_rate": 4.969262615112846e-05, + "loss": 1.4592, + "step": 50061 + }, + { + "epoch": 0.05, + "learning_rate": 4.969261387173128e-05, + "loss": 1.4125, + "step": 50062 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692601592090344e-05, + "loss": 1.1918, + "step": 50063 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692589312205654e-05, + "loss": 1.1115, + "step": 50064 + }, + { + "epoch": 0.05, + "learning_rate": 4.969257703207721e-05, + "loss": 1.4228, + "step": 50065 + }, + { + "epoch": 0.05, + "learning_rate": 4.969256475170501e-05, + "loss": 1.1127, + "step": 50066 + }, + { + "epoch": 0.05, + "learning_rate": 4.969255247108905e-05, + "loss": 1.2372, + "step": 50067 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692540190229344e-05, + "loss": 1.3721, + "step": 50068 + }, + { + "epoch": 0.05, + "learning_rate": 4.969252790912588e-05, + "loss": 1.0788, + "step": 50069 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692515627778665e-05, + "loss": 1.1009, + "step": 50070 + }, + { + "epoch": 0.05, + "learning_rate": 4.969250334618769e-05, + "loss": 1.1025, + "step": 50071 + }, + { + "epoch": 0.05, + "learning_rate": 4.969249106435296e-05, + "loss": 1.2841, + "step": 50072 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692478782274485e-05, + "loss": 1.1769, + "step": 50073 + }, + { + "epoch": 0.05, + "learning_rate": 4.969246649995225e-05, + "loss": 1.1476, + "step": 50074 + }, + { + "epoch": 0.05, + "learning_rate": 4.969245421738626e-05, + "loss": 1.2905, + "step": 50075 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692441934576524e-05, + "loss": 1.0822, + "step": 50076 + }, + { + "epoch": 0.05, + "learning_rate": 4.969242965152303e-05, + "loss": 0.3541, + "step": 50077 + }, + { + "epoch": 0.05, + "learning_rate": 4.969241736822579e-05, + "loss": 0.9507, + "step": 50078 + }, + { + "epoch": 0.05, + "learning_rate": 4.969240508468478e-05, + "loss": 1.0277, + "step": 50079 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692392800900026e-05, + "loss": 1.0669, + "step": 50080 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692380516871516e-05, + "loss": 1.2497, + "step": 50081 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692368232599264e-05, + "loss": 0.8934, + "step": 50082 + }, + { + "epoch": 0.05, + "learning_rate": 4.969235594808325e-05, + "loss": 0.7776, + "step": 50083 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692343663323484e-05, + "loss": 1.1478, + "step": 50084 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692331378319965e-05, + "loss": 0.8574, + "step": 50085 + }, + { + "epoch": 0.05, + "learning_rate": 4.96923190930727e-05, + "loss": 0.5705, + "step": 50086 + }, + { + "epoch": 0.05, + "learning_rate": 4.969230680758168e-05, + "loss": 0.9098, + "step": 50087 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692294521846904e-05, + "loss": 1.0843, + "step": 50088 + }, + { + "epoch": 0.05, + "learning_rate": 4.969228223586838e-05, + "loss": 0.9855, + "step": 50089 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692269949646104e-05, + "loss": 1.4671, + "step": 50090 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692257663180076e-05, + "loss": 1.4937, + "step": 50091 + }, + { + "epoch": 0.05, + "learning_rate": 4.969224537647029e-05, + "loss": 0.9719, + "step": 50092 + }, + { + "epoch": 0.05, + "learning_rate": 4.969223308951676e-05, + "loss": 1.2903, + "step": 50093 + }, + { + "epoch": 0.05, + "learning_rate": 4.969222080231948e-05, + "loss": 1.444, + "step": 50094 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692208514878444e-05, + "loss": 1.4068, + "step": 50095 + }, + { + "epoch": 0.05, + "learning_rate": 4.969219622719366e-05, + "loss": 1.4088, + "step": 50096 + }, + { + "epoch": 0.05, + "learning_rate": 4.969218393926512e-05, + "loss": 1.2315, + "step": 50097 + }, + { + "epoch": 0.05, + "learning_rate": 4.969217165109283e-05, + "loss": 1.1402, + "step": 50098 + }, + { + "epoch": 0.05, + "learning_rate": 4.96921593626768e-05, + "loss": 0.6752, + "step": 50099 + }, + { + "epoch": 0.05, + "learning_rate": 4.969214707401701e-05, + "loss": 0.6901, + "step": 50100 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692134785113475e-05, + "loss": 0.8997, + "step": 50101 + }, + { + "epoch": 0.05, + "learning_rate": 4.969212249596619e-05, + "loss": 1.1378, + "step": 50102 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692110206575146e-05, + "loss": 1.1149, + "step": 50103 + }, + { + "epoch": 0.05, + "learning_rate": 4.969209791694035e-05, + "loss": 1.0704, + "step": 50104 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692085627061815e-05, + "loss": 1.0929, + "step": 50105 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692073336939526e-05, + "loss": 0.8369, + "step": 50106 + }, + { + "epoch": 0.05, + "learning_rate": 4.969206104657349e-05, + "loss": 1.3136, + "step": 50107 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692048755963694e-05, + "loss": 1.2859, + "step": 50108 + }, + { + "epoch": 0.05, + "learning_rate": 4.969203646511016e-05, + "loss": 1.1856, + "step": 50109 + }, + { + "epoch": 0.05, + "learning_rate": 4.9692024174012873e-05, + "loss": 1.2618, + "step": 50110 + }, + { + "epoch": 0.05, + "learning_rate": 4.969201188267183e-05, + "loss": 0.9462, + "step": 50111 + }, + { + "epoch": 0.05, + "learning_rate": 4.969199959108704e-05, + "loss": 1.456, + "step": 50112 + }, + { + "epoch": 0.05, + "learning_rate": 4.969198729925851e-05, + "loss": 1.3671, + "step": 50113 + }, + { + "epoch": 0.05, + "learning_rate": 4.969197500718623e-05, + "loss": 1.0982, + "step": 50114 + }, + { + "epoch": 0.05, + "learning_rate": 4.96919627148702e-05, + "loss": 0.8965, + "step": 50115 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691950422310415e-05, + "loss": 1.2522, + "step": 50116 + }, + { + "epoch": 0.05, + "learning_rate": 4.969193812950689e-05, + "loss": 1.0155, + "step": 50117 + }, + { + "epoch": 0.05, + "learning_rate": 4.969192583645961e-05, + "loss": 1.6562, + "step": 50118 + }, + { + "epoch": 0.05, + "learning_rate": 4.969191354316858e-05, + "loss": 1.1301, + "step": 50119 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691901249633803e-05, + "loss": 1.3085, + "step": 50120 + }, + { + "epoch": 0.05, + "learning_rate": 4.969188895585528e-05, + "loss": 1.0195, + "step": 50121 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691876661833014e-05, + "loss": 1.0883, + "step": 50122 + }, + { + "epoch": 0.05, + "learning_rate": 4.969186436756699e-05, + "loss": 1.1884, + "step": 50123 + }, + { + "epoch": 0.05, + "learning_rate": 4.969185207305723e-05, + "loss": 1.0959, + "step": 50124 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691839778303716e-05, + "loss": 1.0122, + "step": 50125 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691827483306454e-05, + "loss": 0.9166, + "step": 50126 + }, + { + "epoch": 0.05, + "learning_rate": 4.969181518806544e-05, + "loss": 1.1355, + "step": 50127 + }, + { + "epoch": 0.05, + "learning_rate": 4.969180289258068e-05, + "loss": 0.8262, + "step": 50128 + }, + { + "epoch": 0.05, + "learning_rate": 4.969179059685218e-05, + "loss": 0.8854, + "step": 50129 + }, + { + "epoch": 0.05, + "learning_rate": 4.969177830087993e-05, + "loss": 0.979, + "step": 50130 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691766004663944e-05, + "loss": 1.1598, + "step": 50131 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691753708204195e-05, + "loss": 1.5389, + "step": 50132 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691741411500704e-05, + "loss": 0.8558, + "step": 50133 + }, + { + "epoch": 0.05, + "learning_rate": 4.969172911455347e-05, + "loss": 1.552, + "step": 50134 + }, + { + "epoch": 0.05, + "learning_rate": 4.969171681736249e-05, + "loss": 1.1513, + "step": 50135 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691704519927764e-05, + "loss": 1.1285, + "step": 50136 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691692222249284e-05, + "loss": 0.9044, + "step": 50137 + }, + { + "epoch": 0.05, + "learning_rate": 4.969167992432706e-05, + "loss": 1.0104, + "step": 50138 + }, + { + "epoch": 0.05, + "learning_rate": 4.96916676261611e-05, + "loss": 0.9343, + "step": 50139 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691655327751383e-05, + "loss": 1.1002, + "step": 50140 + }, + { + "epoch": 0.05, + "learning_rate": 4.969164302909792e-05, + "loss": 1.6698, + "step": 50141 + }, + { + "epoch": 0.05, + "learning_rate": 4.969163073020072e-05, + "loss": 1.3258, + "step": 50142 + }, + { + "epoch": 0.05, + "learning_rate": 4.969161843105977e-05, + "loss": 1.1694, + "step": 50143 + }, + { + "epoch": 0.05, + "learning_rate": 4.969160613167508e-05, + "loss": 0.9562, + "step": 50144 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691593832046635e-05, + "loss": 1.1595, + "step": 50145 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691581532174455e-05, + "loss": 1.4098, + "step": 50146 + }, + { + "epoch": 0.05, + "learning_rate": 4.969156923205852e-05, + "loss": 0.8992, + "step": 50147 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691556931698854e-05, + "loss": 1.2168, + "step": 50148 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691544631095433e-05, + "loss": 1.1678, + "step": 50149 + }, + { + "epoch": 0.05, + "learning_rate": 4.969153233024827e-05, + "loss": 0.2817, + "step": 50150 + }, + { + "epoch": 0.05, + "learning_rate": 4.969152002915736e-05, + "loss": 0.8232, + "step": 50151 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691507727822705e-05, + "loss": 1.255, + "step": 50152 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691495426244316e-05, + "loss": 0.956, + "step": 50153 + }, + { + "epoch": 0.05, + "learning_rate": 4.969148312442217e-05, + "loss": 1.0599, + "step": 50154 + }, + { + "epoch": 0.05, + "learning_rate": 4.969147082235629e-05, + "loss": 0.7239, + "step": 50155 + }, + { + "epoch": 0.05, + "learning_rate": 4.969145852004666e-05, + "loss": 0.9194, + "step": 50156 + }, + { + "epoch": 0.05, + "learning_rate": 4.969144621749329e-05, + "loss": 1.3754, + "step": 50157 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691433914696175e-05, + "loss": 1.3171, + "step": 50158 + }, + { + "epoch": 0.05, + "learning_rate": 4.969142161165531e-05, + "loss": 1.1097, + "step": 50159 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691409308370714e-05, + "loss": 1.1191, + "step": 50160 + }, + { + "epoch": 0.05, + "learning_rate": 4.969139700484237e-05, + "loss": 1.2361, + "step": 50161 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691384701070285e-05, + "loss": 0.9541, + "step": 50162 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691372397054446e-05, + "loss": 1.3411, + "step": 50163 + }, + { + "epoch": 0.05, + "learning_rate": 4.969136009279487e-05, + "loss": 1.425, + "step": 50164 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691347788291556e-05, + "loss": 0.5531, + "step": 50165 + }, + { + "epoch": 0.05, + "learning_rate": 4.96913354835445e-05, + "loss": 0.4266, + "step": 50166 + }, + { + "epoch": 0.05, + "learning_rate": 4.96913231785537e-05, + "loss": 1.1559, + "step": 50167 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691310873319155e-05, + "loss": 0.9615, + "step": 50168 + }, + { + "epoch": 0.05, + "learning_rate": 4.969129856784087e-05, + "loss": 0.9701, + "step": 50169 + }, + { + "epoch": 0.05, + "learning_rate": 4.969128626211884e-05, + "loss": 0.5367, + "step": 50170 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691273956153074e-05, + "loss": 0.7798, + "step": 50171 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691261649943556e-05, + "loss": 0.8064, + "step": 50172 + }, + { + "epoch": 0.05, + "learning_rate": 4.96912493434903e-05, + "loss": 0.9206, + "step": 50173 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691237036793306e-05, + "loss": 1.0711, + "step": 50174 + }, + { + "epoch": 0.05, + "learning_rate": 4.969122472985257e-05, + "loss": 1.1879, + "step": 50175 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691212422668095e-05, + "loss": 1.2325, + "step": 50176 + }, + { + "epoch": 0.05, + "learning_rate": 4.969120011523988e-05, + "loss": 1.1774, + "step": 50177 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691187807567914e-05, + "loss": 1.5569, + "step": 50178 + }, + { + "epoch": 0.05, + "learning_rate": 4.969117549965221e-05, + "loss": 1.4441, + "step": 50179 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691163191492765e-05, + "loss": 1.1897, + "step": 50180 + }, + { + "epoch": 0.05, + "learning_rate": 4.969115088308959e-05, + "loss": 0.9364, + "step": 50181 + }, + { + "epoch": 0.05, + "learning_rate": 4.969113857444266e-05, + "loss": 0.9471, + "step": 50182 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691126265552e-05, + "loss": 1.5567, + "step": 50183 + }, + { + "epoch": 0.05, + "learning_rate": 4.969111395641759e-05, + "loss": 1.4725, + "step": 50184 + }, + { + "epoch": 0.05, + "learning_rate": 4.969110164703945e-05, + "loss": 1.1961, + "step": 50185 + }, + { + "epoch": 0.05, + "learning_rate": 4.969108933741756e-05, + "loss": 1.1399, + "step": 50186 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691077027551935e-05, + "loss": 0.7422, + "step": 50187 + }, + { + "epoch": 0.05, + "learning_rate": 4.969106471744257e-05, + "loss": 0.8287, + "step": 50188 + }, + { + "epoch": 0.05, + "learning_rate": 4.969105240708947e-05, + "loss": 0.9399, + "step": 50189 + }, + { + "epoch": 0.05, + "learning_rate": 4.969104009649262e-05, + "loss": 1.0239, + "step": 50190 + }, + { + "epoch": 0.05, + "learning_rate": 4.969102778565204e-05, + "loss": 1.3618, + "step": 50191 + }, + { + "epoch": 0.05, + "learning_rate": 4.9691015474567715e-05, + "loss": 1.1004, + "step": 50192 + }, + { + "epoch": 0.05, + "learning_rate": 4.969100316323965e-05, + "loss": 0.9876, + "step": 50193 + }, + { + "epoch": 0.05, + "learning_rate": 4.969099085166785e-05, + "loss": 1.0203, + "step": 50194 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690978539852304e-05, + "loss": 1.1839, + "step": 50195 + }, + { + "epoch": 0.05, + "learning_rate": 4.969096622779303e-05, + "loss": 1.1351, + "step": 50196 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690953915490015e-05, + "loss": 1.2162, + "step": 50197 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690941602943254e-05, + "loss": 1.2797, + "step": 50198 + }, + { + "epoch": 0.05, + "learning_rate": 4.969092929015276e-05, + "loss": 0.9495, + "step": 50199 + }, + { + "epoch": 0.05, + "learning_rate": 4.969091697711852e-05, + "loss": 1.5007, + "step": 50200 + }, + { + "epoch": 0.05, + "learning_rate": 4.969090466384055e-05, + "loss": 1.3062, + "step": 50201 + }, + { + "epoch": 0.05, + "learning_rate": 4.969089235031884e-05, + "loss": 1.5275, + "step": 50202 + }, + { + "epoch": 0.05, + "learning_rate": 4.969088003655339e-05, + "loss": 1.5169, + "step": 50203 + }, + { + "epoch": 0.05, + "learning_rate": 4.96908677225442e-05, + "loss": 1.2134, + "step": 50204 + }, + { + "epoch": 0.05, + "learning_rate": 4.969085540829128e-05, + "loss": 1.0641, + "step": 50205 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690843093794614e-05, + "loss": 0.9515, + "step": 50206 + }, + { + "epoch": 0.05, + "learning_rate": 4.969083077905421e-05, + "loss": 1.4067, + "step": 50207 + }, + { + "epoch": 0.05, + "learning_rate": 4.969081846407008e-05, + "loss": 1.537, + "step": 50208 + }, + { + "epoch": 0.05, + "learning_rate": 4.96908061488422e-05, + "loss": 1.6268, + "step": 50209 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690793833370587e-05, + "loss": 1.5903, + "step": 50210 + }, + { + "epoch": 0.05, + "learning_rate": 4.969078151765524e-05, + "loss": 1.1734, + "step": 50211 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690769201696153e-05, + "loss": 1.0441, + "step": 50212 + }, + { + "epoch": 0.05, + "learning_rate": 4.969075688549333e-05, + "loss": 0.9855, + "step": 50213 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690744569046765e-05, + "loss": 1.2209, + "step": 50214 + }, + { + "epoch": 0.05, + "learning_rate": 4.969073225235647e-05, + "loss": 1.2278, + "step": 50215 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690719935422435e-05, + "loss": 1.3415, + "step": 50216 + }, + { + "epoch": 0.05, + "learning_rate": 4.969070761824467e-05, + "loss": 1.2137, + "step": 50217 + }, + { + "epoch": 0.05, + "learning_rate": 4.969069530082316e-05, + "loss": 1.3068, + "step": 50218 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690682983157924e-05, + "loss": 1.2409, + "step": 50219 + }, + { + "epoch": 0.05, + "learning_rate": 4.969067066524894e-05, + "loss": 0.7707, + "step": 50220 + }, + { + "epoch": 0.05, + "learning_rate": 4.969065834709623e-05, + "loss": 1.0701, + "step": 50221 + }, + { + "epoch": 0.05, + "learning_rate": 4.969064602869978e-05, + "loss": 0.8518, + "step": 50222 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690633710059593e-05, + "loss": 1.3164, + "step": 50223 + }, + { + "epoch": 0.05, + "learning_rate": 4.969062139117567e-05, + "loss": 1.0758, + "step": 50224 + }, + { + "epoch": 0.05, + "learning_rate": 4.969060907204801e-05, + "loss": 1.0641, + "step": 50225 + }, + { + "epoch": 0.05, + "learning_rate": 4.969059675267662e-05, + "loss": 1.0987, + "step": 50226 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690584433061496e-05, + "loss": 0.7311, + "step": 50227 + }, + { + "epoch": 0.05, + "learning_rate": 4.969057211320264e-05, + "loss": 0.8874, + "step": 50228 + }, + { + "epoch": 0.05, + "learning_rate": 4.969055979310004e-05, + "loss": 1.1476, + "step": 50229 + }, + { + "epoch": 0.05, + "learning_rate": 4.969054747275371e-05, + "loss": 0.9067, + "step": 50230 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690535152163645e-05, + "loss": 0.9064, + "step": 50231 + }, + { + "epoch": 0.05, + "learning_rate": 4.969052283132984e-05, + "loss": 1.1929, + "step": 50232 + }, + { + "epoch": 0.05, + "learning_rate": 4.969051051025231e-05, + "loss": 1.2879, + "step": 50233 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690498188931034e-05, + "loss": 1.0769, + "step": 50234 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690485867366034e-05, + "loss": 1.3929, + "step": 50235 + }, + { + "epoch": 0.05, + "learning_rate": 4.969047354555729e-05, + "loss": 1.3321, + "step": 50236 + }, + { + "epoch": 0.05, + "learning_rate": 4.969046122350482e-05, + "loss": 1.2955, + "step": 50237 + }, + { + "epoch": 0.05, + "learning_rate": 4.969044890120862e-05, + "loss": 1.3345, + "step": 50238 + }, + { + "epoch": 0.05, + "learning_rate": 4.969043657866868e-05, + "loss": 1.3366, + "step": 50239 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690424255885e-05, + "loss": 1.0592, + "step": 50240 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690411932857596e-05, + "loss": 1.7487, + "step": 50241 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690399609586456e-05, + "loss": 1.0052, + "step": 50242 + }, + { + "epoch": 0.05, + "learning_rate": 4.969038728607158e-05, + "loss": 0.6897, + "step": 50243 + }, + { + "epoch": 0.05, + "learning_rate": 4.969037496231298e-05, + "loss": 0.7975, + "step": 50244 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690362638310636e-05, + "loss": 0.9263, + "step": 50245 + }, + { + "epoch": 0.05, + "learning_rate": 4.969035031406457e-05, + "loss": 1.0039, + "step": 50246 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690337989574764e-05, + "loss": 1.3635, + "step": 50247 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690325664841225e-05, + "loss": 1.1673, + "step": 50248 + }, + { + "epoch": 0.05, + "learning_rate": 4.969031333986396e-05, + "loss": 0.9665, + "step": 50249 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690301014642954e-05, + "loss": 1.4068, + "step": 50250 + }, + { + "epoch": 0.05, + "learning_rate": 4.969028868917822e-05, + "loss": 1.6678, + "step": 50251 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690276363469754e-05, + "loss": 0.9494, + "step": 50252 + }, + { + "epoch": 0.05, + "learning_rate": 4.969026403751755e-05, + "loss": 1.1755, + "step": 50253 + }, + { + "epoch": 0.05, + "learning_rate": 4.969025171132162e-05, + "loss": 1.2554, + "step": 50254 + }, + { + "epoch": 0.05, + "learning_rate": 4.969023938488197e-05, + "loss": 1.3305, + "step": 50255 + }, + { + "epoch": 0.05, + "learning_rate": 4.969022705819857e-05, + "loss": 1.0934, + "step": 50256 + }, + { + "epoch": 0.05, + "learning_rate": 4.969021473127145e-05, + "loss": 0.995, + "step": 50257 + }, + { + "epoch": 0.05, + "learning_rate": 4.969020240410058e-05, + "loss": 1.1988, + "step": 50258 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690190076686e-05, + "loss": 1.2621, + "step": 50259 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690177749027675e-05, + "loss": 1.1804, + "step": 50260 + }, + { + "epoch": 0.05, + "learning_rate": 4.969016542112564e-05, + "loss": 1.1012, + "step": 50261 + }, + { + "epoch": 0.05, + "learning_rate": 4.969015309297985e-05, + "loss": 1.5747, + "step": 50262 + }, + { + "epoch": 0.05, + "learning_rate": 4.969014076459034e-05, + "loss": 1.1985, + "step": 50263 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690128435957094e-05, + "loss": 1.2458, + "step": 50264 + }, + { + "epoch": 0.05, + "learning_rate": 4.969011610708012e-05, + "loss": 1.0176, + "step": 50265 + }, + { + "epoch": 0.05, + "learning_rate": 4.969010377795942e-05, + "loss": 1.2121, + "step": 50266 + }, + { + "epoch": 0.05, + "learning_rate": 4.969009144859499e-05, + "loss": 0.9925, + "step": 50267 + }, + { + "epoch": 0.05, + "learning_rate": 4.969007911898682e-05, + "loss": 1.5137, + "step": 50268 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690066789134926e-05, + "loss": 1.4066, + "step": 50269 + }, + { + "epoch": 0.05, + "learning_rate": 4.96900544590393e-05, + "loss": 1.5088, + "step": 50270 + }, + { + "epoch": 0.05, + "learning_rate": 4.969004212869995e-05, + "loss": 1.3241, + "step": 50271 + }, + { + "epoch": 0.05, + "learning_rate": 4.969002979811687e-05, + "loss": 1.2106, + "step": 50272 + }, + { + "epoch": 0.05, + "learning_rate": 4.969001746729005e-05, + "loss": 1.1327, + "step": 50273 + }, + { + "epoch": 0.05, + "learning_rate": 4.969000513621951e-05, + "loss": 1.0756, + "step": 50274 + }, + { + "epoch": 0.05, + "learning_rate": 4.968999280490524e-05, + "loss": 1.2531, + "step": 50275 + }, + { + "epoch": 0.05, + "learning_rate": 4.968998047334724e-05, + "loss": 1.2391, + "step": 50276 + }, + { + "epoch": 0.05, + "learning_rate": 4.968996814154551e-05, + "loss": 1.2702, + "step": 50277 + }, + { + "epoch": 0.05, + "learning_rate": 4.968995580950005e-05, + "loss": 1.3223, + "step": 50278 + }, + { + "epoch": 0.05, + "learning_rate": 4.968994347721086e-05, + "loss": 0.8433, + "step": 50279 + }, + { + "epoch": 0.05, + "learning_rate": 4.968993114467795e-05, + "loss": 1.0101, + "step": 50280 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689918811901296e-05, + "loss": 1.8617, + "step": 50281 + }, + { + "epoch": 0.05, + "learning_rate": 4.968990647888093e-05, + "loss": 1.616, + "step": 50282 + }, + { + "epoch": 0.05, + "learning_rate": 4.968989414561682e-05, + "loss": 1.3965, + "step": 50283 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689881812108994e-05, + "loss": 1.3253, + "step": 50284 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689869478357434e-05, + "loss": 0.9703, + "step": 50285 + }, + { + "epoch": 0.05, + "learning_rate": 4.968985714436215e-05, + "loss": 1.1987, + "step": 50286 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689844810123134e-05, + "loss": 0.6859, + "step": 50287 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689832475640395e-05, + "loss": 0.2644, + "step": 50288 + }, + { + "epoch": 0.05, + "learning_rate": 4.968982014091392e-05, + "loss": 0.3194, + "step": 50289 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689807805943724e-05, + "loss": 0.1523, + "step": 50290 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689795470729805e-05, + "loss": 0.1867, + "step": 50291 + }, + { + "epoch": 0.05, + "learning_rate": 4.968978313527215e-05, + "loss": 0.1773, + "step": 50292 + }, + { + "epoch": 0.05, + "learning_rate": 4.968977079957077e-05, + "loss": 0.1381, + "step": 50293 + }, + { + "epoch": 0.05, + "learning_rate": 4.968975846362567e-05, + "loss": 0.1476, + "step": 50294 + }, + { + "epoch": 0.05, + "learning_rate": 4.968974612743683e-05, + "loss": 0.5744, + "step": 50295 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689733791004276e-05, + "loss": 1.1355, + "step": 50296 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689721454327985e-05, + "loss": 1.2376, + "step": 50297 + }, + { + "epoch": 0.05, + "learning_rate": 4.968970911740797e-05, + "loss": 1.3838, + "step": 50298 + }, + { + "epoch": 0.05, + "learning_rate": 4.968969678024423e-05, + "loss": 1.3622, + "step": 50299 + }, + { + "epoch": 0.05, + "learning_rate": 4.968968444283677e-05, + "loss": 1.2848, + "step": 50300 + }, + { + "epoch": 0.05, + "learning_rate": 4.968967210518558e-05, + "loss": 0.6123, + "step": 50301 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689659767290664e-05, + "loss": 1.0353, + "step": 50302 + }, + { + "epoch": 0.05, + "learning_rate": 4.968964742915202e-05, + "loss": 0.8454, + "step": 50303 + }, + { + "epoch": 0.05, + "learning_rate": 4.968963509076965e-05, + "loss": 0.6054, + "step": 50304 + }, + { + "epoch": 0.05, + "learning_rate": 4.968962275214355e-05, + "loss": 0.6516, + "step": 50305 + }, + { + "epoch": 0.05, + "learning_rate": 4.968961041327374e-05, + "loss": 0.6522, + "step": 50306 + }, + { + "epoch": 0.05, + "learning_rate": 4.968959807416019e-05, + "loss": 0.5911, + "step": 50307 + }, + { + "epoch": 0.05, + "learning_rate": 4.968958573480291e-05, + "loss": 0.4508, + "step": 50308 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689573395201916e-05, + "loss": 0.4433, + "step": 50309 + }, + { + "epoch": 0.05, + "learning_rate": 4.96895610553572e-05, + "loss": 0.4547, + "step": 50310 + }, + { + "epoch": 0.05, + "learning_rate": 4.968954871526875e-05, + "loss": 0.4637, + "step": 50311 + }, + { + "epoch": 0.05, + "learning_rate": 4.968953637493658e-05, + "loss": 0.7691, + "step": 50312 + }, + { + "epoch": 0.05, + "learning_rate": 4.968952403436068e-05, + "loss": 0.5918, + "step": 50313 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689511693541066e-05, + "loss": 0.6168, + "step": 50314 + }, + { + "epoch": 0.05, + "learning_rate": 4.968949935247772e-05, + "loss": 0.2367, + "step": 50315 + }, + { + "epoch": 0.05, + "learning_rate": 4.968948701117065e-05, + "loss": 0.0995, + "step": 50316 + }, + { + "epoch": 0.05, + "learning_rate": 4.968947466961986e-05, + "loss": 0.1196, + "step": 50317 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689462327825334e-05, + "loss": 0.256, + "step": 50318 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689449985787095e-05, + "loss": 0.336, + "step": 50319 + }, + { + "epoch": 0.05, + "learning_rate": 4.968943764350513e-05, + "loss": 0.803, + "step": 50320 + }, + { + "epoch": 0.05, + "learning_rate": 4.968942530097944e-05, + "loss": 0.9721, + "step": 50321 + }, + { + "epoch": 0.05, + "learning_rate": 4.968941295821003e-05, + "loss": 0.7441, + "step": 50322 + }, + { + "epoch": 0.05, + "learning_rate": 4.968940061519689e-05, + "loss": 0.9942, + "step": 50323 + }, + { + "epoch": 0.05, + "learning_rate": 4.968938827194003e-05, + "loss": 1.2493, + "step": 50324 + }, + { + "epoch": 0.05, + "learning_rate": 4.968937592843945e-05, + "loss": 1.4395, + "step": 50325 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689363584695145e-05, + "loss": 2.0847, + "step": 50326 + }, + { + "epoch": 0.05, + "learning_rate": 4.968935124070712e-05, + "loss": 1.4805, + "step": 50327 + }, + { + "epoch": 0.05, + "learning_rate": 4.968933889647537e-05, + "loss": 1.1967, + "step": 50328 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689326551999896e-05, + "loss": 1.2455, + "step": 50329 + }, + { + "epoch": 0.05, + "learning_rate": 4.968931420728069e-05, + "loss": 0.7534, + "step": 50330 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689301862317774e-05, + "loss": 0.6517, + "step": 50331 + }, + { + "epoch": 0.05, + "learning_rate": 4.968928951711113e-05, + "loss": 1.206, + "step": 50332 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689277171660764e-05, + "loss": 1.2992, + "step": 50333 + }, + { + "epoch": 0.05, + "learning_rate": 4.968926482596668e-05, + "loss": 1.0756, + "step": 50334 + }, + { + "epoch": 0.05, + "learning_rate": 4.968925248002887e-05, + "loss": 1.089, + "step": 50335 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689240133847334e-05, + "loss": 1.1498, + "step": 50336 + }, + { + "epoch": 0.05, + "learning_rate": 4.968922778742209e-05, + "loss": 1.1426, + "step": 50337 + }, + { + "epoch": 0.05, + "learning_rate": 4.968921544075311e-05, + "loss": 1.2599, + "step": 50338 + }, + { + "epoch": 0.05, + "learning_rate": 4.968920309384042e-05, + "loss": 1.5619, + "step": 50339 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689190746684e-05, + "loss": 1.2112, + "step": 50340 + }, + { + "epoch": 0.05, + "learning_rate": 4.968917839928386e-05, + "loss": 1.4211, + "step": 50341 + }, + { + "epoch": 0.05, + "learning_rate": 4.968916605164e-05, + "loss": 1.4312, + "step": 50342 + }, + { + "epoch": 0.05, + "learning_rate": 4.968915370375242e-05, + "loss": 1.0387, + "step": 50343 + }, + { + "epoch": 0.05, + "learning_rate": 4.968914135562113e-05, + "loss": 1.2331, + "step": 50344 + }, + { + "epoch": 0.05, + "learning_rate": 4.968912900724611e-05, + "loss": 0.916, + "step": 50345 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689116658627364e-05, + "loss": 1.0444, + "step": 50346 + }, + { + "epoch": 0.05, + "learning_rate": 4.96891043097649e-05, + "loss": 1.0802, + "step": 50347 + }, + { + "epoch": 0.05, + "learning_rate": 4.968909196065871e-05, + "loss": 1.0135, + "step": 50348 + }, + { + "epoch": 0.05, + "learning_rate": 4.968907961130881e-05, + "loss": 0.9003, + "step": 50349 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689067261715184e-05, + "loss": 1.1972, + "step": 50350 + }, + { + "epoch": 0.05, + "learning_rate": 4.968905491187784e-05, + "loss": 1.4187, + "step": 50351 + }, + { + "epoch": 0.05, + "learning_rate": 4.968904256179677e-05, + "loss": 1.3722, + "step": 50352 + }, + { + "epoch": 0.05, + "learning_rate": 4.9689030211471994e-05, + "loss": 0.9662, + "step": 50353 + }, + { + "epoch": 0.05, + "learning_rate": 4.968901786090349e-05, + "loss": 1.2921, + "step": 50354 + }, + { + "epoch": 0.05, + "learning_rate": 4.968900551009127e-05, + "loss": 1.3374, + "step": 50355 + }, + { + "epoch": 0.05, + "learning_rate": 4.968899315903533e-05, + "loss": 0.9906, + "step": 50356 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688980807735664e-05, + "loss": 0.9692, + "step": 50357 + }, + { + "epoch": 0.05, + "learning_rate": 4.968896845619228e-05, + "loss": 1.0406, + "step": 50358 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688956104405184e-05, + "loss": 1.0051, + "step": 50359 + }, + { + "epoch": 0.05, + "learning_rate": 4.968894375237436e-05, + "loss": 0.9974, + "step": 50360 + }, + { + "epoch": 0.05, + "learning_rate": 4.968893140009982e-05, + "loss": 1.1905, + "step": 50361 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688919047581564e-05, + "loss": 1.2537, + "step": 50362 + }, + { + "epoch": 0.05, + "learning_rate": 4.968890669481959e-05, + "loss": 1.3002, + "step": 50363 + }, + { + "epoch": 0.05, + "learning_rate": 4.96888943418139e-05, + "loss": 1.7312, + "step": 50364 + }, + { + "epoch": 0.05, + "learning_rate": 4.968888198856448e-05, + "loss": 5.9811, + "step": 50365 + }, + { + "epoch": 0.05, + "learning_rate": 4.968886963507135e-05, + "loss": 2.9047, + "step": 50366 + }, + { + "epoch": 0.05, + "learning_rate": 4.96888572813345e-05, + "loss": 1.1046, + "step": 50367 + }, + { + "epoch": 0.05, + "learning_rate": 4.968884492735394e-05, + "loss": 1.0465, + "step": 50368 + }, + { + "epoch": 0.05, + "learning_rate": 4.968883257312965e-05, + "loss": 0.9032, + "step": 50369 + }, + { + "epoch": 0.05, + "learning_rate": 4.968882021866165e-05, + "loss": 1.1582, + "step": 50370 + }, + { + "epoch": 0.05, + "learning_rate": 4.968880786394993e-05, + "loss": 1.2027, + "step": 50371 + }, + { + "epoch": 0.05, + "learning_rate": 4.968879550899449e-05, + "loss": 1.1023, + "step": 50372 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688783153795336e-05, + "loss": 0.8389, + "step": 50373 + }, + { + "epoch": 0.05, + "learning_rate": 4.968877079835247e-05, + "loss": 1.3327, + "step": 50374 + }, + { + "epoch": 0.05, + "learning_rate": 4.968875844266587e-05, + "loss": 1.2686, + "step": 50375 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688746086735574e-05, + "loss": 1.4967, + "step": 50376 + }, + { + "epoch": 0.05, + "learning_rate": 4.968873373056154e-05, + "loss": 0.9205, + "step": 50377 + }, + { + "epoch": 0.05, + "learning_rate": 4.968872137414381e-05, + "loss": 1.3303, + "step": 50378 + }, + { + "epoch": 0.05, + "learning_rate": 4.968870901748235e-05, + "loss": 1.4834, + "step": 50379 + }, + { + "epoch": 0.05, + "learning_rate": 4.968869666057717e-05, + "loss": 1.2487, + "step": 50380 + }, + { + "epoch": 0.05, + "learning_rate": 4.968868430342829e-05, + "loss": 1.2391, + "step": 50381 + }, + { + "epoch": 0.05, + "learning_rate": 4.968867194603568e-05, + "loss": 1.1008, + "step": 50382 + }, + { + "epoch": 0.05, + "learning_rate": 4.968865958839936e-05, + "loss": 1.3623, + "step": 50383 + }, + { + "epoch": 0.05, + "learning_rate": 4.968864723051932e-05, + "loss": 1.5128, + "step": 50384 + }, + { + "epoch": 0.05, + "learning_rate": 4.968863487239557e-05, + "loss": 1.1415, + "step": 50385 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688622514028094e-05, + "loss": 0.9713, + "step": 50386 + }, + { + "epoch": 0.05, + "learning_rate": 4.968861015541692e-05, + "loss": 0.5782, + "step": 50387 + }, + { + "epoch": 0.05, + "learning_rate": 4.968859779656201e-05, + "loss": 1.0812, + "step": 50388 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688585437463396e-05, + "loss": 1.3112, + "step": 50389 + }, + { + "epoch": 0.05, + "learning_rate": 4.968857307812106e-05, + "loss": 1.4037, + "step": 50390 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688560718535015e-05, + "loss": 0.9516, + "step": 50391 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688548358705255e-05, + "loss": 1.1051, + "step": 50392 + }, + { + "epoch": 0.05, + "learning_rate": 4.968853599863177e-05, + "loss": 1.2581, + "step": 50393 + }, + { + "epoch": 0.05, + "learning_rate": 4.968852363831459e-05, + "loss": 1.4388, + "step": 50394 + }, + { + "epoch": 0.05, + "learning_rate": 4.968851127775368e-05, + "loss": 1.1946, + "step": 50395 + }, + { + "epoch": 0.05, + "learning_rate": 4.968849891694906e-05, + "loss": 2.1596, + "step": 50396 + }, + { + "epoch": 0.05, + "learning_rate": 4.968848655590072e-05, + "loss": 1.0954, + "step": 50397 + }, + { + "epoch": 0.05, + "learning_rate": 4.968847419460867e-05, + "loss": 0.9024, + "step": 50398 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688461833072906e-05, + "loss": 1.0251, + "step": 50399 + }, + { + "epoch": 0.05, + "learning_rate": 4.968844947129343e-05, + "loss": 1.3556, + "step": 50400 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688437109270237e-05, + "loss": 1.4037, + "step": 50401 + }, + { + "epoch": 0.05, + "learning_rate": 4.968842474700333e-05, + "loss": 1.0676, + "step": 50402 + }, + { + "epoch": 0.05, + "learning_rate": 4.968841238449271e-05, + "loss": 1.3859, + "step": 50403 + }, + { + "epoch": 0.05, + "learning_rate": 4.968840002173838e-05, + "loss": 1.139, + "step": 50404 + }, + { + "epoch": 0.05, + "learning_rate": 4.968838765874033e-05, + "loss": 1.1575, + "step": 50405 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688375295498565e-05, + "loss": 1.1804, + "step": 50406 + }, + { + "epoch": 0.05, + "learning_rate": 4.96883629320131e-05, + "loss": 1.051, + "step": 50407 + }, + { + "epoch": 0.05, + "learning_rate": 4.968835056828391e-05, + "loss": 1.3302, + "step": 50408 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688338204311015e-05, + "loss": 1.2629, + "step": 50409 + }, + { + "epoch": 0.05, + "learning_rate": 4.96883258400944e-05, + "loss": 1.1385, + "step": 50410 + }, + { + "epoch": 0.05, + "learning_rate": 4.968831347563407e-05, + "loss": 1.2707, + "step": 50411 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688301110930036e-05, + "loss": 0.8942, + "step": 50412 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688288745982286e-05, + "loss": 0.898, + "step": 50413 + }, + { + "epoch": 0.05, + "learning_rate": 4.968827638079082e-05, + "loss": 0.9379, + "step": 50414 + }, + { + "epoch": 0.05, + "learning_rate": 4.968826401535565e-05, + "loss": 0.967, + "step": 50415 + }, + { + "epoch": 0.05, + "learning_rate": 4.968825164967677e-05, + "loss": 1.4066, + "step": 50416 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688239283754163e-05, + "loss": 1.2109, + "step": 50417 + }, + { + "epoch": 0.05, + "learning_rate": 4.968822691758785e-05, + "loss": 1.1309, + "step": 50418 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688214551177825e-05, + "loss": 1.338, + "step": 50419 + }, + { + "epoch": 0.05, + "learning_rate": 4.968820218452409e-05, + "loss": 1.7629, + "step": 50420 + }, + { + "epoch": 0.05, + "learning_rate": 4.968818981762665e-05, + "loss": 1.4673, + "step": 50421 + }, + { + "epoch": 0.05, + "learning_rate": 4.968817745048549e-05, + "loss": 1.067, + "step": 50422 + }, + { + "epoch": 0.05, + "learning_rate": 4.968816508310062e-05, + "loss": 1.2031, + "step": 50423 + }, + { + "epoch": 0.05, + "learning_rate": 4.968815271547204e-05, + "loss": 0.8629, + "step": 50424 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688140347599756e-05, + "loss": 1.1961, + "step": 50425 + }, + { + "epoch": 0.05, + "learning_rate": 4.968812797948375e-05, + "loss": 1.5976, + "step": 50426 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688115611124036e-05, + "loss": 0.8529, + "step": 50427 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688103242520614e-05, + "loss": 0.986, + "step": 50428 + }, + { + "epoch": 0.05, + "learning_rate": 4.968809087367348e-05, + "loss": 0.8569, + "step": 50429 + }, + { + "epoch": 0.05, + "learning_rate": 4.968807850458264e-05, + "loss": 1.0374, + "step": 50430 + }, + { + "epoch": 0.05, + "learning_rate": 4.968806613524808e-05, + "loss": 1.2641, + "step": 50431 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688053765669816e-05, + "loss": 1.1731, + "step": 50432 + }, + { + "epoch": 0.05, + "learning_rate": 4.968804139584784e-05, + "loss": 1.3921, + "step": 50433 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688029025782153e-05, + "loss": 1.0876, + "step": 50434 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688016655472766e-05, + "loss": 1.1649, + "step": 50435 + }, + { + "epoch": 0.05, + "learning_rate": 4.968800428491966e-05, + "loss": 0.6649, + "step": 50436 + }, + { + "epoch": 0.05, + "learning_rate": 4.968799191412285e-05, + "loss": 0.6432, + "step": 50437 + }, + { + "epoch": 0.05, + "learning_rate": 4.968797954308232e-05, + "loss": 0.6037, + "step": 50438 + }, + { + "epoch": 0.05, + "learning_rate": 4.968796717179809e-05, + "loss": 0.7264, + "step": 50439 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687954800270146e-05, + "loss": 1.244, + "step": 50440 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687942428498495e-05, + "loss": 1.1628, + "step": 50441 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687930056483136e-05, + "loss": 1.149, + "step": 50442 + }, + { + "epoch": 0.05, + "learning_rate": 4.968791768422407e-05, + "loss": 1.0965, + "step": 50443 + }, + { + "epoch": 0.05, + "learning_rate": 4.968790531172129e-05, + "loss": 1.0737, + "step": 50444 + }, + { + "epoch": 0.05, + "learning_rate": 4.96878929389748e-05, + "loss": 1.0268, + "step": 50445 + }, + { + "epoch": 0.05, + "learning_rate": 4.968788056598461e-05, + "loss": 1.0583, + "step": 50446 + }, + { + "epoch": 0.05, + "learning_rate": 4.968786819275071e-05, + "loss": 1.0622, + "step": 50447 + }, + { + "epoch": 0.05, + "learning_rate": 4.96878558192731e-05, + "loss": 1.2944, + "step": 50448 + }, + { + "epoch": 0.05, + "learning_rate": 4.968784344555177e-05, + "loss": 1.299, + "step": 50449 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687831071586746e-05, + "loss": 1.2561, + "step": 50450 + }, + { + "epoch": 0.05, + "learning_rate": 4.968781869737801e-05, + "loss": 1.1515, + "step": 50451 + }, + { + "epoch": 0.05, + "learning_rate": 4.968780632292557e-05, + "loss": 1.3938, + "step": 50452 + }, + { + "epoch": 0.05, + "learning_rate": 4.968779394822942e-05, + "loss": 1.0283, + "step": 50453 + }, + { + "epoch": 0.05, + "learning_rate": 4.968778157328956e-05, + "loss": 1.2995, + "step": 50454 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687769198106e-05, + "loss": 0.9719, + "step": 50455 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687756822678726e-05, + "loss": 1.0724, + "step": 50456 + }, + { + "epoch": 0.05, + "learning_rate": 4.968774444700774e-05, + "loss": 0.8639, + "step": 50457 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687732071093064e-05, + "loss": 1.4, + "step": 50458 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687719694934664e-05, + "loss": 1.0921, + "step": 50459 + }, + { + "epoch": 0.05, + "learning_rate": 4.968770731853257e-05, + "loss": 1.0863, + "step": 50460 + }, + { + "epoch": 0.05, + "learning_rate": 4.968769494188676e-05, + "loss": 1.2435, + "step": 50461 + }, + { + "epoch": 0.05, + "learning_rate": 4.968768256499725e-05, + "loss": 1.1768, + "step": 50462 + }, + { + "epoch": 0.05, + "learning_rate": 4.968767018786402e-05, + "loss": 1.3336, + "step": 50463 + }, + { + "epoch": 0.05, + "learning_rate": 4.96876578104871e-05, + "loss": 1.3146, + "step": 50464 + }, + { + "epoch": 0.05, + "learning_rate": 4.968764543286647e-05, + "loss": 1.0182, + "step": 50465 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687633055002124e-05, + "loss": 1.415, + "step": 50466 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687620676894084e-05, + "loss": 1.4583, + "step": 50467 + }, + { + "epoch": 0.05, + "learning_rate": 4.968760829854233e-05, + "loss": 1.0633, + "step": 50468 + }, + { + "epoch": 0.05, + "learning_rate": 4.968759591994688e-05, + "loss": 0.8449, + "step": 50469 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687583541107715e-05, + "loss": 0.8162, + "step": 50470 + }, + { + "epoch": 0.05, + "learning_rate": 4.968757116202485e-05, + "loss": 0.653, + "step": 50471 + }, + { + "epoch": 0.05, + "learning_rate": 4.968755878269828e-05, + "loss": 0.6043, + "step": 50472 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687546403128005e-05, + "loss": 0.5813, + "step": 50473 + }, + { + "epoch": 0.05, + "learning_rate": 4.968753402331402e-05, + "loss": 0.2857, + "step": 50474 + }, + { + "epoch": 0.05, + "learning_rate": 4.968752164325633e-05, + "loss": 0.9749, + "step": 50475 + }, + { + "epoch": 0.05, + "learning_rate": 4.968750926295494e-05, + "loss": 0.8737, + "step": 50476 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687496882409845e-05, + "loss": 1.2045, + "step": 50477 + }, + { + "epoch": 0.05, + "learning_rate": 4.968748450162104e-05, + "loss": 1.213, + "step": 50478 + }, + { + "epoch": 0.05, + "learning_rate": 4.968747212058854e-05, + "loss": 0.8624, + "step": 50479 + }, + { + "epoch": 0.05, + "learning_rate": 4.968745973931233e-05, + "loss": 1.2382, + "step": 50480 + }, + { + "epoch": 0.05, + "learning_rate": 4.968744735779242e-05, + "loss": 0.8945, + "step": 50481 + }, + { + "epoch": 0.05, + "learning_rate": 4.968743497602879e-05, + "loss": 1.1984, + "step": 50482 + }, + { + "epoch": 0.05, + "learning_rate": 4.968742259402147e-05, + "loss": 1.1173, + "step": 50483 + }, + { + "epoch": 0.05, + "learning_rate": 4.968741021177045e-05, + "loss": 1.3938, + "step": 50484 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687397829275714e-05, + "loss": 1.3326, + "step": 50485 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687385446537284e-05, + "loss": 1.2099, + "step": 50486 + }, + { + "epoch": 0.05, + "learning_rate": 4.968737306355514e-05, + "loss": 1.1158, + "step": 50487 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687360680329306e-05, + "loss": 1.2149, + "step": 50488 + }, + { + "epoch": 0.05, + "learning_rate": 4.968734829685976e-05, + "loss": 1.4531, + "step": 50489 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687335913146515e-05, + "loss": 1.664, + "step": 50490 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687323529189564e-05, + "loss": 0.9381, + "step": 50491 + }, + { + "epoch": 0.05, + "learning_rate": 4.968731114498891e-05, + "loss": 1.0633, + "step": 50492 + }, + { + "epoch": 0.05, + "learning_rate": 4.968729876054456e-05, + "loss": 0.9401, + "step": 50493 + }, + { + "epoch": 0.05, + "learning_rate": 4.96872863758565e-05, + "loss": 1.1012, + "step": 50494 + }, + { + "epoch": 0.05, + "learning_rate": 4.968727399092474e-05, + "loss": 0.9159, + "step": 50495 + }, + { + "epoch": 0.05, + "learning_rate": 4.968726160574928e-05, + "loss": 0.9576, + "step": 50496 + }, + { + "epoch": 0.05, + "learning_rate": 4.968724922033011e-05, + "loss": 1.0214, + "step": 50497 + }, + { + "epoch": 0.05, + "learning_rate": 4.968723683466725e-05, + "loss": 1.1518, + "step": 50498 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687224448760685e-05, + "loss": 1.0461, + "step": 50499 + }, + { + "epoch": 0.05, + "learning_rate": 4.968721206261041e-05, + "loss": 0.8938, + "step": 50500 + }, + { + "epoch": 0.05, + "eval_loss": 1.0758792161941528, + "eval_runtime": 125.0988, + "eval_samples_per_second": 11.071, + "eval_steps_per_second": 5.54, + "step": 50500 + }, + { + "epoch": 0.05, + "learning_rate": 4.968719967621643e-05, + "loss": 0.9843, + "step": 50501 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687187289578764e-05, + "loss": 0.9399, + "step": 50502 + }, + { + "epoch": 0.05, + "learning_rate": 4.968717490269739e-05, + "loss": 1.0746, + "step": 50503 + }, + { + "epoch": 0.05, + "learning_rate": 4.96871625155723e-05, + "loss": 1.3559, + "step": 50504 + }, + { + "epoch": 0.05, + "learning_rate": 4.968715012820353e-05, + "loss": 1.0353, + "step": 50505 + }, + { + "epoch": 0.05, + "learning_rate": 4.968713774059105e-05, + "loss": 1.2984, + "step": 50506 + }, + { + "epoch": 0.05, + "learning_rate": 4.968712535273487e-05, + "loss": 1.2209, + "step": 50507 + }, + { + "epoch": 0.05, + "learning_rate": 4.968711296463499e-05, + "loss": 1.1862, + "step": 50508 + }, + { + "epoch": 0.05, + "learning_rate": 4.968710057629141e-05, + "loss": 1.033, + "step": 50509 + }, + { + "epoch": 0.05, + "learning_rate": 4.968708818770412e-05, + "loss": 1.4466, + "step": 50510 + }, + { + "epoch": 0.05, + "learning_rate": 4.968707579887314e-05, + "loss": 1.6689, + "step": 50511 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687063409798456e-05, + "loss": 1.4491, + "step": 50512 + }, + { + "epoch": 0.05, + "learning_rate": 4.968705102048008e-05, + "loss": 0.9212, + "step": 50513 + }, + { + "epoch": 0.05, + "learning_rate": 4.968703863091799e-05, + "loss": 1.1246, + "step": 50514 + }, + { + "epoch": 0.05, + "learning_rate": 4.96870262411122e-05, + "loss": 1.3274, + "step": 50515 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687013851062726e-05, + "loss": 1.1725, + "step": 50516 + }, + { + "epoch": 0.05, + "learning_rate": 4.9687001460769535e-05, + "loss": 1.2039, + "step": 50517 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686989070232655e-05, + "loss": 1.0857, + "step": 50518 + }, + { + "epoch": 0.05, + "learning_rate": 4.968697667945207e-05, + "loss": 1.0985, + "step": 50519 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686964288427785e-05, + "loss": 1.0643, + "step": 50520 + }, + { + "epoch": 0.05, + "learning_rate": 4.968695189715981e-05, + "loss": 1.032, + "step": 50521 + }, + { + "epoch": 0.05, + "learning_rate": 4.968693950564812e-05, + "loss": 0.7743, + "step": 50522 + }, + { + "epoch": 0.05, + "learning_rate": 4.968692711389274e-05, + "loss": 0.9065, + "step": 50523 + }, + { + "epoch": 0.05, + "learning_rate": 4.968691472189366e-05, + "loss": 0.909, + "step": 50524 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686902329650884e-05, + "loss": 0.9528, + "step": 50525 + }, + { + "epoch": 0.05, + "learning_rate": 4.968688993716441e-05, + "loss": 1.1075, + "step": 50526 + }, + { + "epoch": 0.05, + "learning_rate": 4.968687754443423e-05, + "loss": 1.2445, + "step": 50527 + }, + { + "epoch": 0.05, + "learning_rate": 4.968686515146036e-05, + "loss": 1.1638, + "step": 50528 + }, + { + "epoch": 0.05, + "learning_rate": 4.968685275824279e-05, + "loss": 1.0286, + "step": 50529 + }, + { + "epoch": 0.05, + "learning_rate": 4.968684036478152e-05, + "loss": 1.2723, + "step": 50530 + }, + { + "epoch": 0.05, + "learning_rate": 4.968682797107655e-05, + "loss": 1.3107, + "step": 50531 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686815577127885e-05, + "loss": 1.1101, + "step": 50532 + }, + { + "epoch": 0.05, + "learning_rate": 4.968680318293552e-05, + "loss": 1.1774, + "step": 50533 + }, + { + "epoch": 0.05, + "learning_rate": 4.968679078849946e-05, + "loss": 1.36, + "step": 50534 + }, + { + "epoch": 0.05, + "learning_rate": 4.96867783938197e-05, + "loss": 0.9718, + "step": 50535 + }, + { + "epoch": 0.05, + "learning_rate": 4.968676599889624e-05, + "loss": 1.1118, + "step": 50536 + }, + { + "epoch": 0.05, + "learning_rate": 4.968675360372909e-05, + "loss": 1.5287, + "step": 50537 + }, + { + "epoch": 0.05, + "learning_rate": 4.968674120831824e-05, + "loss": 1.066, + "step": 50538 + }, + { + "epoch": 0.05, + "learning_rate": 4.968672881266369e-05, + "loss": 1.1937, + "step": 50539 + }, + { + "epoch": 0.05, + "learning_rate": 4.968671641676544e-05, + "loss": 1.2313, + "step": 50540 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686704020623495e-05, + "loss": 0.9681, + "step": 50541 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686691624237866e-05, + "loss": 0.8284, + "step": 50542 + }, + { + "epoch": 0.05, + "learning_rate": 4.968667922760852e-05, + "loss": 1.3437, + "step": 50543 + }, + { + "epoch": 0.05, + "learning_rate": 4.96866668307355e-05, + "loss": 1.1446, + "step": 50544 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686654433618765e-05, + "loss": 1.1781, + "step": 50545 + }, + { + "epoch": 0.05, + "learning_rate": 4.968664203625834e-05, + "loss": 1.8631, + "step": 50546 + }, + { + "epoch": 0.05, + "learning_rate": 4.968662963865422e-05, + "loss": 1.6444, + "step": 50547 + }, + { + "epoch": 0.05, + "learning_rate": 4.96866172408064e-05, + "loss": 0.9768, + "step": 50548 + }, + { + "epoch": 0.05, + "learning_rate": 4.968660484271489e-05, + "loss": 1.2034, + "step": 50549 + }, + { + "epoch": 0.05, + "learning_rate": 4.968659244437968e-05, + "loss": 0.9031, + "step": 50550 + }, + { + "epoch": 0.05, + "learning_rate": 4.968658004580078e-05, + "loss": 1.0335, + "step": 50551 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686567646978174e-05, + "loss": 1.1368, + "step": 50552 + }, + { + "epoch": 0.05, + "learning_rate": 4.968655524791188e-05, + "loss": 1.0714, + "step": 50553 + }, + { + "epoch": 0.05, + "learning_rate": 4.968654284860189e-05, + "loss": 1.2127, + "step": 50554 + }, + { + "epoch": 0.05, + "learning_rate": 4.968653044904821e-05, + "loss": 1.1285, + "step": 50555 + }, + { + "epoch": 0.05, + "learning_rate": 4.968651804925083e-05, + "loss": 1.4134, + "step": 50556 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686505649209744e-05, + "loss": 1.2721, + "step": 50557 + }, + { + "epoch": 0.05, + "learning_rate": 4.968649324892498e-05, + "loss": 1.2159, + "step": 50558 + }, + { + "epoch": 0.05, + "learning_rate": 4.968648084839652e-05, + "loss": 1.0537, + "step": 50559 + }, + { + "epoch": 0.05, + "learning_rate": 4.968646844762436e-05, + "loss": 0.8211, + "step": 50560 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686456046608495e-05, + "loss": 1.4421, + "step": 50561 + }, + { + "epoch": 0.05, + "learning_rate": 4.968644364534896e-05, + "loss": 1.0636, + "step": 50562 + }, + { + "epoch": 0.05, + "learning_rate": 4.968643124384571e-05, + "loss": 1.1208, + "step": 50563 + }, + { + "epoch": 0.05, + "learning_rate": 4.968641884209877e-05, + "loss": 0.7277, + "step": 50564 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686406440108144e-05, + "loss": 1.1801, + "step": 50565 + }, + { + "epoch": 0.05, + "learning_rate": 4.968639403787382e-05, + "loss": 1.0424, + "step": 50566 + }, + { + "epoch": 0.05, + "learning_rate": 4.96863816353958e-05, + "loss": 0.8786, + "step": 50567 + }, + { + "epoch": 0.05, + "learning_rate": 4.968636923267409e-05, + "loss": 1.1532, + "step": 50568 + }, + { + "epoch": 0.05, + "learning_rate": 4.968635682970868e-05, + "loss": 1.1604, + "step": 50569 + }, + { + "epoch": 0.05, + "learning_rate": 4.968634442649959e-05, + "loss": 1.153, + "step": 50570 + }, + { + "epoch": 0.05, + "learning_rate": 4.96863320230468e-05, + "loss": 0.4108, + "step": 50571 + }, + { + "epoch": 0.05, + "learning_rate": 4.968631961935031e-05, + "loss": 0.3735, + "step": 50572 + }, + { + "epoch": 0.05, + "learning_rate": 4.968630721541013e-05, + "loss": 0.2491, + "step": 50573 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686294811226265e-05, + "loss": 1.0862, + "step": 50574 + }, + { + "epoch": 0.05, + "learning_rate": 4.96862824067987e-05, + "loss": 1.1915, + "step": 50575 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686270002127445e-05, + "loss": 1.4821, + "step": 50576 + }, + { + "epoch": 0.05, + "learning_rate": 4.96862575972125e-05, + "loss": 1.7841, + "step": 50577 + }, + { + "epoch": 0.05, + "learning_rate": 4.968624519205386e-05, + "loss": 2.1294, + "step": 50578 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686232786651526e-05, + "loss": 1.5932, + "step": 50579 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686220381005496e-05, + "loss": 1.1609, + "step": 50580 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686207975115785e-05, + "loss": 1.1539, + "step": 50581 + }, + { + "epoch": 0.05, + "learning_rate": 4.968619556898237e-05, + "loss": 1.0042, + "step": 50582 + }, + { + "epoch": 0.05, + "learning_rate": 4.968618316260528e-05, + "loss": 0.9466, + "step": 50583 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686170755984485e-05, + "loss": 1.141, + "step": 50584 + }, + { + "epoch": 0.05, + "learning_rate": 4.968615834912e-05, + "loss": 1.1894, + "step": 50585 + }, + { + "epoch": 0.05, + "learning_rate": 4.968614594201183e-05, + "loss": 1.1885, + "step": 50586 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686133534659965e-05, + "loss": 1.0109, + "step": 50587 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686121127064404e-05, + "loss": 1.2815, + "step": 50588 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686108719225156e-05, + "loss": 1.3006, + "step": 50589 + }, + { + "epoch": 0.05, + "learning_rate": 4.968609631114222e-05, + "loss": 1.2402, + "step": 50590 + }, + { + "epoch": 0.05, + "learning_rate": 4.968608390281559e-05, + "loss": 1.864, + "step": 50591 + }, + { + "epoch": 0.05, + "learning_rate": 4.968607149424527e-05, + "loss": 1.1567, + "step": 50592 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686059085431255e-05, + "loss": 1.0026, + "step": 50593 + }, + { + "epoch": 0.05, + "learning_rate": 4.968604667637355e-05, + "loss": 1.4619, + "step": 50594 + }, + { + "epoch": 0.05, + "learning_rate": 4.968603426707217e-05, + "loss": 1.2129, + "step": 50595 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686021857527085e-05, + "loss": 1.2547, + "step": 50596 + }, + { + "epoch": 0.05, + "learning_rate": 4.968600944773831e-05, + "loss": 1.1535, + "step": 50597 + }, + { + "epoch": 0.05, + "learning_rate": 4.968599703770586e-05, + "loss": 0.9949, + "step": 50598 + }, + { + "epoch": 0.05, + "learning_rate": 4.96859846274297e-05, + "loss": 1.2915, + "step": 50599 + }, + { + "epoch": 0.05, + "learning_rate": 4.968597221690986e-05, + "loss": 0.9793, + "step": 50600 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685959806146334e-05, + "loss": 0.8017, + "step": 50601 + }, + { + "epoch": 0.05, + "learning_rate": 4.968594739513911e-05, + "loss": 1.0749, + "step": 50602 + }, + { + "epoch": 0.05, + "learning_rate": 4.968593498388821e-05, + "loss": 1.2061, + "step": 50603 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685922572393605e-05, + "loss": 1.1996, + "step": 50604 + }, + { + "epoch": 0.05, + "learning_rate": 4.968591016065532e-05, + "loss": 1.1858, + "step": 50605 + }, + { + "epoch": 0.05, + "learning_rate": 4.968589774867334e-05, + "loss": 0.7212, + "step": 50606 + }, + { + "epoch": 0.05, + "learning_rate": 4.968588533644768e-05, + "loss": 0.895, + "step": 50607 + }, + { + "epoch": 0.05, + "learning_rate": 4.968587292397833e-05, + "loss": 1.0295, + "step": 50608 + }, + { + "epoch": 0.05, + "learning_rate": 4.968586051126528e-05, + "loss": 1.1373, + "step": 50609 + }, + { + "epoch": 0.05, + "learning_rate": 4.968584809830855e-05, + "loss": 1.0329, + "step": 50610 + }, + { + "epoch": 0.05, + "learning_rate": 4.968583568510813e-05, + "loss": 0.6696, + "step": 50611 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685823271664026e-05, + "loss": 1.1271, + "step": 50612 + }, + { + "epoch": 0.05, + "learning_rate": 4.968581085797623e-05, + "loss": 1.0513, + "step": 50613 + }, + { + "epoch": 0.05, + "learning_rate": 4.968579844404475e-05, + "loss": 0.9094, + "step": 50614 + }, + { + "epoch": 0.05, + "learning_rate": 4.968578602986958e-05, + "loss": 1.0173, + "step": 50615 + }, + { + "epoch": 0.05, + "learning_rate": 4.968577361545072e-05, + "loss": 1.0857, + "step": 50616 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685761200788175e-05, + "loss": 1.1016, + "step": 50617 + }, + { + "epoch": 0.05, + "learning_rate": 4.968574878588194e-05, + "loss": 1.0428, + "step": 50618 + }, + { + "epoch": 0.05, + "learning_rate": 4.968573637073202e-05, + "loss": 1.3919, + "step": 50619 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685723955338416e-05, + "loss": 1.3936, + "step": 50620 + }, + { + "epoch": 0.05, + "learning_rate": 4.968571153970112e-05, + "loss": 1.0674, + "step": 50621 + }, + { + "epoch": 0.05, + "learning_rate": 4.968569912382014e-05, + "loss": 1.3668, + "step": 50622 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685686707695464e-05, + "loss": 0.9921, + "step": 50623 + }, + { + "epoch": 0.05, + "learning_rate": 4.968567429132711e-05, + "loss": 0.9554, + "step": 50624 + }, + { + "epoch": 0.05, + "learning_rate": 4.968566187471507e-05, + "loss": 1.0859, + "step": 50625 + }, + { + "epoch": 0.05, + "learning_rate": 4.968564945785935e-05, + "loss": 1.5276, + "step": 50626 + }, + { + "epoch": 0.05, + "learning_rate": 4.968563704075993e-05, + "loss": 1.3005, + "step": 50627 + }, + { + "epoch": 0.05, + "learning_rate": 4.968562462341683e-05, + "loss": 1.2325, + "step": 50628 + }, + { + "epoch": 0.05, + "learning_rate": 4.968561220583005e-05, + "loss": 1.3031, + "step": 50629 + }, + { + "epoch": 0.05, + "learning_rate": 4.968559978799957e-05, + "loss": 1.4582, + "step": 50630 + }, + { + "epoch": 0.05, + "learning_rate": 4.968558736992541e-05, + "loss": 0.65, + "step": 50631 + }, + { + "epoch": 0.05, + "learning_rate": 4.968557495160757e-05, + "loss": 1.0494, + "step": 50632 + }, + { + "epoch": 0.05, + "learning_rate": 4.968556253304604e-05, + "loss": 0.7952, + "step": 50633 + }, + { + "epoch": 0.05, + "learning_rate": 4.968555011424082e-05, + "loss": 0.9401, + "step": 50634 + }, + { + "epoch": 0.05, + "learning_rate": 4.968553769519192e-05, + "loss": 0.7701, + "step": 50635 + }, + { + "epoch": 0.05, + "learning_rate": 4.968552527589934e-05, + "loss": 1.2951, + "step": 50636 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685512856363066e-05, + "loss": 1.0135, + "step": 50637 + }, + { + "epoch": 0.05, + "learning_rate": 4.968550043658311e-05, + "loss": 1.0303, + "step": 50638 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685488016559465e-05, + "loss": 1.0338, + "step": 50639 + }, + { + "epoch": 0.05, + "learning_rate": 4.968547559629214e-05, + "loss": 0.5703, + "step": 50640 + }, + { + "epoch": 0.05, + "learning_rate": 4.968546317578113e-05, + "loss": 0.4362, + "step": 50641 + }, + { + "epoch": 0.05, + "learning_rate": 4.968545075502643e-05, + "loss": 0.8595, + "step": 50642 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685438334028054e-05, + "loss": 0.8194, + "step": 50643 + }, + { + "epoch": 0.05, + "learning_rate": 4.968542591278599e-05, + "loss": 0.8719, + "step": 50644 + }, + { + "epoch": 0.05, + "learning_rate": 4.968541349130025e-05, + "loss": 0.9755, + "step": 50645 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685401069570814e-05, + "loss": 1.078, + "step": 50646 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685388647597696e-05, + "loss": 5.8518, + "step": 50647 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685376225380896e-05, + "loss": 5.7106, + "step": 50648 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685363802920416e-05, + "loss": 5.7889, + "step": 50649 + }, + { + "epoch": 0.05, + "learning_rate": 4.968535138021625e-05, + "loss": 2.8641, + "step": 50650 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685338957268404e-05, + "loss": 0.8263, + "step": 50651 + }, + { + "epoch": 0.05, + "learning_rate": 4.968532653407687e-05, + "loss": 0.8708, + "step": 50652 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685314110641654e-05, + "loss": 1.5, + "step": 50653 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685301686962754e-05, + "loss": 0.8209, + "step": 50654 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685289263040166e-05, + "loss": 0.8555, + "step": 50655 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685276838873904e-05, + "loss": 1.1675, + "step": 50656 + }, + { + "epoch": 0.05, + "learning_rate": 4.968526441446396e-05, + "loss": 1.1454, + "step": 50657 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685251989810335e-05, + "loss": 1.2498, + "step": 50658 + }, + { + "epoch": 0.05, + "learning_rate": 4.968523956491302e-05, + "loss": 1.1628, + "step": 50659 + }, + { + "epoch": 0.05, + "learning_rate": 4.968522713977203e-05, + "loss": 1.3042, + "step": 50660 + }, + { + "epoch": 0.05, + "learning_rate": 4.968521471438735e-05, + "loss": 1.3668, + "step": 50661 + }, + { + "epoch": 0.05, + "learning_rate": 4.968520228875899e-05, + "loss": 1.1077, + "step": 50662 + }, + { + "epoch": 0.05, + "learning_rate": 4.968518986288695e-05, + "loss": 1.0109, + "step": 50663 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685177436771225e-05, + "loss": 1.0797, + "step": 50664 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685165010411825e-05, + "loss": 1.0139, + "step": 50665 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685152583808736e-05, + "loss": 1.5015, + "step": 50666 + }, + { + "epoch": 0.05, + "learning_rate": 4.968514015696197e-05, + "loss": 1.3374, + "step": 50667 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685127729871516e-05, + "loss": 1.0127, + "step": 50668 + }, + { + "epoch": 0.05, + "learning_rate": 4.968511530253739e-05, + "loss": 1.3128, + "step": 50669 + }, + { + "epoch": 0.05, + "learning_rate": 4.968510287495958e-05, + "loss": 1.6846, + "step": 50670 + }, + { + "epoch": 0.05, + "learning_rate": 4.968509044713809e-05, + "loss": 1.1934, + "step": 50671 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685078019072915e-05, + "loss": 1.1875, + "step": 50672 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685065590764065e-05, + "loss": 1.3324, + "step": 50673 + }, + { + "epoch": 0.05, + "learning_rate": 4.968505316221153e-05, + "loss": 1.033, + "step": 50674 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685040733415314e-05, + "loss": 0.873, + "step": 50675 + }, + { + "epoch": 0.05, + "learning_rate": 4.968502830437543e-05, + "loss": 1.0884, + "step": 50676 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685015875091846e-05, + "loss": 1.0083, + "step": 50677 + }, + { + "epoch": 0.05, + "learning_rate": 4.968500344556459e-05, + "loss": 1.1854, + "step": 50678 + }, + { + "epoch": 0.05, + "learning_rate": 4.968499101579366e-05, + "loss": 0.9012, + "step": 50679 + }, + { + "epoch": 0.05, + "learning_rate": 4.968497858577904e-05, + "loss": 1.3898, + "step": 50680 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684966155520755e-05, + "loss": 1.7551, + "step": 50681 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684953725018774e-05, + "loss": 0.805, + "step": 50682 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684941294273125e-05, + "loss": 0.9115, + "step": 50683 + }, + { + "epoch": 0.05, + "learning_rate": 4.968492886328379e-05, + "loss": 1.361, + "step": 50684 + }, + { + "epoch": 0.05, + "learning_rate": 4.968491643205078e-05, + "loss": 1.1787, + "step": 50685 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684904000574085e-05, + "loss": 0.919, + "step": 50686 + }, + { + "epoch": 0.05, + "learning_rate": 4.968489156885372e-05, + "loss": 1.1966, + "step": 50687 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684879136889664e-05, + "loss": 1.0173, + "step": 50688 + }, + { + "epoch": 0.05, + "learning_rate": 4.968486670468194e-05, + "loss": 1.3011, + "step": 50689 + }, + { + "epoch": 0.05, + "learning_rate": 4.968485427223054e-05, + "loss": 1.1631, + "step": 50690 + }, + { + "epoch": 0.05, + "learning_rate": 4.968484183953545e-05, + "loss": 1.345, + "step": 50691 + }, + { + "epoch": 0.05, + "learning_rate": 4.968482940659669e-05, + "loss": 1.3897, + "step": 50692 + }, + { + "epoch": 0.05, + "learning_rate": 4.968481697341425e-05, + "loss": 1.0494, + "step": 50693 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684804539988125e-05, + "loss": 1.1035, + "step": 50694 + }, + { + "epoch": 0.05, + "learning_rate": 4.968479210631833e-05, + "loss": 1.4045, + "step": 50695 + }, + { + "epoch": 0.05, + "learning_rate": 4.968477967240486e-05, + "loss": 1.0621, + "step": 50696 + }, + { + "epoch": 0.05, + "learning_rate": 4.96847672382477e-05, + "loss": 1.0542, + "step": 50697 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684754803846874e-05, + "loss": 1.2194, + "step": 50698 + }, + { + "epoch": 0.05, + "learning_rate": 4.968474236920237e-05, + "loss": 1.2444, + "step": 50699 + }, + { + "epoch": 0.05, + "learning_rate": 4.968472993431418e-05, + "loss": 1.2558, + "step": 50700 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684717499182315e-05, + "loss": 1.3575, + "step": 50701 + }, + { + "epoch": 0.05, + "learning_rate": 4.968470506380678e-05, + "loss": 0.939, + "step": 50702 + }, + { + "epoch": 0.05, + "learning_rate": 4.968469262818756e-05, + "loss": 1.1093, + "step": 50703 + }, + { + "epoch": 0.05, + "learning_rate": 4.968468019232466e-05, + "loss": 1.1997, + "step": 50704 + }, + { + "epoch": 0.05, + "learning_rate": 4.96846677562181e-05, + "loss": 0.9268, + "step": 50705 + }, + { + "epoch": 0.05, + "learning_rate": 4.968465531986784e-05, + "loss": 0.9018, + "step": 50706 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684642883273926e-05, + "loss": 1.1868, + "step": 50707 + }, + { + "epoch": 0.05, + "learning_rate": 4.968463044643633e-05, + "loss": 1.1331, + "step": 50708 + }, + { + "epoch": 0.05, + "learning_rate": 4.968461800935505e-05, + "loss": 2.0497, + "step": 50709 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684605572030097e-05, + "loss": 1.6409, + "step": 50710 + }, + { + "epoch": 0.05, + "learning_rate": 4.968459313446147e-05, + "loss": 1.1042, + "step": 50711 + }, + { + "epoch": 0.05, + "learning_rate": 4.968458069664917e-05, + "loss": 1.162, + "step": 50712 + }, + { + "epoch": 0.05, + "learning_rate": 4.968456825859318e-05, + "loss": 1.3358, + "step": 50713 + }, + { + "epoch": 0.05, + "learning_rate": 4.968455582029353e-05, + "loss": 1.0529, + "step": 50714 + }, + { + "epoch": 0.05, + "learning_rate": 4.96845433817502e-05, + "loss": 1.2222, + "step": 50715 + }, + { + "epoch": 0.05, + "learning_rate": 4.968453094296319e-05, + "loss": 1.1785, + "step": 50716 + }, + { + "epoch": 0.05, + "learning_rate": 4.968451850393251e-05, + "loss": 1.0549, + "step": 50717 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684506064658157e-05, + "loss": 1.2296, + "step": 50718 + }, + { + "epoch": 0.05, + "learning_rate": 4.968449362514011e-05, + "loss": 1.2022, + "step": 50719 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684481185378415e-05, + "loss": 1.3128, + "step": 50720 + }, + { + "epoch": 0.05, + "learning_rate": 4.968446874537303e-05, + "loss": 0.8445, + "step": 50721 + }, + { + "epoch": 0.05, + "learning_rate": 4.968445630512397e-05, + "loss": 2.0535, + "step": 50722 + }, + { + "epoch": 0.05, + "learning_rate": 4.968444386463125e-05, + "loss": 1.7866, + "step": 50723 + }, + { + "epoch": 0.05, + "learning_rate": 4.968443142389484e-05, + "loss": 1.0909, + "step": 50724 + }, + { + "epoch": 0.05, + "learning_rate": 4.968441898291476e-05, + "loss": 1.0545, + "step": 50725 + }, + { + "epoch": 0.05, + "learning_rate": 4.968440654169101e-05, + "loss": 1.2269, + "step": 50726 + }, + { + "epoch": 0.05, + "learning_rate": 4.968439410022357e-05, + "loss": 1.283, + "step": 50727 + }, + { + "epoch": 0.05, + "learning_rate": 4.968438165851247e-05, + "loss": 1.0513, + "step": 50728 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684369216557705e-05, + "loss": 1.6702, + "step": 50729 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684356774359256e-05, + "loss": 0.9695, + "step": 50730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684344331917126e-05, + "loss": 1.0495, + "step": 50731 + }, + { + "epoch": 0.05, + "learning_rate": 4.968433188923133e-05, + "loss": 1.0332, + "step": 50732 + }, + { + "epoch": 0.05, + "learning_rate": 4.968431944630186e-05, + "loss": 1.077, + "step": 50733 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684307003128717e-05, + "loss": 1.0687, + "step": 50734 + }, + { + "epoch": 0.05, + "learning_rate": 4.96842945597119e-05, + "loss": 1.2462, + "step": 50735 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684282116051414e-05, + "loss": 1.0391, + "step": 50736 + }, + { + "epoch": 0.05, + "learning_rate": 4.968426967214725e-05, + "loss": 1.2062, + "step": 50737 + }, + { + "epoch": 0.05, + "learning_rate": 4.968425722799941e-05, + "loss": 1.1967, + "step": 50738 + }, + { + "epoch": 0.05, + "learning_rate": 4.968424478360791e-05, + "loss": 0.4573, + "step": 50739 + }, + { + "epoch": 0.05, + "learning_rate": 4.968423233897273e-05, + "loss": 1.1382, + "step": 50740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684219894093874e-05, + "loss": 1.1086, + "step": 50741 + }, + { + "epoch": 0.05, + "learning_rate": 4.968420744897135e-05, + "loss": 1.2346, + "step": 50742 + }, + { + "epoch": 0.05, + "learning_rate": 4.968419500360516e-05, + "loss": 1.2137, + "step": 50743 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684182557995285e-05, + "loss": 1.163, + "step": 50744 + }, + { + "epoch": 0.05, + "learning_rate": 4.968417011214175e-05, + "loss": 1.0762, + "step": 50745 + }, + { + "epoch": 0.05, + "learning_rate": 4.968415766604454e-05, + "loss": 1.2321, + "step": 50746 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684145219703654e-05, + "loss": 1.3075, + "step": 50747 + }, + { + "epoch": 0.05, + "learning_rate": 4.96841327731191e-05, + "loss": 1.1952, + "step": 50748 + }, + { + "epoch": 0.05, + "learning_rate": 4.968412032629087e-05, + "loss": 0.9622, + "step": 50749 + }, + { + "epoch": 0.05, + "learning_rate": 4.968410787921898e-05, + "loss": 0.8644, + "step": 50750 + }, + { + "epoch": 0.05, + "learning_rate": 4.968409543190341e-05, + "loss": 1.2293, + "step": 50751 + }, + { + "epoch": 0.05, + "learning_rate": 4.968408298434417e-05, + "loss": 1.114, + "step": 50752 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684070536541256e-05, + "loss": 1.0623, + "step": 50753 + }, + { + "epoch": 0.05, + "learning_rate": 4.968405808849468e-05, + "loss": 4.0675, + "step": 50754 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684045640204426e-05, + "loss": 5.9514, + "step": 50755 + }, + { + "epoch": 0.05, + "learning_rate": 4.9684033191670496e-05, + "loss": 5.2793, + "step": 50756 + }, + { + "epoch": 0.05, + "learning_rate": 4.968402074289291e-05, + "loss": 2.7785, + "step": 50757 + }, + { + "epoch": 0.05, + "learning_rate": 4.968400829387164e-05, + "loss": 1.231, + "step": 50758 + }, + { + "epoch": 0.05, + "learning_rate": 4.968399584460671e-05, + "loss": 1.0617, + "step": 50759 + }, + { + "epoch": 0.05, + "learning_rate": 4.968398339509811e-05, + "loss": 1.235, + "step": 50760 + }, + { + "epoch": 0.05, + "learning_rate": 4.968397094534584e-05, + "loss": 1.1425, + "step": 50761 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683958495349895e-05, + "loss": 1.085, + "step": 50762 + }, + { + "epoch": 0.05, + "learning_rate": 4.968394604511028e-05, + "loss": 1.2177, + "step": 50763 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683933594627e-05, + "loss": 1.2872, + "step": 50764 + }, + { + "epoch": 0.05, + "learning_rate": 4.968392114390005e-05, + "loss": 1.4421, + "step": 50765 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683908692929427e-05, + "loss": 1.4017, + "step": 50766 + }, + { + "epoch": 0.05, + "learning_rate": 4.968389624171514e-05, + "loss": 0.8728, + "step": 50767 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683883790257176e-05, + "loss": 1.3159, + "step": 50768 + }, + { + "epoch": 0.05, + "learning_rate": 4.968387133855555e-05, + "loss": 1.4579, + "step": 50769 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683858886610256e-05, + "loss": 1.3466, + "step": 50770 + }, + { + "epoch": 0.05, + "learning_rate": 4.968384643442129e-05, + "loss": 0.9025, + "step": 50771 + }, + { + "epoch": 0.05, + "learning_rate": 4.968383398198866e-05, + "loss": 1.1325, + "step": 50772 + }, + { + "epoch": 0.05, + "learning_rate": 4.968382152931236e-05, + "loss": 1.0077, + "step": 50773 + }, + { + "epoch": 0.05, + "learning_rate": 4.968380907639238e-05, + "loss": 1.3243, + "step": 50774 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683796623228746e-05, + "loss": 1.2161, + "step": 50775 + }, + { + "epoch": 0.05, + "learning_rate": 4.968378416982144e-05, + "loss": 0.8525, + "step": 50776 + }, + { + "epoch": 0.05, + "learning_rate": 4.968377171617047e-05, + "loss": 0.9308, + "step": 50777 + }, + { + "epoch": 0.05, + "learning_rate": 4.968375926227582e-05, + "loss": 0.7633, + "step": 50778 + }, + { + "epoch": 0.05, + "learning_rate": 4.968374680813751e-05, + "loss": 0.7785, + "step": 50779 + }, + { + "epoch": 0.05, + "learning_rate": 4.968373435375554e-05, + "loss": 0.7425, + "step": 50780 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683721899129896e-05, + "loss": 1.2356, + "step": 50781 + }, + { + "epoch": 0.05, + "learning_rate": 4.968370944426058e-05, + "loss": 0.9945, + "step": 50782 + }, + { + "epoch": 0.05, + "learning_rate": 4.96836969891476e-05, + "loss": 0.8024, + "step": 50783 + }, + { + "epoch": 0.05, + "learning_rate": 4.968368453379095e-05, + "loss": 1.2818, + "step": 50784 + }, + { + "epoch": 0.05, + "learning_rate": 4.968367207819064e-05, + "loss": 1.1973, + "step": 50785 + }, + { + "epoch": 0.05, + "learning_rate": 4.968365962234666e-05, + "loss": 1.1422, + "step": 50786 + }, + { + "epoch": 0.05, + "learning_rate": 4.968364716625902e-05, + "loss": 1.2474, + "step": 50787 + }, + { + "epoch": 0.05, + "learning_rate": 4.96836347099277e-05, + "loss": 1.0883, + "step": 50788 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683622253352725e-05, + "loss": 1.1235, + "step": 50789 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683609796534074e-05, + "loss": 0.9153, + "step": 50790 + }, + { + "epoch": 0.05, + "learning_rate": 4.968359733947177e-05, + "loss": 1.1252, + "step": 50791 + }, + { + "epoch": 0.05, + "learning_rate": 4.968358488216578e-05, + "loss": 1.3719, + "step": 50792 + }, + { + "epoch": 0.05, + "learning_rate": 4.968357242461614e-05, + "loss": 1.0248, + "step": 50793 + }, + { + "epoch": 0.05, + "learning_rate": 4.968355996682283e-05, + "loss": 0.3844, + "step": 50794 + }, + { + "epoch": 0.05, + "learning_rate": 4.968354750878585e-05, + "loss": 0.9308, + "step": 50795 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683535050505216e-05, + "loss": 1.3793, + "step": 50796 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683522591980905e-05, + "loss": 1.1254, + "step": 50797 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683510133212926e-05, + "loss": 0.7462, + "step": 50798 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683497674201293e-05, + "loss": 0.8863, + "step": 50799 + }, + { + "epoch": 0.05, + "learning_rate": 4.968348521494599e-05, + "loss": 1.2496, + "step": 50800 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683472755447025e-05, + "loss": 1.2458, + "step": 50801 + }, + { + "epoch": 0.05, + "learning_rate": 4.968346029570439e-05, + "loss": 1.2618, + "step": 50802 + }, + { + "epoch": 0.05, + "learning_rate": 4.96834478357181e-05, + "loss": 1.1046, + "step": 50803 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683435375488135e-05, + "loss": 1.3834, + "step": 50804 + }, + { + "epoch": 0.05, + "learning_rate": 4.968342291501451e-05, + "loss": 1.0878, + "step": 50805 + }, + { + "epoch": 0.05, + "learning_rate": 4.968341045429722e-05, + "loss": 0.6652, + "step": 50806 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683397993336264e-05, + "loss": 0.3433, + "step": 50807 + }, + { + "epoch": 0.05, + "learning_rate": 4.968338553213164e-05, + "loss": 0.4327, + "step": 50808 + }, + { + "epoch": 0.05, + "learning_rate": 4.968337307068336e-05, + "loss": 0.3267, + "step": 50809 + }, + { + "epoch": 0.05, + "learning_rate": 4.968336060899142e-05, + "loss": 1.087, + "step": 50810 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683348147055806e-05, + "loss": 1.2937, + "step": 50811 + }, + { + "epoch": 0.05, + "learning_rate": 4.968333568487653e-05, + "loss": 1.3539, + "step": 50812 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683323222453595e-05, + "loss": 1.1113, + "step": 50813 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683310759786995e-05, + "loss": 1.1592, + "step": 50814 + }, + { + "epoch": 0.05, + "learning_rate": 4.968329829687673e-05, + "loss": 0.9485, + "step": 50815 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683285833722805e-05, + "loss": 1.3216, + "step": 50816 + }, + { + "epoch": 0.05, + "learning_rate": 4.968327337032521e-05, + "loss": 0.9766, + "step": 50817 + }, + { + "epoch": 0.05, + "learning_rate": 4.968326090668396e-05, + "loss": 1.2029, + "step": 50818 + }, + { + "epoch": 0.05, + "learning_rate": 4.968324844279905e-05, + "loss": 1.3867, + "step": 50819 + }, + { + "epoch": 0.05, + "learning_rate": 4.968323597867046e-05, + "loss": 1.1287, + "step": 50820 + }, + { + "epoch": 0.05, + "learning_rate": 4.968322351429823e-05, + "loss": 1.1136, + "step": 50821 + }, + { + "epoch": 0.05, + "learning_rate": 4.968321104968232e-05, + "loss": 1.2897, + "step": 50822 + }, + { + "epoch": 0.05, + "learning_rate": 4.968319858482275e-05, + "loss": 1.6617, + "step": 50823 + }, + { + "epoch": 0.05, + "learning_rate": 4.968318611971953e-05, + "loss": 1.2159, + "step": 50824 + }, + { + "epoch": 0.05, + "learning_rate": 4.968317365437264e-05, + "loss": 1.1737, + "step": 50825 + }, + { + "epoch": 0.05, + "learning_rate": 4.968316118878209e-05, + "loss": 1.2414, + "step": 50826 + }, + { + "epoch": 0.05, + "learning_rate": 4.968314872294787e-05, + "loss": 0.9421, + "step": 50827 + }, + { + "epoch": 0.05, + "learning_rate": 4.968313625687e-05, + "loss": 1.2881, + "step": 50828 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683123790548455e-05, + "loss": 1.324, + "step": 50829 + }, + { + "epoch": 0.05, + "learning_rate": 4.968311132398327e-05, + "loss": 1.0784, + "step": 50830 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683098857174405e-05, + "loss": 1.3148, + "step": 50831 + }, + { + "epoch": 0.05, + "learning_rate": 4.968308639012188e-05, + "loss": 1.2218, + "step": 50832 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683073922825704e-05, + "loss": 1.4796, + "step": 50833 + }, + { + "epoch": 0.05, + "learning_rate": 4.968306145528586e-05, + "loss": 1.6267, + "step": 50834 + }, + { + "epoch": 0.05, + "learning_rate": 4.968304898750235e-05, + "loss": 1.4163, + "step": 50835 + }, + { + "epoch": 0.05, + "learning_rate": 4.968303651947519e-05, + "loss": 2.2022, + "step": 50836 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683024051204366e-05, + "loss": 1.9747, + "step": 50837 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683011582689884e-05, + "loss": 1.3457, + "step": 50838 + }, + { + "epoch": 0.05, + "learning_rate": 4.968299911393174e-05, + "loss": 1.2464, + "step": 50839 + }, + { + "epoch": 0.05, + "learning_rate": 4.968298664492994e-05, + "loss": 1.2881, + "step": 50840 + }, + { + "epoch": 0.05, + "learning_rate": 4.968297417568447e-05, + "loss": 1.2321, + "step": 50841 + }, + { + "epoch": 0.05, + "learning_rate": 4.968296170619534e-05, + "loss": 1.2366, + "step": 50842 + }, + { + "epoch": 0.05, + "learning_rate": 4.968294923646256e-05, + "loss": 1.1825, + "step": 50843 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682936766486114e-05, + "loss": 0.9497, + "step": 50844 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682924296266006e-05, + "loss": 1.2778, + "step": 50845 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682911825802244e-05, + "loss": 0.7652, + "step": 50846 + }, + { + "epoch": 0.05, + "learning_rate": 4.968289935509483e-05, + "loss": 1.1666, + "step": 50847 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682886884143744e-05, + "loss": 1.3905, + "step": 50848 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682874412949e-05, + "loss": 1.3286, + "step": 50849 + }, + { + "epoch": 0.05, + "learning_rate": 4.968286194151061e-05, + "loss": 1.0613, + "step": 50850 + }, + { + "epoch": 0.05, + "learning_rate": 4.968284946982854e-05, + "loss": 1.2764, + "step": 50851 + }, + { + "epoch": 0.05, + "learning_rate": 4.968283699790283e-05, + "loss": 1.1537, + "step": 50852 + }, + { + "epoch": 0.05, + "learning_rate": 4.968282452573345e-05, + "loss": 0.8506, + "step": 50853 + }, + { + "epoch": 0.05, + "learning_rate": 4.968281205332042e-05, + "loss": 0.8206, + "step": 50854 + }, + { + "epoch": 0.05, + "learning_rate": 4.968279958066372e-05, + "loss": 1.2353, + "step": 50855 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682787107763376e-05, + "loss": 1.2362, + "step": 50856 + }, + { + "epoch": 0.05, + "learning_rate": 4.968277463461936e-05, + "loss": 1.3987, + "step": 50857 + }, + { + "epoch": 0.05, + "learning_rate": 4.968276216123169e-05, + "loss": 1.3165, + "step": 50858 + }, + { + "epoch": 0.05, + "learning_rate": 4.968274968760037e-05, + "loss": 1.1854, + "step": 50859 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682737213725385e-05, + "loss": 0.7144, + "step": 50860 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682724739606744e-05, + "loss": 1.1777, + "step": 50861 + }, + { + "epoch": 0.05, + "learning_rate": 4.968271226524445e-05, + "loss": 1.2108, + "step": 50862 + }, + { + "epoch": 0.05, + "learning_rate": 4.96826997906385e-05, + "loss": 1.1526, + "step": 50863 + }, + { + "epoch": 0.05, + "learning_rate": 4.968268731578888e-05, + "loss": 1.3006, + "step": 50864 + }, + { + "epoch": 0.05, + "learning_rate": 4.968267484069561e-05, + "loss": 0.9912, + "step": 50865 + }, + { + "epoch": 0.05, + "learning_rate": 4.968266236535869e-05, + "loss": 1.1653, + "step": 50866 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682649889778104e-05, + "loss": 1.1836, + "step": 50867 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682637413953864e-05, + "loss": 0.7928, + "step": 50868 + }, + { + "epoch": 0.05, + "learning_rate": 4.968262493788597e-05, + "loss": 0.6318, + "step": 50869 + }, + { + "epoch": 0.05, + "learning_rate": 4.968261246157442e-05, + "loss": 1.2714, + "step": 50870 + }, + { + "epoch": 0.05, + "learning_rate": 4.968259998501921e-05, + "loss": 1.378, + "step": 50871 + }, + { + "epoch": 0.05, + "learning_rate": 4.968258750822034e-05, + "loss": 1.3262, + "step": 50872 + }, + { + "epoch": 0.05, + "learning_rate": 4.968257503117782e-05, + "loss": 1.0094, + "step": 50873 + }, + { + "epoch": 0.05, + "learning_rate": 4.968256255389164e-05, + "loss": 1.0979, + "step": 50874 + }, + { + "epoch": 0.05, + "learning_rate": 4.968255007636181e-05, + "loss": 1.0309, + "step": 50875 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682537598588316e-05, + "loss": 1.2385, + "step": 50876 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682525120571175e-05, + "loss": 1.2336, + "step": 50877 + }, + { + "epoch": 0.05, + "learning_rate": 4.968251264231037e-05, + "loss": 1.0073, + "step": 50878 + }, + { + "epoch": 0.05, + "learning_rate": 4.968250016380592e-05, + "loss": 1.1768, + "step": 50879 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682487685057813e-05, + "loss": 1.5068, + "step": 50880 + }, + { + "epoch": 0.05, + "learning_rate": 4.968247520606604e-05, + "loss": 0.9201, + "step": 50881 + }, + { + "epoch": 0.05, + "learning_rate": 4.968246272683062e-05, + "loss": 0.8493, + "step": 50882 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682450247351544e-05, + "loss": 1.0012, + "step": 50883 + }, + { + "epoch": 0.05, + "learning_rate": 4.968243776762882e-05, + "loss": 1.5085, + "step": 50884 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682425287662423e-05, + "loss": 1.5079, + "step": 50885 + }, + { + "epoch": 0.05, + "learning_rate": 4.968241280745239e-05, + "loss": 1.3053, + "step": 50886 + }, + { + "epoch": 0.05, + "learning_rate": 4.968240032699869e-05, + "loss": 1.1977, + "step": 50887 + }, + { + "epoch": 0.05, + "learning_rate": 4.968238784630135e-05, + "loss": 1.1159, + "step": 50888 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682375365360346e-05, + "loss": 0.8714, + "step": 50889 + }, + { + "epoch": 0.05, + "learning_rate": 4.968236288417569e-05, + "loss": 0.9208, + "step": 50890 + }, + { + "epoch": 0.05, + "learning_rate": 4.968235040274738e-05, + "loss": 0.4857, + "step": 50891 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682337921075416e-05, + "loss": 0.9742, + "step": 50892 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682325439159795e-05, + "loss": 0.9281, + "step": 50893 + }, + { + "epoch": 0.05, + "learning_rate": 4.968231295700052e-05, + "loss": 1.2124, + "step": 50894 + }, + { + "epoch": 0.05, + "learning_rate": 4.96823004745976e-05, + "loss": 1.1833, + "step": 50895 + }, + { + "epoch": 0.05, + "learning_rate": 4.968228799195102e-05, + "loss": 1.2468, + "step": 50896 + }, + { + "epoch": 0.05, + "learning_rate": 4.968227550906079e-05, + "loss": 1.5092, + "step": 50897 + }, + { + "epoch": 0.05, + "learning_rate": 4.968226302592691e-05, + "loss": 1.8816, + "step": 50898 + }, + { + "epoch": 0.05, + "learning_rate": 4.968225054254937e-05, + "loss": 0.9462, + "step": 50899 + }, + { + "epoch": 0.05, + "learning_rate": 4.968223805892818e-05, + "loss": 1.1171, + "step": 50900 + }, + { + "epoch": 0.05, + "learning_rate": 4.968222557506334e-05, + "loss": 1.195, + "step": 50901 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682213090954844e-05, + "loss": 1.2981, + "step": 50902 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682200606602696e-05, + "loss": 1.1339, + "step": 50903 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682188122006894e-05, + "loss": 1.1386, + "step": 50904 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682175637167445e-05, + "loss": 0.996, + "step": 50905 + }, + { + "epoch": 0.05, + "learning_rate": 4.968216315208434e-05, + "loss": 0.4509, + "step": 50906 + }, + { + "epoch": 0.05, + "learning_rate": 4.968215066675759e-05, + "loss": 0.2577, + "step": 50907 + }, + { + "epoch": 0.05, + "learning_rate": 4.968213818118718e-05, + "loss": 0.4026, + "step": 50908 + }, + { + "epoch": 0.05, + "learning_rate": 4.968212569537312e-05, + "loss": 1.0218, + "step": 50909 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682113209315406e-05, + "loss": 1.282, + "step": 50910 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682100723014046e-05, + "loss": 1.2368, + "step": 50911 + }, + { + "epoch": 0.05, + "learning_rate": 4.968208823646903e-05, + "loss": 1.1369, + "step": 50912 + }, + { + "epoch": 0.05, + "learning_rate": 4.968207574968037e-05, + "loss": 1.2487, + "step": 50913 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682063262648046e-05, + "loss": 1.4574, + "step": 50914 + }, + { + "epoch": 0.05, + "learning_rate": 4.968205077537208e-05, + "loss": 1.493, + "step": 50915 + }, + { + "epoch": 0.05, + "learning_rate": 4.9682038287852465e-05, + "loss": 1.0257, + "step": 50916 + }, + { + "epoch": 0.05, + "learning_rate": 4.96820258000892e-05, + "loss": 1.0253, + "step": 50917 + }, + { + "epoch": 0.05, + "learning_rate": 4.968201331208228e-05, + "loss": 1.2529, + "step": 50918 + }, + { + "epoch": 0.05, + "learning_rate": 4.968200082383171e-05, + "loss": 1.3915, + "step": 50919 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681988335337495e-05, + "loss": 1.4504, + "step": 50920 + }, + { + "epoch": 0.05, + "learning_rate": 4.968197584659962e-05, + "loss": 1.0841, + "step": 50921 + }, + { + "epoch": 0.05, + "learning_rate": 4.96819633576181e-05, + "loss": 1.021, + "step": 50922 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681950868392936e-05, + "loss": 1.4769, + "step": 50923 + }, + { + "epoch": 0.05, + "learning_rate": 4.968193837892411e-05, + "loss": 1.3686, + "step": 50924 + }, + { + "epoch": 0.05, + "learning_rate": 4.968192588921164e-05, + "loss": 1.2061, + "step": 50925 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681913399255524e-05, + "loss": 1.0888, + "step": 50926 + }, + { + "epoch": 0.05, + "learning_rate": 4.968190090905576e-05, + "loss": 0.7836, + "step": 50927 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681888418612346e-05, + "loss": 0.8779, + "step": 50928 + }, + { + "epoch": 0.05, + "learning_rate": 4.968187592792527e-05, + "loss": 1.2651, + "step": 50929 + }, + { + "epoch": 0.05, + "learning_rate": 4.968186343699456e-05, + "loss": 1.32, + "step": 50930 + }, + { + "epoch": 0.05, + "learning_rate": 4.96818509458202e-05, + "loss": 1.2343, + "step": 50931 + }, + { + "epoch": 0.05, + "learning_rate": 4.968183845440218e-05, + "loss": 1.1697, + "step": 50932 + }, + { + "epoch": 0.05, + "learning_rate": 4.968182596274051e-05, + "loss": 1.1143, + "step": 50933 + }, + { + "epoch": 0.05, + "learning_rate": 4.96818134708352e-05, + "loss": 1.2898, + "step": 50934 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681800978686244e-05, + "loss": 1.4461, + "step": 50935 + }, + { + "epoch": 0.05, + "learning_rate": 4.968178848629364e-05, + "loss": 1.0953, + "step": 50936 + }, + { + "epoch": 0.05, + "learning_rate": 4.968177599365739e-05, + "loss": 1.0214, + "step": 50937 + }, + { + "epoch": 0.05, + "learning_rate": 4.968176350077748e-05, + "loss": 1.1369, + "step": 50938 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681751007653934e-05, + "loss": 0.918, + "step": 50939 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681738514286726e-05, + "loss": 0.9514, + "step": 50940 + }, + { + "epoch": 0.05, + "learning_rate": 4.968172602067588e-05, + "loss": 1.1774, + "step": 50941 + }, + { + "epoch": 0.05, + "learning_rate": 4.968171352682138e-05, + "loss": 2.2631, + "step": 50942 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681701032723246e-05, + "loss": 1.5238, + "step": 50943 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681688538381456e-05, + "loss": 1.0035, + "step": 50944 + }, + { + "epoch": 0.05, + "learning_rate": 4.968167604379602e-05, + "loss": 0.9361, + "step": 50945 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681663548966926e-05, + "loss": 0.7093, + "step": 50946 + }, + { + "epoch": 0.05, + "learning_rate": 4.96816510538942e-05, + "loss": 1.2162, + "step": 50947 + }, + { + "epoch": 0.05, + "learning_rate": 4.968163855857783e-05, + "loss": 1.2576, + "step": 50948 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681626063017794e-05, + "loss": 0.9496, + "step": 50949 + }, + { + "epoch": 0.05, + "learning_rate": 4.968161356721413e-05, + "loss": 1.079, + "step": 50950 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681601071166805e-05, + "loss": 1.5977, + "step": 50951 + }, + { + "epoch": 0.05, + "learning_rate": 4.968158857487585e-05, + "loss": 1.1165, + "step": 50952 + }, + { + "epoch": 0.05, + "learning_rate": 4.968157607834123e-05, + "loss": 1.2349, + "step": 50953 + }, + { + "epoch": 0.05, + "learning_rate": 4.968156358156297e-05, + "loss": 1.3493, + "step": 50954 + }, + { + "epoch": 0.05, + "learning_rate": 4.968155108454108e-05, + "loss": 1.3811, + "step": 50955 + }, + { + "epoch": 0.05, + "learning_rate": 4.968153858727552e-05, + "loss": 1.3996, + "step": 50956 + }, + { + "epoch": 0.05, + "learning_rate": 4.968152608976633e-05, + "loss": 1.2265, + "step": 50957 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681513592013496e-05, + "loss": 0.9828, + "step": 50958 + }, + { + "epoch": 0.05, + "learning_rate": 4.968150109401701e-05, + "loss": 1.0315, + "step": 50959 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681488595776875e-05, + "loss": 0.7663, + "step": 50960 + }, + { + "epoch": 0.05, + "learning_rate": 4.96814760972931e-05, + "loss": 1.1719, + "step": 50961 + }, + { + "epoch": 0.05, + "learning_rate": 4.968146359856568e-05, + "loss": 1.713, + "step": 50962 + }, + { + "epoch": 0.05, + "learning_rate": 4.968145109959461e-05, + "loss": 1.345, + "step": 50963 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681438600379906e-05, + "loss": 1.0975, + "step": 50964 + }, + { + "epoch": 0.05, + "learning_rate": 4.968142610092155e-05, + "loss": 1.3571, + "step": 50965 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681413601219545e-05, + "loss": 1.2651, + "step": 50966 + }, + { + "epoch": 0.05, + "learning_rate": 4.96814011012739e-05, + "loss": 1.2876, + "step": 50967 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681388601084614e-05, + "loss": 1.1031, + "step": 50968 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681376100651674e-05, + "loss": 1.0482, + "step": 50969 + }, + { + "epoch": 0.05, + "learning_rate": 4.96813635999751e-05, + "loss": 1.3125, + "step": 50970 + }, + { + "epoch": 0.05, + "learning_rate": 4.968135109905488e-05, + "loss": 0.8321, + "step": 50971 + }, + { + "epoch": 0.05, + "learning_rate": 4.968133859789101e-05, + "loss": 0.8595, + "step": 50972 + }, + { + "epoch": 0.05, + "learning_rate": 4.96813260964835e-05, + "loss": 1.2502, + "step": 50973 + }, + { + "epoch": 0.05, + "learning_rate": 4.968131359483235e-05, + "loss": 0.9556, + "step": 50974 + }, + { + "epoch": 0.05, + "learning_rate": 4.968130109293756e-05, + "loss": 1.2349, + "step": 50975 + }, + { + "epoch": 0.05, + "learning_rate": 4.968128859079911e-05, + "loss": 0.4365, + "step": 50976 + }, + { + "epoch": 0.05, + "learning_rate": 4.968127608841703e-05, + "loss": 0.6308, + "step": 50977 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681263585791303e-05, + "loss": 1.1453, + "step": 50978 + }, + { + "epoch": 0.05, + "learning_rate": 4.968125108292193e-05, + "loss": 1.0357, + "step": 50979 + }, + { + "epoch": 0.05, + "learning_rate": 4.968123857980892e-05, + "loss": 1.9318, + "step": 50980 + }, + { + "epoch": 0.05, + "learning_rate": 4.968122607645227e-05, + "loss": 1.0439, + "step": 50981 + }, + { + "epoch": 0.05, + "learning_rate": 4.968121357285197e-05, + "loss": 1.1301, + "step": 50982 + }, + { + "epoch": 0.05, + "learning_rate": 4.968120106900803e-05, + "loss": 1.1981, + "step": 50983 + }, + { + "epoch": 0.05, + "learning_rate": 4.968118856492045e-05, + "loss": 1.0169, + "step": 50984 + }, + { + "epoch": 0.05, + "learning_rate": 4.968117606058922e-05, + "loss": 1.1031, + "step": 50985 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681163556014355e-05, + "loss": 1.1343, + "step": 50986 + }, + { + "epoch": 0.05, + "learning_rate": 4.968115105119584e-05, + "loss": 2.2069, + "step": 50987 + }, + { + "epoch": 0.05, + "learning_rate": 4.968113854613369e-05, + "loss": 5.5721, + "step": 50988 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681126040827895e-05, + "loss": 2.9299, + "step": 50989 + }, + { + "epoch": 0.05, + "learning_rate": 4.968111353527847e-05, + "loss": 1.3713, + "step": 50990 + }, + { + "epoch": 0.05, + "learning_rate": 4.968110102948539e-05, + "loss": 0.8701, + "step": 50991 + }, + { + "epoch": 0.05, + "learning_rate": 4.968108852344867e-05, + "loss": 1.3971, + "step": 50992 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681076017168316e-05, + "loss": 1.1494, + "step": 50993 + }, + { + "epoch": 0.05, + "learning_rate": 4.968106351064431e-05, + "loss": 1.091, + "step": 50994 + }, + { + "epoch": 0.05, + "learning_rate": 4.968105100387667e-05, + "loss": 1.0458, + "step": 50995 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681038496865385e-05, + "loss": 0.5869, + "step": 50996 + }, + { + "epoch": 0.05, + "learning_rate": 4.968102598961046e-05, + "loss": 1.1243, + "step": 50997 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681013482111905e-05, + "loss": 1.1364, + "step": 50998 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681000974369695e-05, + "loss": 1.2243, + "step": 50999 + }, + { + "epoch": 0.05, + "learning_rate": 4.968098846638385e-05, + "loss": 1.1812, + "step": 51000 + }, + { + "epoch": 0.05, + "eval_loss": 1.0772169828414917, + "eval_runtime": 126.8045, + "eval_samples_per_second": 10.922, + "eval_steps_per_second": 5.465, + "step": 51000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680975958154365e-05, + "loss": 1.0187, + "step": 51001 + }, + { + "epoch": 0.05, + "learning_rate": 4.968096344968124e-05, + "loss": 0.9657, + "step": 51002 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680950940964474e-05, + "loss": 0.9899, + "step": 51003 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680938432004074e-05, + "loss": 0.7999, + "step": 51004 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680925922800026e-05, + "loss": 0.8354, + "step": 51005 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680913413352345e-05, + "loss": 1.0303, + "step": 51006 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680900903661024e-05, + "loss": 0.8671, + "step": 51007 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680888393726055e-05, + "loss": 0.9649, + "step": 51008 + }, + { + "epoch": 0.05, + "learning_rate": 4.968087588354745e-05, + "loss": 1.1723, + "step": 51009 + }, + { + "epoch": 0.05, + "learning_rate": 4.968086337312521e-05, + "loss": 1.0499, + "step": 51010 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680850862459325e-05, + "loss": 1.1473, + "step": 51011 + }, + { + "epoch": 0.05, + "learning_rate": 4.968083835154981e-05, + "loss": 0.9966, + "step": 51012 + }, + { + "epoch": 0.05, + "learning_rate": 4.968082584039665e-05, + "loss": 1.1618, + "step": 51013 + }, + { + "epoch": 0.05, + "learning_rate": 4.968081332899985e-05, + "loss": 1.1192, + "step": 51014 + }, + { + "epoch": 0.05, + "learning_rate": 4.968080081735941e-05, + "loss": 1.2199, + "step": 51015 + }, + { + "epoch": 0.05, + "learning_rate": 4.968078830547533e-05, + "loss": 0.9783, + "step": 51016 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680775793347625e-05, + "loss": 0.9785, + "step": 51017 + }, + { + "epoch": 0.05, + "learning_rate": 4.968076328097627e-05, + "loss": 1.4172, + "step": 51018 + }, + { + "epoch": 0.05, + "learning_rate": 4.968075076836128e-05, + "loss": 1.1986, + "step": 51019 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680738255502655e-05, + "loss": 1.3663, + "step": 51020 + }, + { + "epoch": 0.05, + "learning_rate": 4.968072574240039e-05, + "loss": 1.319, + "step": 51021 + }, + { + "epoch": 0.05, + "learning_rate": 4.968071322905449e-05, + "loss": 1.1201, + "step": 51022 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680700715464946e-05, + "loss": 1.393, + "step": 51023 + }, + { + "epoch": 0.05, + "learning_rate": 4.968068820163176e-05, + "loss": 0.5786, + "step": 51024 + }, + { + "epoch": 0.05, + "learning_rate": 4.968067568755496e-05, + "loss": 0.2641, + "step": 51025 + }, + { + "epoch": 0.05, + "learning_rate": 4.96806631732345e-05, + "loss": 0.528, + "step": 51026 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680650658670414e-05, + "loss": 0.9759, + "step": 51027 + }, + { + "epoch": 0.05, + "learning_rate": 4.968063814386268e-05, + "loss": 1.2404, + "step": 51028 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680625628811315e-05, + "loss": 1.038, + "step": 51029 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680613113516315e-05, + "loss": 0.9967, + "step": 51030 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680600597977675e-05, + "loss": 1.1677, + "step": 51031 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680588082195414e-05, + "loss": 1.4002, + "step": 51032 + }, + { + "epoch": 0.05, + "learning_rate": 4.96805755661695e-05, + "loss": 1.2451, + "step": 51033 + }, + { + "epoch": 0.05, + "learning_rate": 4.968056304989995e-05, + "loss": 1.22, + "step": 51034 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680550533386774e-05, + "loss": 1.2907, + "step": 51035 + }, + { + "epoch": 0.05, + "learning_rate": 4.968053801662995e-05, + "loss": 1.0201, + "step": 51036 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680525499629494e-05, + "loss": 1.352, + "step": 51037 + }, + { + "epoch": 0.05, + "learning_rate": 4.968051298238541e-05, + "loss": 1.0539, + "step": 51038 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680500464897685e-05, + "loss": 1.1382, + "step": 51039 + }, + { + "epoch": 0.05, + "learning_rate": 4.968048794716632e-05, + "loss": 2.0848, + "step": 51040 + }, + { + "epoch": 0.05, + "learning_rate": 4.968047542919132e-05, + "loss": 0.9598, + "step": 51041 + }, + { + "epoch": 0.05, + "learning_rate": 4.968046291097269e-05, + "loss": 0.6702, + "step": 51042 + }, + { + "epoch": 0.05, + "learning_rate": 4.968045039251042e-05, + "loss": 1.4868, + "step": 51043 + }, + { + "epoch": 0.05, + "learning_rate": 4.968043787380452e-05, + "loss": 1.0412, + "step": 51044 + }, + { + "epoch": 0.05, + "learning_rate": 4.968042535485499e-05, + "loss": 1.1093, + "step": 51045 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680412835661814e-05, + "loss": 1.1818, + "step": 51046 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680400316225005e-05, + "loss": 1.3554, + "step": 51047 + }, + { + "epoch": 0.05, + "learning_rate": 4.968038779654457e-05, + "loss": 0.9076, + "step": 51048 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680375276620494e-05, + "loss": 0.7492, + "step": 51049 + }, + { + "epoch": 0.05, + "learning_rate": 4.968036275645278e-05, + "loss": 1.1089, + "step": 51050 + }, + { + "epoch": 0.05, + "learning_rate": 4.968035023604144e-05, + "loss": 1.0522, + "step": 51051 + }, + { + "epoch": 0.05, + "learning_rate": 4.968033771538646e-05, + "loss": 1.2026, + "step": 51052 + }, + { + "epoch": 0.05, + "learning_rate": 4.968032519448785e-05, + "loss": 0.5533, + "step": 51053 + }, + { + "epoch": 0.05, + "learning_rate": 4.968031267334561e-05, + "loss": 1.2418, + "step": 51054 + }, + { + "epoch": 0.05, + "learning_rate": 4.968030015195972e-05, + "loss": 1.467, + "step": 51055 + }, + { + "epoch": 0.05, + "learning_rate": 4.968028763033021e-05, + "loss": 1.0677, + "step": 51056 + }, + { + "epoch": 0.05, + "learning_rate": 4.968027510845707e-05, + "loss": 0.7095, + "step": 51057 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680262586340284e-05, + "loss": 1.1701, + "step": 51058 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680250063979884e-05, + "loss": 1.0655, + "step": 51059 + }, + { + "epoch": 0.05, + "learning_rate": 4.968023754137584e-05, + "loss": 1.1065, + "step": 51060 + }, + { + "epoch": 0.05, + "learning_rate": 4.968022501852816e-05, + "loss": 1.5136, + "step": 51061 + }, + { + "epoch": 0.05, + "learning_rate": 4.968021249543685e-05, + "loss": 1.051, + "step": 51062 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680199972101905e-05, + "loss": 1.0462, + "step": 51063 + }, + { + "epoch": 0.05, + "learning_rate": 4.968018744852332e-05, + "loss": 0.9805, + "step": 51064 + }, + { + "epoch": 0.05, + "learning_rate": 4.968017492470112e-05, + "loss": 1.2191, + "step": 51065 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680162400635276e-05, + "loss": 1.0385, + "step": 51066 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680149876325806e-05, + "loss": 1.2659, + "step": 51067 + }, + { + "epoch": 0.05, + "learning_rate": 4.96801373517727e-05, + "loss": 1.1055, + "step": 51068 + }, + { + "epoch": 0.05, + "learning_rate": 4.968012482697596e-05, + "loss": 1.0791, + "step": 51069 + }, + { + "epoch": 0.05, + "learning_rate": 4.968011230193559e-05, + "loss": 1.0505, + "step": 51070 + }, + { + "epoch": 0.05, + "learning_rate": 4.968009977665159e-05, + "loss": 0.955, + "step": 51071 + }, + { + "epoch": 0.05, + "learning_rate": 4.968008725112396e-05, + "loss": 1.2262, + "step": 51072 + }, + { + "epoch": 0.05, + "learning_rate": 4.96800747253527e-05, + "loss": 1.3425, + "step": 51073 + }, + { + "epoch": 0.05, + "learning_rate": 4.968006219933781e-05, + "loss": 1.8073, + "step": 51074 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680049673079274e-05, + "loss": 1.4001, + "step": 51075 + }, + { + "epoch": 0.05, + "learning_rate": 4.968003714657712e-05, + "loss": 1.424, + "step": 51076 + }, + { + "epoch": 0.05, + "learning_rate": 4.968002461983133e-05, + "loss": 1.097, + "step": 51077 + }, + { + "epoch": 0.05, + "learning_rate": 4.968001209284192e-05, + "loss": 1.1353, + "step": 51078 + }, + { + "epoch": 0.05, + "learning_rate": 4.967999956560886e-05, + "loss": 1.4989, + "step": 51079 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679987038132186e-05, + "loss": 1.316, + "step": 51080 + }, + { + "epoch": 0.05, + "learning_rate": 4.967997451041188e-05, + "loss": 0.9828, + "step": 51081 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679961982447934e-05, + "loss": 1.1016, + "step": 51082 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679949454240364e-05, + "loss": 0.4869, + "step": 51083 + }, + { + "epoch": 0.05, + "learning_rate": 4.967993692578916e-05, + "loss": 0.9535, + "step": 51084 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679924397094335e-05, + "loss": 1.11, + "step": 51085 + }, + { + "epoch": 0.05, + "learning_rate": 4.967991186815587e-05, + "loss": 1.2258, + "step": 51086 + }, + { + "epoch": 0.05, + "learning_rate": 4.967989933897378e-05, + "loss": 1.2692, + "step": 51087 + }, + { + "epoch": 0.05, + "learning_rate": 4.967988680954806e-05, + "loss": 1.1446, + "step": 51088 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679874279878714e-05, + "loss": 1.6347, + "step": 51089 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679861749965735e-05, + "loss": 1.1313, + "step": 51090 + }, + { + "epoch": 0.05, + "learning_rate": 4.967984921980913e-05, + "loss": 1.1998, + "step": 51091 + }, + { + "epoch": 0.05, + "learning_rate": 4.967983668940889e-05, + "loss": 1.3673, + "step": 51092 + }, + { + "epoch": 0.05, + "learning_rate": 4.967982415876502e-05, + "loss": 0.9154, + "step": 51093 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679811627877526e-05, + "loss": 0.9972, + "step": 51094 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679799096746405e-05, + "loss": 0.6865, + "step": 51095 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679786565371656e-05, + "loss": 0.9716, + "step": 51096 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679774033753274e-05, + "loss": 1.1187, + "step": 51097 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679761501891265e-05, + "loss": 0.6148, + "step": 51098 + }, + { + "epoch": 0.05, + "learning_rate": 4.967974896978563e-05, + "loss": 1.4177, + "step": 51099 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679736437436366e-05, + "loss": 1.3852, + "step": 51100 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679723904843475e-05, + "loss": 0.9843, + "step": 51101 + }, + { + "epoch": 0.05, + "learning_rate": 4.967971137200695e-05, + "loss": 0.9, + "step": 51102 + }, + { + "epoch": 0.05, + "learning_rate": 4.96796988389268e-05, + "loss": 0.3497, + "step": 51103 + }, + { + "epoch": 0.05, + "learning_rate": 4.967968630560302e-05, + "loss": 0.2502, + "step": 51104 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679673772035624e-05, + "loss": 0.1976, + "step": 51105 + }, + { + "epoch": 0.05, + "learning_rate": 4.967966123822459e-05, + "loss": 0.1376, + "step": 51106 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679648704169933e-05, + "loss": 0.3462, + "step": 51107 + }, + { + "epoch": 0.05, + "learning_rate": 4.967963616987165e-05, + "loss": 1.1134, + "step": 51108 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679623635329734e-05, + "loss": 0.8918, + "step": 51109 + }, + { + "epoch": 0.05, + "learning_rate": 4.96796111005442e-05, + "loss": 1.1984, + "step": 51110 + }, + { + "epoch": 0.05, + "learning_rate": 4.967959856551503e-05, + "loss": 1.4285, + "step": 51111 + }, + { + "epoch": 0.05, + "learning_rate": 4.967958603024224e-05, + "loss": 1.3418, + "step": 51112 + }, + { + "epoch": 0.05, + "learning_rate": 4.967957349472582e-05, + "loss": 1.5465, + "step": 51113 + }, + { + "epoch": 0.05, + "learning_rate": 4.967956095896578e-05, + "loss": 1.253, + "step": 51114 + }, + { + "epoch": 0.05, + "learning_rate": 4.967954842296211e-05, + "loss": 0.8489, + "step": 51115 + }, + { + "epoch": 0.05, + "learning_rate": 4.967953588671481e-05, + "loss": 0.9126, + "step": 51116 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679523350223886e-05, + "loss": 1.0982, + "step": 51117 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679510813489336e-05, + "loss": 0.9771, + "step": 51118 + }, + { + "epoch": 0.05, + "learning_rate": 4.967949827651116e-05, + "loss": 1.8101, + "step": 51119 + }, + { + "epoch": 0.05, + "learning_rate": 4.967948573928936e-05, + "loss": 1.2585, + "step": 51120 + }, + { + "epoch": 0.05, + "learning_rate": 4.967947320182393e-05, + "loss": 1.3655, + "step": 51121 + }, + { + "epoch": 0.05, + "learning_rate": 4.967946066411488e-05, + "loss": 1.0539, + "step": 51122 + }, + { + "epoch": 0.05, + "learning_rate": 4.967944812616221e-05, + "loss": 1.3296, + "step": 51123 + }, + { + "epoch": 0.05, + "learning_rate": 4.967943558796591e-05, + "loss": 1.5091, + "step": 51124 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679423049525976e-05, + "loss": 1.1279, + "step": 51125 + }, + { + "epoch": 0.05, + "learning_rate": 4.967941051084243e-05, + "loss": 1.1816, + "step": 51126 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679397971915246e-05, + "loss": 1.4219, + "step": 51127 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679385432744453e-05, + "loss": 1.0928, + "step": 51128 + }, + { + "epoch": 0.05, + "learning_rate": 4.967937289333002e-05, + "loss": 0.9247, + "step": 51129 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679360353671975e-05, + "loss": 0.4011, + "step": 51130 + }, + { + "epoch": 0.05, + "learning_rate": 4.96793478137703e-05, + "loss": 0.2048, + "step": 51131 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679335273625e-05, + "loss": 0.3707, + "step": 51132 + }, + { + "epoch": 0.05, + "learning_rate": 4.967932273323608e-05, + "loss": 0.3529, + "step": 51133 + }, + { + "epoch": 0.05, + "learning_rate": 4.967931019260353e-05, + "loss": 0.2276, + "step": 51134 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679297651727366e-05, + "loss": 1.0658, + "step": 51135 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679285110607564e-05, + "loss": 1.4644, + "step": 51136 + }, + { + "epoch": 0.05, + "learning_rate": 4.967927256924415e-05, + "loss": 1.1449, + "step": 51137 + }, + { + "epoch": 0.05, + "learning_rate": 4.967926002763711e-05, + "loss": 0.9968, + "step": 51138 + }, + { + "epoch": 0.05, + "learning_rate": 4.967924748578645e-05, + "loss": 1.8075, + "step": 51139 + }, + { + "epoch": 0.05, + "learning_rate": 4.967923494369216e-05, + "loss": 1.7016, + "step": 51140 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679222401354253e-05, + "loss": 1.5921, + "step": 51141 + }, + { + "epoch": 0.05, + "learning_rate": 4.967920985877272e-05, + "loss": 0.6285, + "step": 51142 + }, + { + "epoch": 0.05, + "learning_rate": 4.967919731594757e-05, + "loss": 1.0731, + "step": 51143 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679184772878784e-05, + "loss": 1.3896, + "step": 51144 + }, + { + "epoch": 0.05, + "learning_rate": 4.967917222956639e-05, + "loss": 1.3032, + "step": 51145 + }, + { + "epoch": 0.05, + "learning_rate": 4.967915968601036e-05, + "loss": 1.0397, + "step": 51146 + }, + { + "epoch": 0.05, + "learning_rate": 4.967914714221071e-05, + "loss": 0.6098, + "step": 51147 + }, + { + "epoch": 0.05, + "learning_rate": 4.967913459816745e-05, + "loss": 0.6256, + "step": 51148 + }, + { + "epoch": 0.05, + "learning_rate": 4.967912205388056e-05, + "loss": 1.6806, + "step": 51149 + }, + { + "epoch": 0.05, + "learning_rate": 4.967910950935005e-05, + "loss": 1.2765, + "step": 51150 + }, + { + "epoch": 0.05, + "learning_rate": 4.967909696457592e-05, + "loss": 0.8864, + "step": 51151 + }, + { + "epoch": 0.05, + "learning_rate": 4.9679084419558166e-05, + "loss": 0.4949, + "step": 51152 + }, + { + "epoch": 0.05, + "learning_rate": 4.967907187429679e-05, + "loss": 1.165, + "step": 51153 + }, + { + "epoch": 0.05, + "learning_rate": 4.967905932879179e-05, + "loss": 1.062, + "step": 51154 + }, + { + "epoch": 0.05, + "learning_rate": 4.967904678304317e-05, + "loss": 0.8651, + "step": 51155 + }, + { + "epoch": 0.05, + "learning_rate": 4.967903423705093e-05, + "loss": 1.527, + "step": 51156 + }, + { + "epoch": 0.05, + "learning_rate": 4.967902169081506e-05, + "loss": 1.0831, + "step": 51157 + }, + { + "epoch": 0.05, + "learning_rate": 4.967900914433559e-05, + "loss": 1.2524, + "step": 51158 + }, + { + "epoch": 0.05, + "learning_rate": 4.967899659761248e-05, + "loss": 0.9837, + "step": 51159 + }, + { + "epoch": 0.05, + "learning_rate": 4.967898405064576e-05, + "loss": 1.3002, + "step": 51160 + }, + { + "epoch": 0.05, + "learning_rate": 4.967897150343542e-05, + "loss": 1.1991, + "step": 51161 + }, + { + "epoch": 0.05, + "learning_rate": 4.967895895598145e-05, + "loss": 0.8827, + "step": 51162 + }, + { + "epoch": 0.05, + "learning_rate": 4.967894640828387e-05, + "loss": 0.961, + "step": 51163 + }, + { + "epoch": 0.05, + "learning_rate": 4.967893386034266e-05, + "loss": 1.6161, + "step": 51164 + }, + { + "epoch": 0.05, + "learning_rate": 4.967892131215784e-05, + "loss": 0.9892, + "step": 51165 + }, + { + "epoch": 0.05, + "learning_rate": 4.967890876372939e-05, + "loss": 0.9235, + "step": 51166 + }, + { + "epoch": 0.05, + "learning_rate": 4.967889621505733e-05, + "loss": 0.5809, + "step": 51167 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678883666141645e-05, + "loss": 0.8172, + "step": 51168 + }, + { + "epoch": 0.05, + "learning_rate": 4.967887111698234e-05, + "loss": 0.6674, + "step": 51169 + }, + { + "epoch": 0.05, + "learning_rate": 4.967885856757942e-05, + "loss": 1.2598, + "step": 51170 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678846017932875e-05, + "loss": 1.1598, + "step": 51171 + }, + { + "epoch": 0.05, + "learning_rate": 4.967883346804271e-05, + "loss": 1.2965, + "step": 51172 + }, + { + "epoch": 0.05, + "learning_rate": 4.967882091790893e-05, + "loss": 1.238, + "step": 51173 + }, + { + "epoch": 0.05, + "learning_rate": 4.967880836753154e-05, + "loss": 0.8515, + "step": 51174 + }, + { + "epoch": 0.05, + "learning_rate": 4.967879581691052e-05, + "loss": 0.8651, + "step": 51175 + }, + { + "epoch": 0.05, + "learning_rate": 4.967878326604588e-05, + "loss": 1.1469, + "step": 51176 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678770714937625e-05, + "loss": 1.2685, + "step": 51177 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678758163585746e-05, + "loss": 0.9574, + "step": 51178 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678745611990255e-05, + "loss": 1.381, + "step": 51179 + }, + { + "epoch": 0.05, + "learning_rate": 4.967873306015115e-05, + "loss": 0.8333, + "step": 51180 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678720508068425e-05, + "loss": 0.9428, + "step": 51181 + }, + { + "epoch": 0.05, + "learning_rate": 4.967870795574207e-05, + "loss": 0.622, + "step": 51182 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678695403172107e-05, + "loss": 0.1731, + "step": 51183 + }, + { + "epoch": 0.05, + "learning_rate": 4.967868285035853e-05, + "loss": 0.7097, + "step": 51184 + }, + { + "epoch": 0.05, + "learning_rate": 4.967867029730132e-05, + "loss": 0.309, + "step": 51185 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678657744000515e-05, + "loss": 0.7936, + "step": 51186 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678645190456075e-05, + "loss": 0.9256, + "step": 51187 + }, + { + "epoch": 0.05, + "learning_rate": 4.967863263666802e-05, + "loss": 1.2712, + "step": 51188 + }, + { + "epoch": 0.05, + "learning_rate": 4.967862008263635e-05, + "loss": 0.8503, + "step": 51189 + }, + { + "epoch": 0.05, + "learning_rate": 4.967860752836107e-05, + "loss": 1.0199, + "step": 51190 + }, + { + "epoch": 0.05, + "learning_rate": 4.967859497384216e-05, + "loss": 1.3425, + "step": 51191 + }, + { + "epoch": 0.05, + "learning_rate": 4.967858241907965e-05, + "loss": 1.2229, + "step": 51192 + }, + { + "epoch": 0.05, + "learning_rate": 4.967856986407351e-05, + "loss": 1.1227, + "step": 51193 + }, + { + "epoch": 0.05, + "learning_rate": 4.967855730882376e-05, + "loss": 1.0078, + "step": 51194 + }, + { + "epoch": 0.05, + "learning_rate": 4.967854475333039e-05, + "loss": 1.3498, + "step": 51195 + }, + { + "epoch": 0.05, + "learning_rate": 4.967853219759341e-05, + "loss": 1.703, + "step": 51196 + }, + { + "epoch": 0.05, + "learning_rate": 4.967851964161281e-05, + "loss": 1.9526, + "step": 51197 + }, + { + "epoch": 0.05, + "learning_rate": 4.967850708538859e-05, + "loss": 1.4711, + "step": 51198 + }, + { + "epoch": 0.05, + "learning_rate": 4.967849452892076e-05, + "loss": 1.0276, + "step": 51199 + }, + { + "epoch": 0.05, + "learning_rate": 4.967848197220931e-05, + "loss": 1.6189, + "step": 51200 + }, + { + "epoch": 0.05, + "learning_rate": 4.967846941525425e-05, + "loss": 1.2693, + "step": 51201 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678456858055565e-05, + "loss": 1.0353, + "step": 51202 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678444300613276e-05, + "loss": 1.6369, + "step": 51203 + }, + { + "epoch": 0.05, + "learning_rate": 4.967843174292737e-05, + "loss": 1.1746, + "step": 51204 + }, + { + "epoch": 0.05, + "learning_rate": 4.967841918499784e-05, + "loss": 1.159, + "step": 51205 + }, + { + "epoch": 0.05, + "learning_rate": 4.96784066268247e-05, + "loss": 1.0813, + "step": 51206 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678394068407945e-05, + "loss": 0.4728, + "step": 51207 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678381509747575e-05, + "loss": 1.1495, + "step": 51208 + }, + { + "epoch": 0.05, + "learning_rate": 4.967836895084359e-05, + "loss": 1.0266, + "step": 51209 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678356391695995e-05, + "loss": 0.7623, + "step": 51210 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678343832304785e-05, + "loss": 1.0681, + "step": 51211 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678331272669955e-05, + "loss": 1.0481, + "step": 51212 + }, + { + "epoch": 0.05, + "learning_rate": 4.967831871279152e-05, + "loss": 1.3205, + "step": 51213 + }, + { + "epoch": 0.05, + "learning_rate": 4.967830615266946e-05, + "loss": 1.4629, + "step": 51214 + }, + { + "epoch": 0.05, + "learning_rate": 4.967829359230379e-05, + "loss": 0.7186, + "step": 51215 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678281031694506e-05, + "loss": 0.6039, + "step": 51216 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678268470841615e-05, + "loss": 0.9807, + "step": 51217 + }, + { + "epoch": 0.05, + "learning_rate": 4.967825590974511e-05, + "loss": 1.1602, + "step": 51218 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678243348404986e-05, + "loss": 1.1413, + "step": 51219 + }, + { + "epoch": 0.05, + "learning_rate": 4.967823078682125e-05, + "loss": 1.2787, + "step": 51220 + }, + { + "epoch": 0.05, + "learning_rate": 4.96782182249939e-05, + "loss": 0.8962, + "step": 51221 + }, + { + "epoch": 0.05, + "learning_rate": 4.967820566292294e-05, + "loss": 0.8647, + "step": 51222 + }, + { + "epoch": 0.05, + "learning_rate": 4.967819310060836e-05, + "loss": 1.161, + "step": 51223 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678180538050175e-05, + "loss": 1.2833, + "step": 51224 + }, + { + "epoch": 0.05, + "learning_rate": 4.967816797524837e-05, + "loss": 1.1325, + "step": 51225 + }, + { + "epoch": 0.05, + "learning_rate": 4.967815541220296e-05, + "loss": 1.0025, + "step": 51226 + }, + { + "epoch": 0.05, + "learning_rate": 4.967814284891393e-05, + "loss": 0.2968, + "step": 51227 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678130285381294e-05, + "loss": 0.4103, + "step": 51228 + }, + { + "epoch": 0.05, + "learning_rate": 4.967811772160504e-05, + "loss": 0.3899, + "step": 51229 + }, + { + "epoch": 0.05, + "learning_rate": 4.967810515758519e-05, + "loss": 0.2665, + "step": 51230 + }, + { + "epoch": 0.05, + "learning_rate": 4.967809259332171e-05, + "loss": 0.1886, + "step": 51231 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678080028814624e-05, + "loss": 0.1956, + "step": 51232 + }, + { + "epoch": 0.05, + "learning_rate": 4.967806746406393e-05, + "loss": 0.1265, + "step": 51233 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678054899069626e-05, + "loss": 0.0741, + "step": 51234 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678042333831706e-05, + "loss": 0.1684, + "step": 51235 + }, + { + "epoch": 0.05, + "learning_rate": 4.967802976835017e-05, + "loss": 0.8752, + "step": 51236 + }, + { + "epoch": 0.05, + "learning_rate": 4.9678017202625034e-05, + "loss": 0.92, + "step": 51237 + }, + { + "epoch": 0.05, + "learning_rate": 4.967800463665628e-05, + "loss": 1.1918, + "step": 51238 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677992070443914e-05, + "loss": 1.0074, + "step": 51239 + }, + { + "epoch": 0.05, + "learning_rate": 4.967797950398794e-05, + "loss": 1.1857, + "step": 51240 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677966937288355e-05, + "loss": 1.0189, + "step": 51241 + }, + { + "epoch": 0.05, + "learning_rate": 4.967795437034516e-05, + "loss": 1.3097, + "step": 51242 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677941803158355e-05, + "loss": 1.335, + "step": 51243 + }, + { + "epoch": 0.05, + "learning_rate": 4.967792923572794e-05, + "loss": 1.014, + "step": 51244 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677916668053914e-05, + "loss": 1.0231, + "step": 51245 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677904100136274e-05, + "loss": 1.4416, + "step": 51246 + }, + { + "epoch": 0.05, + "learning_rate": 4.967789153197503e-05, + "loss": 1.0449, + "step": 51247 + }, + { + "epoch": 0.05, + "learning_rate": 4.967787896357018e-05, + "loss": 0.8801, + "step": 51248 + }, + { + "epoch": 0.05, + "learning_rate": 4.967786639492171e-05, + "loss": 1.2409, + "step": 51249 + }, + { + "epoch": 0.05, + "learning_rate": 4.967785382602963e-05, + "loss": 1.0409, + "step": 51250 + }, + { + "epoch": 0.05, + "learning_rate": 4.967784125689395e-05, + "loss": 0.9542, + "step": 51251 + }, + { + "epoch": 0.05, + "learning_rate": 4.967782868751466e-05, + "loss": 1.0321, + "step": 51252 + }, + { + "epoch": 0.05, + "learning_rate": 4.967781611789176e-05, + "loss": 1.22, + "step": 51253 + }, + { + "epoch": 0.05, + "learning_rate": 4.967780354802525e-05, + "loss": 1.1845, + "step": 51254 + }, + { + "epoch": 0.05, + "learning_rate": 4.967779097791513e-05, + "loss": 1.2537, + "step": 51255 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677778407561396e-05, + "loss": 1.0931, + "step": 51256 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677765836964055e-05, + "loss": 1.565, + "step": 51257 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677753266123114e-05, + "loss": 1.2032, + "step": 51258 + }, + { + "epoch": 0.05, + "learning_rate": 4.967774069503855e-05, + "loss": 1.2173, + "step": 51259 + }, + { + "epoch": 0.05, + "learning_rate": 4.967772812371039e-05, + "loss": 1.3598, + "step": 51260 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677715552138625e-05, + "loss": 1.1215, + "step": 51261 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677702980323244e-05, + "loss": 1.0754, + "step": 51262 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677690408264256e-05, + "loss": 1.1838, + "step": 51263 + }, + { + "epoch": 0.05, + "learning_rate": 4.967767783596166e-05, + "loss": 0.9751, + "step": 51264 + }, + { + "epoch": 0.05, + "learning_rate": 4.967766526341546e-05, + "loss": 1.3585, + "step": 51265 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677652690625646e-05, + "loss": 1.3011, + "step": 51266 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677640117592225e-05, + "loss": 1.518, + "step": 51267 + }, + { + "epoch": 0.05, + "learning_rate": 4.967762754431521e-05, + "loss": 0.9362, + "step": 51268 + }, + { + "epoch": 0.05, + "learning_rate": 4.967761497079457e-05, + "loss": 1.1261, + "step": 51269 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677602397030335e-05, + "loss": 1.0602, + "step": 51270 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677589823022494e-05, + "loss": 0.9716, + "step": 51271 + }, + { + "epoch": 0.05, + "learning_rate": 4.967757724877104e-05, + "loss": 1.1069, + "step": 51272 + }, + { + "epoch": 0.05, + "learning_rate": 4.967756467427598e-05, + "loss": 1.1206, + "step": 51273 + }, + { + "epoch": 0.05, + "learning_rate": 4.967755209953732e-05, + "loss": 0.8445, + "step": 51274 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677539524555036e-05, + "loss": 0.3779, + "step": 51275 + }, + { + "epoch": 0.05, + "learning_rate": 4.967752694932917e-05, + "loss": 1.5477, + "step": 51276 + }, + { + "epoch": 0.05, + "learning_rate": 4.967751437385968e-05, + "loss": 1.2733, + "step": 51277 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677501798146586e-05, + "loss": 1.2421, + "step": 51278 + }, + { + "epoch": 0.05, + "learning_rate": 4.967748922218989e-05, + "loss": 0.7068, + "step": 51279 + }, + { + "epoch": 0.05, + "learning_rate": 4.967747664598959e-05, + "loss": 1.0271, + "step": 51280 + }, + { + "epoch": 0.05, + "learning_rate": 4.967746406954569e-05, + "loss": 1.3386, + "step": 51281 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677451492858175e-05, + "loss": 1.0934, + "step": 51282 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677438915927053e-05, + "loss": 1.3487, + "step": 51283 + }, + { + "epoch": 0.05, + "learning_rate": 4.967742633875233e-05, + "loss": 0.947, + "step": 51284 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677413761334005e-05, + "loss": 1.2626, + "step": 51285 + }, + { + "epoch": 0.05, + "learning_rate": 4.967740118367207e-05, + "loss": 1.1332, + "step": 51286 + }, + { + "epoch": 0.05, + "learning_rate": 4.967738860576653e-05, + "loss": 1.1243, + "step": 51287 + }, + { + "epoch": 0.05, + "learning_rate": 4.967737602761738e-05, + "loss": 1.0855, + "step": 51288 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677363449224634e-05, + "loss": 1.0618, + "step": 51289 + }, + { + "epoch": 0.05, + "learning_rate": 4.967735087058828e-05, + "loss": 1.0894, + "step": 51290 + }, + { + "epoch": 0.05, + "learning_rate": 4.967733829170832e-05, + "loss": 0.7039, + "step": 51291 + }, + { + "epoch": 0.05, + "learning_rate": 4.967732571258476e-05, + "loss": 1.147, + "step": 51292 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677313133217596e-05, + "loss": 0.939, + "step": 51293 + }, + { + "epoch": 0.05, + "learning_rate": 4.967730055360683e-05, + "loss": 1.1615, + "step": 51294 + }, + { + "epoch": 0.05, + "learning_rate": 4.967728797375245e-05, + "loss": 0.8671, + "step": 51295 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677275393654474e-05, + "loss": 0.6377, + "step": 51296 + }, + { + "epoch": 0.05, + "learning_rate": 4.967726281331289e-05, + "loss": 1.0993, + "step": 51297 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677250232727706e-05, + "loss": 1.4376, + "step": 51298 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677237651898925e-05, + "loss": 1.1181, + "step": 51299 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677225070826524e-05, + "loss": 1.1494, + "step": 51300 + }, + { + "epoch": 0.05, + "learning_rate": 4.967721248951053e-05, + "loss": 0.9759, + "step": 51301 + }, + { + "epoch": 0.05, + "learning_rate": 4.967719990795093e-05, + "loss": 0.3724, + "step": 51302 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677187326147736e-05, + "loss": 1.0439, + "step": 51303 + }, + { + "epoch": 0.05, + "learning_rate": 4.967717474410093e-05, + "loss": 0.5904, + "step": 51304 + }, + { + "epoch": 0.05, + "learning_rate": 4.967716216181052e-05, + "loss": 1.1281, + "step": 51305 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677149579276515e-05, + "loss": 1.0348, + "step": 51306 + }, + { + "epoch": 0.05, + "learning_rate": 4.967713699649891e-05, + "loss": 1.0946, + "step": 51307 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677124413477695e-05, + "loss": 1.1473, + "step": 51308 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677111830212875e-05, + "loss": 0.8401, + "step": 51309 + }, + { + "epoch": 0.05, + "learning_rate": 4.967709924670446e-05, + "loss": 1.357, + "step": 51310 + }, + { + "epoch": 0.05, + "learning_rate": 4.967708666295244e-05, + "loss": 1.8027, + "step": 51311 + }, + { + "epoch": 0.05, + "learning_rate": 4.967707407895682e-05, + "loss": 2.1651, + "step": 51312 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677061494717595e-05, + "loss": 1.1781, + "step": 51313 + }, + { + "epoch": 0.05, + "learning_rate": 4.967704891023477e-05, + "loss": 1.1859, + "step": 51314 + }, + { + "epoch": 0.05, + "learning_rate": 4.967703632550834e-05, + "loss": 1.1187, + "step": 51315 + }, + { + "epoch": 0.05, + "learning_rate": 4.967702374053832e-05, + "loss": 1.1285, + "step": 51316 + }, + { + "epoch": 0.05, + "learning_rate": 4.9677011155324696e-05, + "loss": 1.1239, + "step": 51317 + }, + { + "epoch": 0.05, + "learning_rate": 4.967699856986746e-05, + "loss": 1.2107, + "step": 51318 + }, + { + "epoch": 0.05, + "learning_rate": 4.967698598416663e-05, + "loss": 1.3121, + "step": 51319 + }, + { + "epoch": 0.05, + "learning_rate": 4.96769733982222e-05, + "loss": 1.4179, + "step": 51320 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676960812034165e-05, + "loss": 0.9622, + "step": 51321 + }, + { + "epoch": 0.05, + "learning_rate": 4.967694822560254e-05, + "loss": 1.2228, + "step": 51322 + }, + { + "epoch": 0.05, + "learning_rate": 4.96769356389273e-05, + "loss": 1.205, + "step": 51323 + }, + { + "epoch": 0.05, + "learning_rate": 4.967692305200847e-05, + "loss": 1.0655, + "step": 51324 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676910464846036e-05, + "loss": 0.9997, + "step": 51325 + }, + { + "epoch": 0.05, + "learning_rate": 4.967689787744e-05, + "loss": 1.1217, + "step": 51326 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676885289790365e-05, + "loss": 1.0609, + "step": 51327 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676872701897134e-05, + "loss": 1.3034, + "step": 51328 + }, + { + "epoch": 0.05, + "learning_rate": 4.96768601137603e-05, + "loss": 1.3339, + "step": 51329 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676847525379863e-05, + "loss": 1.5942, + "step": 51330 + }, + { + "epoch": 0.05, + "learning_rate": 4.967683493675583e-05, + "loss": 1.3455, + "step": 51331 + }, + { + "epoch": 0.05, + "learning_rate": 4.967682234788821e-05, + "loss": 1.4004, + "step": 51332 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676809758776976e-05, + "loss": 1.0652, + "step": 51333 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676797169422145e-05, + "loss": 1.0441, + "step": 51334 + }, + { + "epoch": 0.05, + "learning_rate": 4.967678457982372e-05, + "loss": 0.7943, + "step": 51335 + }, + { + "epoch": 0.05, + "learning_rate": 4.967677198998169e-05, + "loss": 0.8131, + "step": 51336 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676759399896065e-05, + "loss": 1.2113, + "step": 51337 + }, + { + "epoch": 0.05, + "learning_rate": 4.967674680956684e-05, + "loss": 1.3571, + "step": 51338 + }, + { + "epoch": 0.05, + "learning_rate": 4.967673421899402e-05, + "loss": 1.4612, + "step": 51339 + }, + { + "epoch": 0.05, + "learning_rate": 4.967672162817759e-05, + "loss": 1.3219, + "step": 51340 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676709037117576e-05, + "loss": 0.9087, + "step": 51341 + }, + { + "epoch": 0.05, + "learning_rate": 4.967669644581396e-05, + "loss": 1.1763, + "step": 51342 + }, + { + "epoch": 0.05, + "learning_rate": 4.967668385426674e-05, + "loss": 1.087, + "step": 51343 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676671262475926e-05, + "loss": 1.6602, + "step": 51344 + }, + { + "epoch": 0.05, + "learning_rate": 4.967665867044152e-05, + "loss": 1.0491, + "step": 51345 + }, + { + "epoch": 0.05, + "learning_rate": 4.967664607816351e-05, + "loss": 0.6539, + "step": 51346 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676633485641904e-05, + "loss": 0.918, + "step": 51347 + }, + { + "epoch": 0.05, + "learning_rate": 4.96766208928767e-05, + "loss": 1.3068, + "step": 51348 + }, + { + "epoch": 0.05, + "learning_rate": 4.96766082998679e-05, + "loss": 1.2979, + "step": 51349 + }, + { + "epoch": 0.05, + "learning_rate": 4.96765957066155e-05, + "loss": 1.1172, + "step": 51350 + }, + { + "epoch": 0.05, + "learning_rate": 4.967658311311951e-05, + "loss": 1.1895, + "step": 51351 + }, + { + "epoch": 0.05, + "learning_rate": 4.967657051937992e-05, + "loss": 1.2033, + "step": 51352 + }, + { + "epoch": 0.05, + "learning_rate": 4.967655792539673e-05, + "loss": 1.2479, + "step": 51353 + }, + { + "epoch": 0.05, + "learning_rate": 4.967654533116994e-05, + "loss": 1.0503, + "step": 51354 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676532736699566e-05, + "loss": 1.2506, + "step": 51355 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676520141985584e-05, + "loss": 1.3388, + "step": 51356 + }, + { + "epoch": 0.05, + "learning_rate": 4.967650754702801e-05, + "loss": 1.238, + "step": 51357 + }, + { + "epoch": 0.05, + "learning_rate": 4.967649495182684e-05, + "loss": 1.3006, + "step": 51358 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676482356382074e-05, + "loss": 1.1958, + "step": 51359 + }, + { + "epoch": 0.05, + "learning_rate": 4.967646976069371e-05, + "loss": 1.2931, + "step": 51360 + }, + { + "epoch": 0.05, + "learning_rate": 4.967645716476176e-05, + "loss": 0.9715, + "step": 51361 + }, + { + "epoch": 0.05, + "learning_rate": 4.96764445685862e-05, + "loss": 0.8823, + "step": 51362 + }, + { + "epoch": 0.05, + "learning_rate": 4.967643197216706e-05, + "loss": 1.1408, + "step": 51363 + }, + { + "epoch": 0.05, + "learning_rate": 4.967641937550431e-05, + "loss": 1.0322, + "step": 51364 + }, + { + "epoch": 0.05, + "learning_rate": 4.967640677859797e-05, + "loss": 1.0608, + "step": 51365 + }, + { + "epoch": 0.05, + "learning_rate": 4.967639418144804e-05, + "loss": 1.385, + "step": 51366 + }, + { + "epoch": 0.05, + "learning_rate": 4.967638158405451e-05, + "loss": 1.2776, + "step": 51367 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676368986417386e-05, + "loss": 1.0054, + "step": 51368 + }, + { + "epoch": 0.05, + "learning_rate": 4.967635638853667e-05, + "loss": 0.8585, + "step": 51369 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676343790412354e-05, + "loss": 1.073, + "step": 51370 + }, + { + "epoch": 0.05, + "learning_rate": 4.967633119204445e-05, + "loss": 0.8721, + "step": 51371 + }, + { + "epoch": 0.05, + "learning_rate": 4.967631859343295e-05, + "loss": 0.6106, + "step": 51372 + }, + { + "epoch": 0.05, + "learning_rate": 4.967630599457785e-05, + "loss": 0.798, + "step": 51373 + }, + { + "epoch": 0.05, + "learning_rate": 4.967629339547916e-05, + "loss": 1.0224, + "step": 51374 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676280796136875e-05, + "loss": 0.9047, + "step": 51375 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676268196551e-05, + "loss": 1.2478, + "step": 51376 + }, + { + "epoch": 0.05, + "learning_rate": 4.967625559672152e-05, + "loss": 1.2455, + "step": 51377 + }, + { + "epoch": 0.05, + "learning_rate": 4.967624299664846e-05, + "loss": 1.1408, + "step": 51378 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676230396331804e-05, + "loss": 0.9535, + "step": 51379 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676217795771554e-05, + "loss": 0.9902, + "step": 51380 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676205194967703e-05, + "loss": 1.0224, + "step": 51381 + }, + { + "epoch": 0.05, + "learning_rate": 4.967619259392027e-05, + "loss": 1.1547, + "step": 51382 + }, + { + "epoch": 0.05, + "learning_rate": 4.967617999262924e-05, + "loss": 1.6006, + "step": 51383 + }, + { + "epoch": 0.05, + "learning_rate": 4.967616739109461e-05, + "loss": 1.1799, + "step": 51384 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676154789316384e-05, + "loss": 1.3094, + "step": 51385 + }, + { + "epoch": 0.05, + "learning_rate": 4.967614218729458e-05, + "loss": 1.0946, + "step": 51386 + }, + { + "epoch": 0.05, + "learning_rate": 4.967612958502918e-05, + "loss": 1.2873, + "step": 51387 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676116982520184e-05, + "loss": 1.2826, + "step": 51388 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676104379767596e-05, + "loss": 1.2681, + "step": 51389 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676091776771414e-05, + "loss": 0.9727, + "step": 51390 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676079173531646e-05, + "loss": 1.3455, + "step": 51391 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676066570048285e-05, + "loss": 1.4111, + "step": 51392 + }, + { + "epoch": 0.05, + "learning_rate": 4.967605396632134e-05, + "loss": 1.1477, + "step": 51393 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676041362350783e-05, + "loss": 1.3077, + "step": 51394 + }, + { + "epoch": 0.05, + "learning_rate": 4.967602875813665e-05, + "loss": 1.8671, + "step": 51395 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676016153678916e-05, + "loss": 1.3438, + "step": 51396 + }, + { + "epoch": 0.05, + "learning_rate": 4.96760035489776e-05, + "loss": 1.1089, + "step": 51397 + }, + { + "epoch": 0.05, + "learning_rate": 4.967599094403269e-05, + "loss": 1.0268, + "step": 51398 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675978338844184e-05, + "loss": 1.3529, + "step": 51399 + }, + { + "epoch": 0.05, + "learning_rate": 4.967596573341209e-05, + "loss": 1.1758, + "step": 51400 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675953127736405e-05, + "loss": 0.9817, + "step": 51401 + }, + { + "epoch": 0.05, + "learning_rate": 4.967594052181713e-05, + "loss": 1.2331, + "step": 51402 + }, + { + "epoch": 0.05, + "learning_rate": 4.967592791565427e-05, + "loss": 1.1422, + "step": 51403 + }, + { + "epoch": 0.05, + "learning_rate": 4.967591530924781e-05, + "loss": 1.044, + "step": 51404 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675902702597765e-05, + "loss": 1.2161, + "step": 51405 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675890095704134e-05, + "loss": 1.4879, + "step": 51406 + }, + { + "epoch": 0.05, + "learning_rate": 4.967587748856691e-05, + "loss": 1.0528, + "step": 51407 + }, + { + "epoch": 0.05, + "learning_rate": 4.967586488118609e-05, + "loss": 1.1027, + "step": 51408 + }, + { + "epoch": 0.05, + "learning_rate": 4.967585227356168e-05, + "loss": 1.2446, + "step": 51409 + }, + { + "epoch": 0.05, + "learning_rate": 4.967583966569369e-05, + "loss": 1.175, + "step": 51410 + }, + { + "epoch": 0.05, + "learning_rate": 4.967582705758211e-05, + "loss": 0.7925, + "step": 51411 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675814449226934e-05, + "loss": 0.6086, + "step": 51412 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675801840628164e-05, + "loss": 1.0638, + "step": 51413 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675789231785815e-05, + "loss": 1.2768, + "step": 51414 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675776622699873e-05, + "loss": 1.002, + "step": 51415 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675764013370345e-05, + "loss": 0.7295, + "step": 51416 + }, + { + "epoch": 0.05, + "learning_rate": 4.967575140379723e-05, + "loss": 1.0113, + "step": 51417 + }, + { + "epoch": 0.05, + "learning_rate": 4.967573879398052e-05, + "loss": 1.0871, + "step": 51418 + }, + { + "epoch": 0.05, + "learning_rate": 4.967572618392022e-05, + "loss": 1.1494, + "step": 51419 + }, + { + "epoch": 0.05, + "learning_rate": 4.967571357361635e-05, + "loss": 1.203, + "step": 51420 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675700963068874e-05, + "loss": 0.8616, + "step": 51421 + }, + { + "epoch": 0.05, + "learning_rate": 4.967568835227781e-05, + "loss": 1.1397, + "step": 51422 + }, + { + "epoch": 0.05, + "learning_rate": 4.967567574124316e-05, + "loss": 1.2737, + "step": 51423 + }, + { + "epoch": 0.05, + "learning_rate": 4.967566312996492e-05, + "loss": 1.5492, + "step": 51424 + }, + { + "epoch": 0.05, + "learning_rate": 4.96756505184431e-05, + "loss": 1.7575, + "step": 51425 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675637906677684e-05, + "loss": 1.3538, + "step": 51426 + }, + { + "epoch": 0.05, + "learning_rate": 4.967562529466869e-05, + "loss": 1.3618, + "step": 51427 + }, + { + "epoch": 0.05, + "learning_rate": 4.967561268241611e-05, + "loss": 1.1917, + "step": 51428 + }, + { + "epoch": 0.05, + "learning_rate": 4.967560006991992e-05, + "loss": 1.1932, + "step": 51429 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675587457180165e-05, + "loss": 0.8932, + "step": 51430 + }, + { + "epoch": 0.05, + "learning_rate": 4.967557484419682e-05, + "loss": 1.1393, + "step": 51431 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675562230969885e-05, + "loss": 1.3877, + "step": 51432 + }, + { + "epoch": 0.05, + "learning_rate": 4.967554961749936e-05, + "loss": 1.6293, + "step": 51433 + }, + { + "epoch": 0.05, + "learning_rate": 4.967553700378526e-05, + "loss": 1.2452, + "step": 51434 + }, + { + "epoch": 0.05, + "learning_rate": 4.967552438982756e-05, + "loss": 1.1843, + "step": 51435 + }, + { + "epoch": 0.05, + "learning_rate": 4.967551177562628e-05, + "loss": 1.2521, + "step": 51436 + }, + { + "epoch": 0.05, + "learning_rate": 4.967549916118141e-05, + "loss": 1.3859, + "step": 51437 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675486546492956e-05, + "loss": 1.46, + "step": 51438 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675473931560915e-05, + "loss": 1.3401, + "step": 51439 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675461316385294e-05, + "loss": 1.0102, + "step": 51440 + }, + { + "epoch": 0.05, + "learning_rate": 4.967544870096608e-05, + "loss": 0.9497, + "step": 51441 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675436085303286e-05, + "loss": 0.3104, + "step": 51442 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675423469396906e-05, + "loss": 0.5384, + "step": 51443 + }, + { + "epoch": 0.05, + "learning_rate": 4.967541085324693e-05, + "loss": 1.1496, + "step": 51444 + }, + { + "epoch": 0.05, + "learning_rate": 4.967539823685338e-05, + "loss": 0.9611, + "step": 51445 + }, + { + "epoch": 0.05, + "learning_rate": 4.967538562021624e-05, + "loss": 0.4345, + "step": 51446 + }, + { + "epoch": 0.05, + "learning_rate": 4.967537300333552e-05, + "loss": 0.9104, + "step": 51447 + }, + { + "epoch": 0.05, + "learning_rate": 4.967536038621121e-05, + "loss": 1.1066, + "step": 51448 + }, + { + "epoch": 0.05, + "learning_rate": 4.967534776884332e-05, + "loss": 0.9972, + "step": 51449 + }, + { + "epoch": 0.05, + "learning_rate": 4.967533515123184e-05, + "loss": 1.3702, + "step": 51450 + }, + { + "epoch": 0.05, + "learning_rate": 4.967532253337678e-05, + "loss": 1.0773, + "step": 51451 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675309915278134e-05, + "loss": 1.0482, + "step": 51452 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675297296935904e-05, + "loss": 1.3266, + "step": 51453 + }, + { + "epoch": 0.05, + "learning_rate": 4.967528467835009e-05, + "loss": 0.9693, + "step": 51454 + }, + { + "epoch": 0.05, + "learning_rate": 4.967527205952069e-05, + "loss": 0.9024, + "step": 51455 + }, + { + "epoch": 0.05, + "learning_rate": 4.967525944044771e-05, + "loss": 0.556, + "step": 51456 + }, + { + "epoch": 0.05, + "learning_rate": 4.967524682113114e-05, + "loss": 0.5969, + "step": 51457 + }, + { + "epoch": 0.05, + "learning_rate": 4.967523420157099e-05, + "loss": 0.541, + "step": 51458 + }, + { + "epoch": 0.05, + "learning_rate": 4.967522158176726e-05, + "loss": 0.3368, + "step": 51459 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675208961719945e-05, + "loss": 0.4313, + "step": 51460 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675196341429044e-05, + "loss": 0.5903, + "step": 51461 + }, + { + "epoch": 0.05, + "learning_rate": 4.967518372089456e-05, + "loss": 1.0827, + "step": 51462 + }, + { + "epoch": 0.05, + "learning_rate": 4.967517110011649e-05, + "loss": 1.2698, + "step": 51463 + }, + { + "epoch": 0.05, + "learning_rate": 4.967515847909484e-05, + "loss": 0.5502, + "step": 51464 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675145857829607e-05, + "loss": 0.7009, + "step": 51465 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675133236320795e-05, + "loss": 1.0663, + "step": 51466 + }, + { + "epoch": 0.05, + "learning_rate": 4.96751206145684e-05, + "loss": 1.1567, + "step": 51467 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675107992572426e-05, + "loss": 0.9781, + "step": 51468 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675095370332855e-05, + "loss": 1.3009, + "step": 51469 + }, + { + "epoch": 0.05, + "learning_rate": 4.967508274784971e-05, + "loss": 1.0115, + "step": 51470 + }, + { + "epoch": 0.05, + "learning_rate": 4.967507012512299e-05, + "loss": 1.4421, + "step": 51471 + }, + { + "epoch": 0.05, + "learning_rate": 4.967505750215268e-05, + "loss": 1.2401, + "step": 51472 + }, + { + "epoch": 0.05, + "learning_rate": 4.967504487893879e-05, + "loss": 0.7448, + "step": 51473 + }, + { + "epoch": 0.05, + "learning_rate": 4.967503225548132e-05, + "loss": 1.1025, + "step": 51474 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675019631780266e-05, + "loss": 1.5867, + "step": 51475 + }, + { + "epoch": 0.05, + "learning_rate": 4.967500700783563e-05, + "loss": 1.3637, + "step": 51476 + }, + { + "epoch": 0.05, + "learning_rate": 4.967499438364742e-05, + "loss": 1.1141, + "step": 51477 + }, + { + "epoch": 0.05, + "learning_rate": 4.967498175921562e-05, + "loss": 1.0799, + "step": 51478 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674969134540236e-05, + "loss": 1.0203, + "step": 51479 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674956509621284e-05, + "loss": 0.9004, + "step": 51480 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674943884458744e-05, + "loss": 1.094, + "step": 51481 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674931259052626e-05, + "loss": 1.408, + "step": 51482 + }, + { + "epoch": 0.05, + "learning_rate": 4.967491863340292e-05, + "loss": 0.8922, + "step": 51483 + }, + { + "epoch": 0.05, + "learning_rate": 4.967490600750965e-05, + "loss": 1.321, + "step": 51484 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674893381372785e-05, + "loss": 1.4032, + "step": 51485 + }, + { + "epoch": 0.05, + "learning_rate": 4.967488075499234e-05, + "loss": 1.3183, + "step": 51486 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674868128368324e-05, + "loss": 1.0688, + "step": 51487 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674855501500715e-05, + "loss": 0.8893, + "step": 51488 + }, + { + "epoch": 0.05, + "learning_rate": 4.967484287438954e-05, + "loss": 0.9818, + "step": 51489 + }, + { + "epoch": 0.05, + "learning_rate": 4.967483024703478e-05, + "loss": 1.4456, + "step": 51490 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674817619436435e-05, + "loss": 1.2699, + "step": 51491 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674804991594514e-05, + "loss": 0.9239, + "step": 51492 + }, + { + "epoch": 0.05, + "learning_rate": 4.967479236350901e-05, + "loss": 0.554, + "step": 51493 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674779735179946e-05, + "loss": 1.3473, + "step": 51494 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674767106607286e-05, + "loss": 1.3245, + "step": 51495 + }, + { + "epoch": 0.05, + "learning_rate": 4.967475447779105e-05, + "loss": 1.2106, + "step": 51496 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674741848731234e-05, + "loss": 0.9428, + "step": 51497 + }, + { + "epoch": 0.05, + "learning_rate": 4.967472921942784e-05, + "loss": 1.0283, + "step": 51498 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674716589880864e-05, + "loss": 0.8604, + "step": 51499 + }, + { + "epoch": 0.05, + "learning_rate": 4.967470396009032e-05, + "loss": 1.2091, + "step": 51500 + }, + { + "epoch": 0.05, + "eval_loss": 1.0775607824325562, + "eval_runtime": 128.4334, + "eval_samples_per_second": 10.784, + "eval_steps_per_second": 5.396, + "step": 51500 + }, + { + "epoch": 0.05, + "learning_rate": 4.967469133005619e-05, + "loss": 0.9495, + "step": 51501 + }, + { + "epoch": 0.05, + "learning_rate": 4.967467869977848e-05, + "loss": 1.2246, + "step": 51502 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674666069257196e-05, + "loss": 1.0874, + "step": 51503 + }, + { + "epoch": 0.05, + "learning_rate": 4.967465343849233e-05, + "loss": 1.0463, + "step": 51504 + }, + { + "epoch": 0.05, + "learning_rate": 4.96746408074839e-05, + "loss": 0.9654, + "step": 51505 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674628176231876e-05, + "loss": 1.0388, + "step": 51506 + }, + { + "epoch": 0.05, + "learning_rate": 4.967461554473628e-05, + "loss": 1.0211, + "step": 51507 + }, + { + "epoch": 0.05, + "learning_rate": 4.967460291299711e-05, + "loss": 0.7911, + "step": 51508 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674590281014354e-05, + "loss": 1.0714, + "step": 51509 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674577648788034e-05, + "loss": 0.9485, + "step": 51510 + }, + { + "epoch": 0.05, + "learning_rate": 4.967456501631813e-05, + "loss": 1.3072, + "step": 51511 + }, + { + "epoch": 0.05, + "learning_rate": 4.967455238360464e-05, + "loss": 1.5207, + "step": 51512 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674539750647584e-05, + "loss": 1.3495, + "step": 51513 + }, + { + "epoch": 0.05, + "learning_rate": 4.967452711744695e-05, + "loss": 1.4105, + "step": 51514 + }, + { + "epoch": 0.05, + "learning_rate": 4.967451448400274e-05, + "loss": 1.4757, + "step": 51515 + }, + { + "epoch": 0.05, + "learning_rate": 4.967450185031495e-05, + "loss": 1.0526, + "step": 51516 + }, + { + "epoch": 0.05, + "learning_rate": 4.967448921638359e-05, + "loss": 4.2783, + "step": 51517 + }, + { + "epoch": 0.05, + "learning_rate": 4.967447658220865e-05, + "loss": 5.031, + "step": 51518 + }, + { + "epoch": 0.05, + "learning_rate": 4.967446394779013e-05, + "loss": 1.2627, + "step": 51519 + }, + { + "epoch": 0.05, + "learning_rate": 4.967445131312804e-05, + "loss": 1.2538, + "step": 51520 + }, + { + "epoch": 0.05, + "learning_rate": 4.967443867822237e-05, + "loss": 0.8417, + "step": 51521 + }, + { + "epoch": 0.05, + "learning_rate": 4.967442604307313e-05, + "loss": 0.7773, + "step": 51522 + }, + { + "epoch": 0.05, + "learning_rate": 4.967441340768031e-05, + "loss": 1.1533, + "step": 51523 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674400772043914e-05, + "loss": 0.9936, + "step": 51524 + }, + { + "epoch": 0.05, + "learning_rate": 4.967438813616394e-05, + "loss": 1.1728, + "step": 51525 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674375500040396e-05, + "loss": 1.2406, + "step": 51526 + }, + { + "epoch": 0.05, + "learning_rate": 4.967436286367328e-05, + "loss": 0.9511, + "step": 51527 + }, + { + "epoch": 0.05, + "learning_rate": 4.967435022706258e-05, + "loss": 1.5075, + "step": 51528 + }, + { + "epoch": 0.05, + "learning_rate": 4.967433759020832e-05, + "loss": 1.3463, + "step": 51529 + }, + { + "epoch": 0.05, + "learning_rate": 4.967432495311047e-05, + "loss": 1.1087, + "step": 51530 + }, + { + "epoch": 0.05, + "learning_rate": 4.967431231576905e-05, + "loss": 1.6079, + "step": 51531 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674299678184055e-05, + "loss": 1.2773, + "step": 51532 + }, + { + "epoch": 0.05, + "learning_rate": 4.967428704035549e-05, + "loss": 1.2556, + "step": 51533 + }, + { + "epoch": 0.05, + "learning_rate": 4.967427440228335e-05, + "loss": 1.035, + "step": 51534 + }, + { + "epoch": 0.05, + "learning_rate": 4.967426176396763e-05, + "loss": 1.1236, + "step": 51535 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674249125408344e-05, + "loss": 1.0517, + "step": 51536 + }, + { + "epoch": 0.05, + "learning_rate": 4.967423648660548e-05, + "loss": 0.8382, + "step": 51537 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674223847559045e-05, + "loss": 0.5889, + "step": 51538 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674211208269026e-05, + "loss": 0.3865, + "step": 51539 + }, + { + "epoch": 0.05, + "learning_rate": 4.967419856873545e-05, + "loss": 0.4128, + "step": 51540 + }, + { + "epoch": 0.05, + "learning_rate": 4.967418592895829e-05, + "loss": 0.4484, + "step": 51541 + }, + { + "epoch": 0.05, + "learning_rate": 4.967417328893756e-05, + "loss": 0.4549, + "step": 51542 + }, + { + "epoch": 0.05, + "learning_rate": 4.967416064867325e-05, + "loss": 0.3912, + "step": 51543 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674148008165375e-05, + "loss": 0.7654, + "step": 51544 + }, + { + "epoch": 0.05, + "learning_rate": 4.967413536741393e-05, + "loss": 1.0878, + "step": 51545 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674122726418904e-05, + "loss": 2.0341, + "step": 51546 + }, + { + "epoch": 0.05, + "learning_rate": 4.9674110085180317e-05, + "loss": 1.1783, + "step": 51547 + }, + { + "epoch": 0.05, + "learning_rate": 4.967409744369814e-05, + "loss": 1.4286, + "step": 51548 + }, + { + "epoch": 0.05, + "learning_rate": 4.967408480197241e-05, + "loss": 1.121, + "step": 51549 + }, + { + "epoch": 0.05, + "learning_rate": 4.96740721600031e-05, + "loss": 1.31, + "step": 51550 + }, + { + "epoch": 0.05, + "learning_rate": 4.967405951779021e-05, + "loss": 1.1899, + "step": 51551 + }, + { + "epoch": 0.05, + "learning_rate": 4.967404687533376e-05, + "loss": 0.949, + "step": 51552 + }, + { + "epoch": 0.05, + "learning_rate": 4.967403423263373e-05, + "loss": 1.2074, + "step": 51553 + }, + { + "epoch": 0.05, + "learning_rate": 4.967402158969013e-05, + "loss": 1.1903, + "step": 51554 + }, + { + "epoch": 0.05, + "learning_rate": 4.967400894650296e-05, + "loss": 1.0921, + "step": 51555 + }, + { + "epoch": 0.05, + "learning_rate": 4.967399630307222e-05, + "loss": 0.7623, + "step": 51556 + }, + { + "epoch": 0.05, + "learning_rate": 4.96739836593979e-05, + "loss": 1.0981, + "step": 51557 + }, + { + "epoch": 0.05, + "learning_rate": 4.967397101548002e-05, + "loss": 1.0838, + "step": 51558 + }, + { + "epoch": 0.05, + "learning_rate": 4.967395837131856e-05, + "loss": 1.261, + "step": 51559 + }, + { + "epoch": 0.05, + "learning_rate": 4.967394572691354e-05, + "loss": 1.3782, + "step": 51560 + }, + { + "epoch": 0.05, + "learning_rate": 4.967393308226494e-05, + "loss": 1.1777, + "step": 51561 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673920437372774e-05, + "loss": 0.8844, + "step": 51562 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673907792237035e-05, + "loss": 1.0644, + "step": 51563 + }, + { + "epoch": 0.05, + "learning_rate": 4.967389514685773e-05, + "loss": 0.9007, + "step": 51564 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673882501234846e-05, + "loss": 1.0957, + "step": 51565 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673869855368396e-05, + "loss": 0.8572, + "step": 51566 + }, + { + "epoch": 0.05, + "learning_rate": 4.967385720925838e-05, + "loss": 0.9788, + "step": 51567 + }, + { + "epoch": 0.05, + "learning_rate": 4.967384456290479e-05, + "loss": 1.0188, + "step": 51568 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673831916307635e-05, + "loss": 0.742, + "step": 51569 + }, + { + "epoch": 0.05, + "learning_rate": 4.96738192694669e-05, + "loss": 0.8322, + "step": 51570 + }, + { + "epoch": 0.05, + "learning_rate": 4.96738066223826e-05, + "loss": 1.2437, + "step": 51571 + }, + { + "epoch": 0.05, + "learning_rate": 4.967379397505474e-05, + "loss": 1.3505, + "step": 51572 + }, + { + "epoch": 0.05, + "learning_rate": 4.96737813274833e-05, + "loss": 1.3223, + "step": 51573 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673768679668294e-05, + "loss": 1.212, + "step": 51574 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673756031609716e-05, + "loss": 1.0744, + "step": 51575 + }, + { + "epoch": 0.05, + "learning_rate": 4.967374338330757e-05, + "loss": 1.2489, + "step": 51576 + }, + { + "epoch": 0.05, + "learning_rate": 4.967373073476186e-05, + "loss": 1.0722, + "step": 51577 + }, + { + "epoch": 0.05, + "learning_rate": 4.967371808597258e-05, + "loss": 1.2661, + "step": 51578 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673705436939736e-05, + "loss": 1.29, + "step": 51579 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673692787663314e-05, + "loss": 1.1954, + "step": 51580 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673680138143325e-05, + "loss": 1.0933, + "step": 51581 + }, + { + "epoch": 0.05, + "learning_rate": 4.967366748837977e-05, + "loss": 1.2136, + "step": 51582 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673654838372645e-05, + "loss": 1.8135, + "step": 51583 + }, + { + "epoch": 0.05, + "learning_rate": 4.967364218812195e-05, + "loss": 1.366, + "step": 51584 + }, + { + "epoch": 0.05, + "learning_rate": 4.96736295376277e-05, + "loss": 1.395, + "step": 51585 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673616886889875e-05, + "loss": 1.2662, + "step": 51586 + }, + { + "epoch": 0.05, + "learning_rate": 4.967360423590848e-05, + "loss": 1.9725, + "step": 51587 + }, + { + "epoch": 0.05, + "learning_rate": 4.967359158468351e-05, + "loss": 1.5472, + "step": 51588 + }, + { + "epoch": 0.05, + "learning_rate": 4.967357893321499e-05, + "loss": 0.9361, + "step": 51589 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673566281502894e-05, + "loss": 0.4452, + "step": 51590 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673553629547226e-05, + "loss": 1.2479, + "step": 51591 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673540977348e-05, + "loss": 0.9601, + "step": 51592 + }, + { + "epoch": 0.05, + "learning_rate": 4.96735283249052e-05, + "loss": 0.8575, + "step": 51593 + }, + { + "epoch": 0.05, + "learning_rate": 4.967351567221884e-05, + "loss": 1.343, + "step": 51594 + }, + { + "epoch": 0.05, + "learning_rate": 4.967350301928891e-05, + "loss": 1.1755, + "step": 51595 + }, + { + "epoch": 0.05, + "learning_rate": 4.967349036611541e-05, + "loss": 0.2504, + "step": 51596 + }, + { + "epoch": 0.05, + "learning_rate": 4.967347771269835e-05, + "loss": 0.208, + "step": 51597 + }, + { + "epoch": 0.05, + "learning_rate": 4.967346505903772e-05, + "loss": 0.9546, + "step": 51598 + }, + { + "epoch": 0.05, + "learning_rate": 4.967345240513353e-05, + "loss": 1.0824, + "step": 51599 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673439750985765e-05, + "loss": 1.2148, + "step": 51600 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673427096594436e-05, + "loss": 1.0429, + "step": 51601 + }, + { + "epoch": 0.05, + "learning_rate": 4.967341444195954e-05, + "loss": 1.7596, + "step": 51602 + }, + { + "epoch": 0.05, + "learning_rate": 4.967340178708109e-05, + "loss": 1.1402, + "step": 51603 + }, + { + "epoch": 0.05, + "learning_rate": 4.967338913195906e-05, + "loss": 1.3644, + "step": 51604 + }, + { + "epoch": 0.05, + "learning_rate": 4.967337647659348e-05, + "loss": 0.8953, + "step": 51605 + }, + { + "epoch": 0.05, + "learning_rate": 4.967336382098432e-05, + "loss": 0.9681, + "step": 51606 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673351165131604e-05, + "loss": 0.9175, + "step": 51607 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673338509035314e-05, + "loss": 1.0606, + "step": 51608 + }, + { + "epoch": 0.05, + "learning_rate": 4.967332585269548e-05, + "loss": 1.3129, + "step": 51609 + }, + { + "epoch": 0.05, + "learning_rate": 4.967331319611206e-05, + "loss": 0.8927, + "step": 51610 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673300539285085e-05, + "loss": 1.2375, + "step": 51611 + }, + { + "epoch": 0.05, + "learning_rate": 4.967328788221454e-05, + "loss": 1.3083, + "step": 51612 + }, + { + "epoch": 0.05, + "learning_rate": 4.967327522490043e-05, + "loss": 1.4061, + "step": 51613 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673262567342756e-05, + "loss": 1.5069, + "step": 51614 + }, + { + "epoch": 0.05, + "learning_rate": 4.967324990954153e-05, + "loss": 1.2746, + "step": 51615 + }, + { + "epoch": 0.05, + "learning_rate": 4.967323725149673e-05, + "loss": 1.0182, + "step": 51616 + }, + { + "epoch": 0.05, + "learning_rate": 4.967322459320837e-05, + "loss": 1.0279, + "step": 51617 + }, + { + "epoch": 0.05, + "learning_rate": 4.967321193467644e-05, + "loss": 0.9843, + "step": 51618 + }, + { + "epoch": 0.05, + "learning_rate": 4.967319927590095e-05, + "loss": 1.0477, + "step": 51619 + }, + { + "epoch": 0.05, + "learning_rate": 4.96731866168819e-05, + "loss": 1.1079, + "step": 51620 + }, + { + "epoch": 0.05, + "learning_rate": 4.967317395761928e-05, + "loss": 1.2286, + "step": 51621 + }, + { + "epoch": 0.05, + "learning_rate": 4.96731612981131e-05, + "loss": 1.0663, + "step": 51622 + }, + { + "epoch": 0.05, + "learning_rate": 4.967314863836336e-05, + "loss": 1.1788, + "step": 51623 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673135978370054e-05, + "loss": 1.0122, + "step": 51624 + }, + { + "epoch": 0.05, + "learning_rate": 4.967312331813319e-05, + "loss": 1.1037, + "step": 51625 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673110657652756e-05, + "loss": 1.0854, + "step": 51626 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673097996928765e-05, + "loss": 1.0167, + "step": 51627 + }, + { + "epoch": 0.05, + "learning_rate": 4.967308533596121e-05, + "loss": 1.0612, + "step": 51628 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673072674750085e-05, + "loss": 1.2546, + "step": 51629 + }, + { + "epoch": 0.05, + "learning_rate": 4.967306001329541e-05, + "loss": 0.7701, + "step": 51630 + }, + { + "epoch": 0.05, + "learning_rate": 4.967304735159717e-05, + "loss": 1.1011, + "step": 51631 + }, + { + "epoch": 0.05, + "learning_rate": 4.967303468965536e-05, + "loss": 1.0987, + "step": 51632 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673022027469994e-05, + "loss": 0.9272, + "step": 51633 + }, + { + "epoch": 0.05, + "learning_rate": 4.967300936504107e-05, + "loss": 1.3195, + "step": 51634 + }, + { + "epoch": 0.05, + "learning_rate": 4.967299670236858e-05, + "loss": 1.1218, + "step": 51635 + }, + { + "epoch": 0.05, + "learning_rate": 4.967298403945253e-05, + "loss": 1.3276, + "step": 51636 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672971376292914e-05, + "loss": 1.1121, + "step": 51637 + }, + { + "epoch": 0.05, + "learning_rate": 4.967295871288974e-05, + "loss": 0.9125, + "step": 51638 + }, + { + "epoch": 0.05, + "learning_rate": 4.967294604924301e-05, + "loss": 1.2063, + "step": 51639 + }, + { + "epoch": 0.05, + "learning_rate": 4.967293338535271e-05, + "loss": 1.7595, + "step": 51640 + }, + { + "epoch": 0.05, + "learning_rate": 4.967292072121885e-05, + "loss": 1.0064, + "step": 51641 + }, + { + "epoch": 0.05, + "learning_rate": 4.967290805684144e-05, + "loss": 1.2907, + "step": 51642 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672895392220466e-05, + "loss": 1.3186, + "step": 51643 + }, + { + "epoch": 0.05, + "learning_rate": 4.967288272735593e-05, + "loss": 1.1114, + "step": 51644 + }, + { + "epoch": 0.05, + "learning_rate": 4.967287006224782e-05, + "loss": 1.0167, + "step": 51645 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672857396896165e-05, + "loss": 1.1433, + "step": 51646 + }, + { + "epoch": 0.05, + "learning_rate": 4.967284473130095e-05, + "loss": 1.0962, + "step": 51647 + }, + { + "epoch": 0.05, + "learning_rate": 4.967283206546217e-05, + "loss": 1.0868, + "step": 51648 + }, + { + "epoch": 0.05, + "learning_rate": 4.967281939937983e-05, + "loss": 1.023, + "step": 51649 + }, + { + "epoch": 0.05, + "learning_rate": 4.967280673305394e-05, + "loss": 1.1709, + "step": 51650 + }, + { + "epoch": 0.05, + "learning_rate": 4.967279406648448e-05, + "loss": 1.4403, + "step": 51651 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672781399671464e-05, + "loss": 1.036, + "step": 51652 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672768732614885e-05, + "loss": 1.1629, + "step": 51653 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672756065314754e-05, + "loss": 1.2107, + "step": 51654 + }, + { + "epoch": 0.05, + "learning_rate": 4.967274339777106e-05, + "loss": 1.2262, + "step": 51655 + }, + { + "epoch": 0.05, + "learning_rate": 4.96727307299838e-05, + "loss": 0.8381, + "step": 51656 + }, + { + "epoch": 0.05, + "learning_rate": 4.967271806195299e-05, + "loss": 0.8597, + "step": 51657 + }, + { + "epoch": 0.05, + "learning_rate": 4.967270539367862e-05, + "loss": 1.2896, + "step": 51658 + }, + { + "epoch": 0.05, + "learning_rate": 4.967269272516069e-05, + "loss": 1.5411, + "step": 51659 + }, + { + "epoch": 0.05, + "learning_rate": 4.96726800563992e-05, + "loss": 1.3167, + "step": 51660 + }, + { + "epoch": 0.05, + "learning_rate": 4.967266738739416e-05, + "loss": 1.3008, + "step": 51661 + }, + { + "epoch": 0.05, + "learning_rate": 4.967265471814555e-05, + "loss": 0.9603, + "step": 51662 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672642048653394e-05, + "loss": 0.7181, + "step": 51663 + }, + { + "epoch": 0.05, + "learning_rate": 4.967262937891767e-05, + "loss": 0.5362, + "step": 51664 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672616708938394e-05, + "loss": 0.3771, + "step": 51665 + }, + { + "epoch": 0.05, + "learning_rate": 4.967260403871556e-05, + "loss": 0.3867, + "step": 51666 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672591368249164e-05, + "loss": 0.2507, + "step": 51667 + }, + { + "epoch": 0.05, + "learning_rate": 4.967257869753922e-05, + "loss": 0.3741, + "step": 51668 + }, + { + "epoch": 0.05, + "learning_rate": 4.967256602658571e-05, + "loss": 1.0398, + "step": 51669 + }, + { + "epoch": 0.05, + "learning_rate": 4.967255335538864e-05, + "loss": 1.3958, + "step": 51670 + }, + { + "epoch": 0.05, + "learning_rate": 4.967254068394802e-05, + "loss": 1.3549, + "step": 51671 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672528012263844e-05, + "loss": 1.3708, + "step": 51672 + }, + { + "epoch": 0.05, + "learning_rate": 4.967251534033611e-05, + "loss": 0.9993, + "step": 51673 + }, + { + "epoch": 0.05, + "learning_rate": 4.967250266816481e-05, + "loss": 1.3055, + "step": 51674 + }, + { + "epoch": 0.05, + "learning_rate": 4.967248999574996e-05, + "loss": 1.0037, + "step": 51675 + }, + { + "epoch": 0.05, + "learning_rate": 4.967247732309156e-05, + "loss": 1.0175, + "step": 51676 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672464650189596e-05, + "loss": 1.0363, + "step": 51677 + }, + { + "epoch": 0.05, + "learning_rate": 4.967245197704408e-05, + "loss": 0.4171, + "step": 51678 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672439303655005e-05, + "loss": 0.9984, + "step": 51679 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672426630022374e-05, + "loss": 1.4126, + "step": 51680 + }, + { + "epoch": 0.05, + "learning_rate": 4.967241395614619e-05, + "loss": 1.0829, + "step": 51681 + }, + { + "epoch": 0.05, + "learning_rate": 4.967240128202645e-05, + "loss": 0.9459, + "step": 51682 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672388607663154e-05, + "loss": 1.3586, + "step": 51683 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672375933056306e-05, + "loss": 1.0954, + "step": 51684 + }, + { + "epoch": 0.05, + "learning_rate": 4.967236325820589e-05, + "loss": 0.5704, + "step": 51685 + }, + { + "epoch": 0.05, + "learning_rate": 4.967235058311193e-05, + "loss": 1.1308, + "step": 51686 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672337907774415e-05, + "loss": 1.3166, + "step": 51687 + }, + { + "epoch": 0.05, + "learning_rate": 4.967232523219334e-05, + "loss": 1.4554, + "step": 51688 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672312556368714e-05, + "loss": 1.5819, + "step": 51689 + }, + { + "epoch": 0.05, + "learning_rate": 4.967229988030053e-05, + "loss": 1.0802, + "step": 51690 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672287203988796e-05, + "loss": 1.3267, + "step": 51691 + }, + { + "epoch": 0.05, + "learning_rate": 4.96722745274335e-05, + "loss": 1.1801, + "step": 51692 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672261850634655e-05, + "loss": 0.5496, + "step": 51693 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672249173592256e-05, + "loss": 0.8226, + "step": 51694 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672236496306305e-05, + "loss": 1.2306, + "step": 51695 + }, + { + "epoch": 0.05, + "learning_rate": 4.96722238187768e-05, + "loss": 0.8018, + "step": 51696 + }, + { + "epoch": 0.05, + "learning_rate": 4.967221114100373e-05, + "loss": 1.1455, + "step": 51697 + }, + { + "epoch": 0.05, + "learning_rate": 4.967219846298712e-05, + "loss": 1.1927, + "step": 51698 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672185784726946e-05, + "loss": 1.4379, + "step": 51699 + }, + { + "epoch": 0.05, + "learning_rate": 4.967217310622323e-05, + "loss": 1.1379, + "step": 51700 + }, + { + "epoch": 0.05, + "learning_rate": 4.967216042747596e-05, + "loss": 1.1024, + "step": 51701 + }, + { + "epoch": 0.05, + "learning_rate": 4.967214774848512e-05, + "loss": 1.2059, + "step": 51702 + }, + { + "epoch": 0.05, + "learning_rate": 4.967213506925074e-05, + "loss": 1.1096, + "step": 51703 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672122389772804e-05, + "loss": 1.2756, + "step": 51704 + }, + { + "epoch": 0.05, + "learning_rate": 4.967210971005132e-05, + "loss": 1.3283, + "step": 51705 + }, + { + "epoch": 0.05, + "learning_rate": 4.9672097030086285e-05, + "loss": 0.8826, + "step": 51706 + }, + { + "epoch": 0.05, + "learning_rate": 4.967208434987769e-05, + "loss": 1.343, + "step": 51707 + }, + { + "epoch": 0.05, + "learning_rate": 4.967207166942554e-05, + "loss": 1.5738, + "step": 51708 + }, + { + "epoch": 0.05, + "learning_rate": 4.967205898872985e-05, + "loss": 0.7135, + "step": 51709 + }, + { + "epoch": 0.05, + "learning_rate": 4.96720463077906e-05, + "loss": 1.1618, + "step": 51710 + }, + { + "epoch": 0.05, + "learning_rate": 4.967203362660779e-05, + "loss": 1.3252, + "step": 51711 + }, + { + "epoch": 0.05, + "learning_rate": 4.967202094518144e-05, + "loss": 1.2174, + "step": 51712 + }, + { + "epoch": 0.05, + "learning_rate": 4.967200826351154e-05, + "loss": 1.0645, + "step": 51713 + }, + { + "epoch": 0.05, + "learning_rate": 4.967199558159808e-05, + "loss": 0.632, + "step": 51714 + }, + { + "epoch": 0.05, + "learning_rate": 4.967198289944107e-05, + "loss": 0.6852, + "step": 51715 + }, + { + "epoch": 0.05, + "learning_rate": 4.967197021704051e-05, + "loss": 0.4618, + "step": 51716 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671957534396404e-05, + "loss": 1.156, + "step": 51717 + }, + { + "epoch": 0.05, + "learning_rate": 4.967194485150874e-05, + "loss": 1.2968, + "step": 51718 + }, + { + "epoch": 0.05, + "learning_rate": 4.967193216837753e-05, + "loss": 1.1674, + "step": 51719 + }, + { + "epoch": 0.05, + "learning_rate": 4.967191948500276e-05, + "loss": 1.2086, + "step": 51720 + }, + { + "epoch": 0.05, + "learning_rate": 4.967190680138445e-05, + "loss": 0.9068, + "step": 51721 + }, + { + "epoch": 0.05, + "learning_rate": 4.967189411752258e-05, + "loss": 0.9993, + "step": 51722 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671881433417167e-05, + "loss": 0.7994, + "step": 51723 + }, + { + "epoch": 0.05, + "learning_rate": 4.967186874906819e-05, + "loss": 1.3346, + "step": 51724 + }, + { + "epoch": 0.05, + "learning_rate": 4.967185606447568e-05, + "loss": 1.4514, + "step": 51725 + }, + { + "epoch": 0.05, + "learning_rate": 4.967184337963962e-05, + "loss": 1.0737, + "step": 51726 + }, + { + "epoch": 0.05, + "learning_rate": 4.967183069455999e-05, + "loss": 1.2009, + "step": 51727 + }, + { + "epoch": 0.05, + "learning_rate": 4.967181800923683e-05, + "loss": 0.9337, + "step": 51728 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671805323670115e-05, + "loss": 0.8403, + "step": 51729 + }, + { + "epoch": 0.05, + "learning_rate": 4.967179263785985e-05, + "loss": 1.1101, + "step": 51730 + }, + { + "epoch": 0.05, + "learning_rate": 4.967177995180603e-05, + "loss": 1.2796, + "step": 51731 + }, + { + "epoch": 0.05, + "learning_rate": 4.967176726550866e-05, + "loss": 1.2928, + "step": 51732 + }, + { + "epoch": 0.05, + "learning_rate": 4.967175457896776e-05, + "loss": 1.251, + "step": 51733 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671741892183286e-05, + "loss": 1.0282, + "step": 51734 + }, + { + "epoch": 0.05, + "learning_rate": 4.967172920515527e-05, + "loss": 1.2694, + "step": 51735 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671716517883714e-05, + "loss": 0.9206, + "step": 51736 + }, + { + "epoch": 0.05, + "learning_rate": 4.96717038303686e-05, + "loss": 1.2255, + "step": 51737 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671691142609946e-05, + "loss": 0.9271, + "step": 51738 + }, + { + "epoch": 0.05, + "learning_rate": 4.967167845460773e-05, + "loss": 1.4877, + "step": 51739 + }, + { + "epoch": 0.05, + "learning_rate": 4.967166576636198e-05, + "loss": 0.9717, + "step": 51740 + }, + { + "epoch": 0.05, + "learning_rate": 4.967165307787267e-05, + "loss": 0.9876, + "step": 51741 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671640389139814e-05, + "loss": 1.0732, + "step": 51742 + }, + { + "epoch": 0.05, + "learning_rate": 4.967162770016342e-05, + "loss": 1.0829, + "step": 51743 + }, + { + "epoch": 0.05, + "learning_rate": 4.967161501094347e-05, + "loss": 0.5115, + "step": 51744 + }, + { + "epoch": 0.05, + "learning_rate": 4.967160232147998e-05, + "loss": 1.0612, + "step": 51745 + }, + { + "epoch": 0.05, + "learning_rate": 4.967158963177293e-05, + "loss": 0.8973, + "step": 51746 + }, + { + "epoch": 0.05, + "learning_rate": 4.967157694182234e-05, + "loss": 1.2944, + "step": 51747 + }, + { + "epoch": 0.05, + "learning_rate": 4.96715642516282e-05, + "loss": 1.2105, + "step": 51748 + }, + { + "epoch": 0.05, + "learning_rate": 4.967155156119052e-05, + "loss": 0.8171, + "step": 51749 + }, + { + "epoch": 0.05, + "learning_rate": 4.967153887050928e-05, + "loss": 0.8203, + "step": 51750 + }, + { + "epoch": 0.05, + "learning_rate": 4.96715261795845e-05, + "loss": 1.6517, + "step": 51751 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671513488416175e-05, + "loss": 1.3989, + "step": 51752 + }, + { + "epoch": 0.05, + "learning_rate": 4.96715007970043e-05, + "loss": 0.6272, + "step": 51753 + }, + { + "epoch": 0.05, + "learning_rate": 4.967148810534888e-05, + "loss": 0.4035, + "step": 51754 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671475413449906e-05, + "loss": 0.7503, + "step": 51755 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671462721307396e-05, + "loss": 1.201, + "step": 51756 + }, + { + "epoch": 0.05, + "learning_rate": 4.967145002892134e-05, + "loss": 1.1359, + "step": 51757 + }, + { + "epoch": 0.05, + "learning_rate": 4.967143733629173e-05, + "loss": 0.8516, + "step": 51758 + }, + { + "epoch": 0.05, + "learning_rate": 4.967142464341858e-05, + "loss": 0.8251, + "step": 51759 + }, + { + "epoch": 0.05, + "learning_rate": 4.967141195030188e-05, + "loss": 0.8116, + "step": 51760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671399256941635e-05, + "loss": 1.1688, + "step": 51761 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671386563337844e-05, + "loss": 1.2055, + "step": 51762 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671373869490514e-05, + "loss": 1.0374, + "step": 51763 + }, + { + "epoch": 0.05, + "learning_rate": 4.967136117539963e-05, + "loss": 1.0805, + "step": 51764 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671348481065205e-05, + "loss": 1.1294, + "step": 51765 + }, + { + "epoch": 0.05, + "learning_rate": 4.967133578648723e-05, + "loss": 1.2038, + "step": 51766 + }, + { + "epoch": 0.05, + "learning_rate": 4.967132309166572e-05, + "loss": 0.6925, + "step": 51767 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671310396600654e-05, + "loss": 0.9803, + "step": 51768 + }, + { + "epoch": 0.05, + "learning_rate": 4.967129770129205e-05, + "loss": 1.5786, + "step": 51769 + }, + { + "epoch": 0.05, + "learning_rate": 4.96712850057399e-05, + "loss": 0.9764, + "step": 51770 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671272309944206e-05, + "loss": 1.1885, + "step": 51771 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671259613904965e-05, + "loss": 1.1041, + "step": 51772 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671246917622185e-05, + "loss": 1.0034, + "step": 51773 + }, + { + "epoch": 0.05, + "learning_rate": 4.967123422109585e-05, + "loss": 0.1412, + "step": 51774 + }, + { + "epoch": 0.05, + "learning_rate": 4.967122152432598e-05, + "loss": 0.1062, + "step": 51775 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671208827312565e-05, + "loss": 0.088, + "step": 51776 + }, + { + "epoch": 0.05, + "learning_rate": 4.967119613005561e-05, + "loss": 0.0614, + "step": 51777 + }, + { + "epoch": 0.05, + "learning_rate": 4.96711834325551e-05, + "loss": 0.0594, + "step": 51778 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671170734811054e-05, + "loss": 0.0411, + "step": 51779 + }, + { + "epoch": 0.05, + "learning_rate": 4.967115803682347e-05, + "loss": 0.0348, + "step": 51780 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671145338592336e-05, + "loss": 0.249, + "step": 51781 + }, + { + "epoch": 0.05, + "learning_rate": 4.967113264011766e-05, + "loss": 1.4521, + "step": 51782 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671119941399436e-05, + "loss": 0.8714, + "step": 51783 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671107242437674e-05, + "loss": 0.5126, + "step": 51784 + }, + { + "epoch": 0.05, + "learning_rate": 4.967109454323237e-05, + "loss": 0.4819, + "step": 51785 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671081843783526e-05, + "loss": 0.4114, + "step": 51786 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671069144091134e-05, + "loss": 0.3905, + "step": 51787 + }, + { + "epoch": 0.05, + "learning_rate": 4.96710564441552e-05, + "loss": 0.4036, + "step": 51788 + }, + { + "epoch": 0.05, + "learning_rate": 4.9671043743975726e-05, + "loss": 0.4251, + "step": 51789 + }, + { + "epoch": 0.05, + "learning_rate": 4.967103104355271e-05, + "loss": 1.101, + "step": 51790 + }, + { + "epoch": 0.05, + "learning_rate": 4.967101834288615e-05, + "loss": 1.4059, + "step": 51791 + }, + { + "epoch": 0.05, + "learning_rate": 4.967100564197605e-05, + "loss": 0.9449, + "step": 51792 + }, + { + "epoch": 0.05, + "learning_rate": 4.96709929408224e-05, + "loss": 0.8725, + "step": 51793 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670980239425224e-05, + "loss": 0.999, + "step": 51794 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670967537784494e-05, + "loss": 0.947, + "step": 51795 + }, + { + "epoch": 0.05, + "learning_rate": 4.967095483590023e-05, + "loss": 1.1362, + "step": 51796 + }, + { + "epoch": 0.05, + "learning_rate": 4.967094213377242e-05, + "loss": 1.2675, + "step": 51797 + }, + { + "epoch": 0.05, + "learning_rate": 4.967092943140107e-05, + "loss": 1.1512, + "step": 51798 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670916728786175e-05, + "loss": 1.8528, + "step": 51799 + }, + { + "epoch": 0.05, + "learning_rate": 4.967090402592774e-05, + "loss": 1.5481, + "step": 51800 + }, + { + "epoch": 0.05, + "learning_rate": 4.967089132282577e-05, + "loss": 1.7985, + "step": 51801 + }, + { + "epoch": 0.05, + "learning_rate": 4.967087861948026e-05, + "loss": 0.8957, + "step": 51802 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670865915891207e-05, + "loss": 1.4816, + "step": 51803 + }, + { + "epoch": 0.05, + "learning_rate": 4.967085321205861e-05, + "loss": 1.58, + "step": 51804 + }, + { + "epoch": 0.05, + "learning_rate": 4.967084050798248e-05, + "loss": 1.7592, + "step": 51805 + }, + { + "epoch": 0.05, + "learning_rate": 4.96708278036628e-05, + "loss": 1.1631, + "step": 51806 + }, + { + "epoch": 0.05, + "learning_rate": 4.967081509909959e-05, + "loss": 1.4126, + "step": 51807 + }, + { + "epoch": 0.05, + "learning_rate": 4.967080239429284e-05, + "loss": 1.7235, + "step": 51808 + }, + { + "epoch": 0.05, + "learning_rate": 4.967078968924254e-05, + "loss": 1.7359, + "step": 51809 + }, + { + "epoch": 0.05, + "learning_rate": 4.967077698394871e-05, + "loss": 1.2499, + "step": 51810 + }, + { + "epoch": 0.05, + "learning_rate": 4.967076427841133e-05, + "loss": 1.0487, + "step": 51811 + }, + { + "epoch": 0.05, + "learning_rate": 4.967075157263042e-05, + "loss": 1.0361, + "step": 51812 + }, + { + "epoch": 0.05, + "learning_rate": 4.967073886660597e-05, + "loss": 1.0177, + "step": 51813 + }, + { + "epoch": 0.05, + "learning_rate": 4.967072616033798e-05, + "loss": 1.212, + "step": 51814 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670713453826444e-05, + "loss": 1.2086, + "step": 51815 + }, + { + "epoch": 0.05, + "learning_rate": 4.967070074707138e-05, + "loss": 1.1663, + "step": 51816 + }, + { + "epoch": 0.05, + "learning_rate": 4.967068804007277e-05, + "loss": 1.1732, + "step": 51817 + }, + { + "epoch": 0.05, + "learning_rate": 4.967067533283062e-05, + "loss": 1.3008, + "step": 51818 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670662625344945e-05, + "loss": 0.8426, + "step": 51819 + }, + { + "epoch": 0.05, + "learning_rate": 4.967064991761572e-05, + "loss": 0.868, + "step": 51820 + }, + { + "epoch": 0.05, + "learning_rate": 4.967063720964296e-05, + "loss": 0.9535, + "step": 51821 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670624501426656e-05, + "loss": 1.2437, + "step": 51822 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670611792966816e-05, + "loss": 1.0657, + "step": 51823 + }, + { + "epoch": 0.05, + "learning_rate": 4.967059908426344e-05, + "loss": 1.0802, + "step": 51824 + }, + { + "epoch": 0.05, + "learning_rate": 4.967058637531653e-05, + "loss": 1.1057, + "step": 51825 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670573666126074e-05, + "loss": 0.9519, + "step": 51826 + }, + { + "epoch": 0.05, + "learning_rate": 4.967056095669209e-05, + "loss": 1.3203, + "step": 51827 + }, + { + "epoch": 0.05, + "learning_rate": 4.967054824701456e-05, + "loss": 1.2279, + "step": 51828 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670535537093496e-05, + "loss": 1.0398, + "step": 51829 + }, + { + "epoch": 0.05, + "learning_rate": 4.96705228269289e-05, + "loss": 1.0566, + "step": 51830 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670510116520765e-05, + "loss": 1.2058, + "step": 51831 + }, + { + "epoch": 0.05, + "learning_rate": 4.967049740586909e-05, + "loss": 1.1347, + "step": 51832 + }, + { + "epoch": 0.05, + "learning_rate": 4.967048469497388e-05, + "loss": 0.7475, + "step": 51833 + }, + { + "epoch": 0.05, + "learning_rate": 4.967047198383513e-05, + "loss": 0.768, + "step": 51834 + }, + { + "epoch": 0.05, + "learning_rate": 4.967045927245284e-05, + "loss": 1.2189, + "step": 51835 + }, + { + "epoch": 0.05, + "learning_rate": 4.967044656082703e-05, + "loss": 1.2062, + "step": 51836 + }, + { + "epoch": 0.05, + "learning_rate": 4.967043384895767e-05, + "loss": 1.0593, + "step": 51837 + }, + { + "epoch": 0.05, + "learning_rate": 4.967042113684478e-05, + "loss": 0.9167, + "step": 51838 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670408424488346e-05, + "loss": 1.0683, + "step": 51839 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670395711888384e-05, + "loss": 1.1151, + "step": 51840 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670382999044884e-05, + "loss": 1.1355, + "step": 51841 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670370285957845e-05, + "loss": 1.3118, + "step": 51842 + }, + { + "epoch": 0.05, + "learning_rate": 4.967035757262728e-05, + "loss": 1.2826, + "step": 51843 + }, + { + "epoch": 0.05, + "learning_rate": 4.967034485905317e-05, + "loss": 2.4691, + "step": 51844 + }, + { + "epoch": 0.05, + "learning_rate": 4.967033214523553e-05, + "loss": 1.1221, + "step": 51845 + }, + { + "epoch": 0.05, + "learning_rate": 4.967031943117435e-05, + "loss": 0.9834, + "step": 51846 + }, + { + "epoch": 0.05, + "learning_rate": 4.967030671686964e-05, + "loss": 1.3222, + "step": 51847 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670294002321393e-05, + "loss": 1.0021, + "step": 51848 + }, + { + "epoch": 0.05, + "learning_rate": 4.967028128752961e-05, + "loss": 1.0903, + "step": 51849 + }, + { + "epoch": 0.05, + "learning_rate": 4.967026857249429e-05, + "loss": 0.9848, + "step": 51850 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670255857215443e-05, + "loss": 0.8265, + "step": 51851 + }, + { + "epoch": 0.05, + "learning_rate": 4.967024314169305e-05, + "loss": 0.8051, + "step": 51852 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670230425927136e-05, + "loss": 1.2334, + "step": 51853 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670217709917684e-05, + "loss": 1.0843, + "step": 51854 + }, + { + "epoch": 0.05, + "learning_rate": 4.967020499366469e-05, + "loss": 1.1225, + "step": 51855 + }, + { + "epoch": 0.05, + "learning_rate": 4.967019227716817e-05, + "loss": 1.2046, + "step": 51856 + }, + { + "epoch": 0.05, + "learning_rate": 4.967017956042812e-05, + "loss": 0.9454, + "step": 51857 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670166843444534e-05, + "loss": 0.9595, + "step": 51858 + }, + { + "epoch": 0.05, + "learning_rate": 4.967015412621741e-05, + "loss": 0.9908, + "step": 51859 + }, + { + "epoch": 0.05, + "learning_rate": 4.967014140874675e-05, + "loss": 1.2015, + "step": 51860 + }, + { + "epoch": 0.05, + "learning_rate": 4.967012869103256e-05, + "loss": 1.237, + "step": 51861 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670115973074836e-05, + "loss": 1.3267, + "step": 51862 + }, + { + "epoch": 0.05, + "learning_rate": 4.967010325487358e-05, + "loss": 1.187, + "step": 51863 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670090536428795e-05, + "loss": 1.3473, + "step": 51864 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670077817740476e-05, + "loss": 1.2922, + "step": 51865 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670065098808624e-05, + "loss": 0.8503, + "step": 51866 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670052379633234e-05, + "loss": 1.148, + "step": 51867 + }, + { + "epoch": 0.05, + "learning_rate": 4.967003966021432e-05, + "loss": 0.8889, + "step": 51868 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670026940551864e-05, + "loss": 1.1762, + "step": 51869 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670014220645885e-05, + "loss": 1.1403, + "step": 51870 + }, + { + "epoch": 0.05, + "learning_rate": 4.9670001500496366e-05, + "loss": 0.9131, + "step": 51871 + }, + { + "epoch": 0.05, + "learning_rate": 4.966998878010333e-05, + "loss": 0.9412, + "step": 51872 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669976059466746e-05, + "loss": 1.2147, + "step": 51873 + }, + { + "epoch": 0.05, + "learning_rate": 4.966996333858663e-05, + "loss": 1.1071, + "step": 51874 + }, + { + "epoch": 0.05, + "learning_rate": 4.966995061746299e-05, + "loss": 1.2678, + "step": 51875 + }, + { + "epoch": 0.05, + "learning_rate": 4.966993789609582e-05, + "loss": 0.9956, + "step": 51876 + }, + { + "epoch": 0.05, + "learning_rate": 4.966992517448512e-05, + "loss": 0.7154, + "step": 51877 + }, + { + "epoch": 0.05, + "learning_rate": 4.966991245263088e-05, + "loss": 0.7078, + "step": 51878 + }, + { + "epoch": 0.05, + "learning_rate": 4.966989973053312e-05, + "loss": 0.6353, + "step": 51879 + }, + { + "epoch": 0.05, + "learning_rate": 4.966988700819182e-05, + "loss": 0.564, + "step": 51880 + }, + { + "epoch": 0.05, + "learning_rate": 4.966987428560699e-05, + "loss": 0.9677, + "step": 51881 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669861562778635e-05, + "loss": 1.1695, + "step": 51882 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669848839706744e-05, + "loss": 0.9114, + "step": 51883 + }, + { + "epoch": 0.05, + "learning_rate": 4.966983611639133e-05, + "loss": 1.6355, + "step": 51884 + }, + { + "epoch": 0.05, + "learning_rate": 4.966982339283238e-05, + "loss": 1.5078, + "step": 51885 + }, + { + "epoch": 0.05, + "learning_rate": 4.96698106690299e-05, + "loss": 1.1385, + "step": 51886 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669797944983887e-05, + "loss": 1.5239, + "step": 51887 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669785220694355e-05, + "loss": 1.0863, + "step": 51888 + }, + { + "epoch": 0.05, + "learning_rate": 4.966977249616128e-05, + "loss": 0.9125, + "step": 51889 + }, + { + "epoch": 0.05, + "learning_rate": 4.966975977138468e-05, + "loss": 1.1254, + "step": 51890 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669747046364556e-05, + "loss": 1.59, + "step": 51891 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669734321100896e-05, + "loss": 1.0763, + "step": 51892 + }, + { + "epoch": 0.05, + "learning_rate": 4.966972159559371e-05, + "loss": 1.2317, + "step": 51893 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669708869842994e-05, + "loss": 1.0896, + "step": 51894 + }, + { + "epoch": 0.05, + "learning_rate": 4.966969614384875e-05, + "loss": 1.1222, + "step": 51895 + }, + { + "epoch": 0.05, + "learning_rate": 4.966968341761098e-05, + "loss": 1.1927, + "step": 51896 + }, + { + "epoch": 0.05, + "learning_rate": 4.966967069112968e-05, + "loss": 1.1071, + "step": 51897 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669657964404845e-05, + "loss": 1.1032, + "step": 51898 + }, + { + "epoch": 0.05, + "learning_rate": 4.966964523743649e-05, + "loss": 1.543, + "step": 51899 + }, + { + "epoch": 0.05, + "learning_rate": 4.96696325102246e-05, + "loss": 1.406, + "step": 51900 + }, + { + "epoch": 0.05, + "learning_rate": 4.966961978276918e-05, + "loss": 1.2214, + "step": 51901 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669607055070235e-05, + "loss": 1.4692, + "step": 51902 + }, + { + "epoch": 0.05, + "learning_rate": 4.966959432712777e-05, + "loss": 1.3458, + "step": 51903 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669581598941765e-05, + "loss": 1.3144, + "step": 51904 + }, + { + "epoch": 0.05, + "learning_rate": 4.966956887051224e-05, + "loss": 1.2077, + "step": 51905 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669556141839187e-05, + "loss": 0.6827, + "step": 51906 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669543412922606e-05, + "loss": 0.8718, + "step": 51907 + }, + { + "epoch": 0.05, + "learning_rate": 4.966953068376249e-05, + "loss": 1.3898, + "step": 51908 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669517954358855e-05, + "loss": 1.2356, + "step": 51909 + }, + { + "epoch": 0.05, + "learning_rate": 4.966950522471169e-05, + "loss": 1.3237, + "step": 51910 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669492494821e-05, + "loss": 1.283, + "step": 51911 + }, + { + "epoch": 0.05, + "learning_rate": 4.966947976468678e-05, + "loss": 1.0422, + "step": 51912 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669467034309035e-05, + "loss": 1.471, + "step": 51913 + }, + { + "epoch": 0.05, + "learning_rate": 4.966945430368777e-05, + "loss": 1.3171, + "step": 51914 + }, + { + "epoch": 0.05, + "learning_rate": 4.966944157282297e-05, + "loss": 1.0855, + "step": 51915 + }, + { + "epoch": 0.05, + "learning_rate": 4.966942884171465e-05, + "loss": 1.355, + "step": 51916 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669416110362795e-05, + "loss": 1.1228, + "step": 51917 + }, + { + "epoch": 0.05, + "learning_rate": 4.966940337876742e-05, + "loss": 1.0814, + "step": 51918 + }, + { + "epoch": 0.05, + "learning_rate": 4.966939064692852e-05, + "loss": 1.2315, + "step": 51919 + }, + { + "epoch": 0.05, + "learning_rate": 4.966937791484609e-05, + "loss": 1.2801, + "step": 51920 + }, + { + "epoch": 0.05, + "learning_rate": 4.966936518252013e-05, + "loss": 1.0793, + "step": 51921 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669352449950655e-05, + "loss": 1.1382, + "step": 51922 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669339717137655e-05, + "loss": 0.8711, + "step": 51923 + }, + { + "epoch": 0.05, + "learning_rate": 4.966932698408112e-05, + "loss": 0.9714, + "step": 51924 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669314250781066e-05, + "loss": 1.0865, + "step": 51925 + }, + { + "epoch": 0.05, + "learning_rate": 4.966930151723749e-05, + "loss": 1.1672, + "step": 51926 + }, + { + "epoch": 0.05, + "learning_rate": 4.966928878345039e-05, + "loss": 1.061, + "step": 51927 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669276049419754e-05, + "loss": 1.102, + "step": 51928 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669263315145595e-05, + "loss": 1.1924, + "step": 51929 + }, + { + "epoch": 0.05, + "learning_rate": 4.966925058062792e-05, + "loss": 1.142, + "step": 51930 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669237845866715e-05, + "loss": 0.9989, + "step": 51931 + }, + { + "epoch": 0.05, + "learning_rate": 4.966922511086198e-05, + "loss": 0.7338, + "step": 51932 + }, + { + "epoch": 0.05, + "learning_rate": 4.966921237561373e-05, + "loss": 0.5105, + "step": 51933 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669199640121954e-05, + "loss": 1.2873, + "step": 51934 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669186904386656e-05, + "loss": 0.9242, + "step": 51935 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669174168407834e-05, + "loss": 0.6223, + "step": 51936 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669161432185485e-05, + "loss": 0.7926, + "step": 51937 + }, + { + "epoch": 0.05, + "learning_rate": 4.966914869571961e-05, + "loss": 0.6962, + "step": 51938 + }, + { + "epoch": 0.05, + "learning_rate": 4.966913595901021e-05, + "loss": 0.8485, + "step": 51939 + }, + { + "epoch": 0.05, + "learning_rate": 4.96691232220573e-05, + "loss": 1.4391, + "step": 51940 + }, + { + "epoch": 0.05, + "learning_rate": 4.966911048486086e-05, + "loss": 1.1088, + "step": 51941 + }, + { + "epoch": 0.05, + "learning_rate": 4.96690977474209e-05, + "loss": 1.2757, + "step": 51942 + }, + { + "epoch": 0.05, + "learning_rate": 4.9669085009737404e-05, + "loss": 1.5748, + "step": 51943 + }, + { + "epoch": 0.05, + "learning_rate": 4.96690722718104e-05, + "loss": 1.2779, + "step": 51944 + }, + { + "epoch": 0.05, + "learning_rate": 4.966905953363986e-05, + "loss": 1.3259, + "step": 51945 + }, + { + "epoch": 0.05, + "learning_rate": 4.966904679522581e-05, + "loss": 0.9898, + "step": 51946 + }, + { + "epoch": 0.05, + "learning_rate": 4.966903405656823e-05, + "loss": 1.1763, + "step": 51947 + }, + { + "epoch": 0.05, + "learning_rate": 4.966902131766713e-05, + "loss": 0.9795, + "step": 51948 + }, + { + "epoch": 0.05, + "learning_rate": 4.966900857852251e-05, + "loss": 1.1802, + "step": 51949 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668995839134366e-05, + "loss": 0.9445, + "step": 51950 + }, + { + "epoch": 0.05, + "learning_rate": 4.96689830995027e-05, + "loss": 1.2073, + "step": 51951 + }, + { + "epoch": 0.05, + "learning_rate": 4.966897035962751e-05, + "loss": 1.0002, + "step": 51952 + }, + { + "epoch": 0.05, + "learning_rate": 4.96689576195088e-05, + "loss": 1.1224, + "step": 51953 + }, + { + "epoch": 0.05, + "learning_rate": 4.966894487914657e-05, + "loss": 1.2232, + "step": 51954 + }, + { + "epoch": 0.05, + "learning_rate": 4.966893213854081e-05, + "loss": 1.0091, + "step": 51955 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668919397691544e-05, + "loss": 0.8458, + "step": 51956 + }, + { + "epoch": 0.05, + "learning_rate": 4.966890665659875e-05, + "loss": 0.9435, + "step": 51957 + }, + { + "epoch": 0.05, + "learning_rate": 4.966889391526243e-05, + "loss": 0.7872, + "step": 51958 + }, + { + "epoch": 0.05, + "learning_rate": 4.966888117368259e-05, + "loss": 1.0746, + "step": 51959 + }, + { + "epoch": 0.05, + "learning_rate": 4.966886843185924e-05, + "loss": 1.1199, + "step": 51960 + }, + { + "epoch": 0.05, + "learning_rate": 4.966885568979236e-05, + "loss": 0.5517, + "step": 51961 + }, + { + "epoch": 0.05, + "learning_rate": 4.966884294748196e-05, + "loss": 1.1014, + "step": 51962 + }, + { + "epoch": 0.05, + "learning_rate": 4.966883020492804e-05, + "loss": 1.1456, + "step": 51963 + }, + { + "epoch": 0.05, + "learning_rate": 4.96688174621306e-05, + "loss": 1.0376, + "step": 51964 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668804719089636e-05, + "loss": 1.5457, + "step": 51965 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668791975805154e-05, + "loss": 0.9193, + "step": 51966 + }, + { + "epoch": 0.05, + "learning_rate": 4.966877923227716e-05, + "loss": 0.9254, + "step": 51967 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668766488505634e-05, + "loss": 1.3298, + "step": 51968 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668753744490596e-05, + "loss": 1.4031, + "step": 51969 + }, + { + "epoch": 0.05, + "learning_rate": 4.966874100023204e-05, + "loss": 0.9426, + "step": 51970 + }, + { + "epoch": 0.05, + "learning_rate": 4.966872825572996e-05, + "loss": 1.1783, + "step": 51971 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668715510984364e-05, + "loss": 1.0086, + "step": 51972 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668702765995245e-05, + "loss": 0.9708, + "step": 51973 + }, + { + "epoch": 0.05, + "learning_rate": 4.966869002076261e-05, + "loss": 1.3605, + "step": 51974 + }, + { + "epoch": 0.05, + "learning_rate": 4.966867727528646e-05, + "loss": 0.8443, + "step": 51975 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668664529566777e-05, + "loss": 1.3974, + "step": 51976 + }, + { + "epoch": 0.05, + "learning_rate": 4.966865178360359e-05, + "loss": 1.4757, + "step": 51977 + }, + { + "epoch": 0.05, + "learning_rate": 4.966863903739688e-05, + "loss": 1.3314, + "step": 51978 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668626290946654e-05, + "loss": 1.2591, + "step": 51979 + }, + { + "epoch": 0.05, + "learning_rate": 4.96686135442529e-05, + "loss": 1.1451, + "step": 51980 + }, + { + "epoch": 0.05, + "learning_rate": 4.966860079731563e-05, + "loss": 1.214, + "step": 51981 + }, + { + "epoch": 0.05, + "learning_rate": 4.966858805013485e-05, + "loss": 1.5197, + "step": 51982 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668575302710553e-05, + "loss": 1.2746, + "step": 51983 + }, + { + "epoch": 0.05, + "learning_rate": 4.966856255504273e-05, + "loss": 1.3522, + "step": 51984 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668549807131395e-05, + "loss": 1.2913, + "step": 51985 + }, + { + "epoch": 0.05, + "learning_rate": 4.966853705897654e-05, + "loss": 0.9383, + "step": 51986 + }, + { + "epoch": 0.05, + "learning_rate": 4.966852431057817e-05, + "loss": 1.0891, + "step": 51987 + }, + { + "epoch": 0.05, + "learning_rate": 4.966851156193628e-05, + "loss": 1.3981, + "step": 51988 + }, + { + "epoch": 0.05, + "learning_rate": 4.966849881305087e-05, + "loss": 0.9833, + "step": 51989 + }, + { + "epoch": 0.05, + "learning_rate": 4.966848606392195e-05, + "loss": 1.1908, + "step": 51990 + }, + { + "epoch": 0.05, + "learning_rate": 4.966847331454951e-05, + "loss": 1.0485, + "step": 51991 + }, + { + "epoch": 0.05, + "learning_rate": 4.966846056493355e-05, + "loss": 1.1396, + "step": 51992 + }, + { + "epoch": 0.05, + "learning_rate": 4.966844781507408e-05, + "loss": 1.1453, + "step": 51993 + }, + { + "epoch": 0.05, + "learning_rate": 4.966843506497109e-05, + "loss": 1.1722, + "step": 51994 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668422314624575e-05, + "loss": 0.8722, + "step": 51995 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668409564034555e-05, + "loss": 0.5043, + "step": 51996 + }, + { + "epoch": 0.05, + "learning_rate": 4.966839681320101e-05, + "loss": 1.159, + "step": 51997 + }, + { + "epoch": 0.05, + "learning_rate": 4.966838406212396e-05, + "loss": 1.2168, + "step": 51998 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668371310803386e-05, + "loss": 0.9558, + "step": 51999 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668358559239306e-05, + "loss": 1.5304, + "step": 52000 + }, + { + "epoch": 0.05, + "eval_loss": 1.0703086853027344, + "eval_runtime": 125.4565, + "eval_samples_per_second": 11.04, + "eval_steps_per_second": 5.524, + "step": 52000 + }, + { + "epoch": 0.05, + "learning_rate": 4.96683458074317e-05, + "loss": 1.418, + "step": 52001 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668333055380576e-05, + "loss": 0.7807, + "step": 52002 + }, + { + "epoch": 0.05, + "learning_rate": 4.966832030308595e-05, + "loss": 1.117, + "step": 52003 + }, + { + "epoch": 0.05, + "learning_rate": 4.96683075505478e-05, + "loss": 1.2714, + "step": 52004 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668294797766133e-05, + "loss": 0.7885, + "step": 52005 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668282044740955e-05, + "loss": 1.1122, + "step": 52006 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668269291472266e-05, + "loss": 1.4083, + "step": 52007 + }, + { + "epoch": 0.05, + "learning_rate": 4.966825653796006e-05, + "loss": 1.1875, + "step": 52008 + }, + { + "epoch": 0.05, + "learning_rate": 4.966824378420433e-05, + "loss": 1.2533, + "step": 52009 + }, + { + "epoch": 0.05, + "learning_rate": 4.966823103020509e-05, + "loss": 1.2226, + "step": 52010 + }, + { + "epoch": 0.05, + "learning_rate": 4.966821827596234e-05, + "loss": 0.9889, + "step": 52011 + }, + { + "epoch": 0.05, + "learning_rate": 4.966820552147607e-05, + "loss": 1.1577, + "step": 52012 + }, + { + "epoch": 0.05, + "learning_rate": 4.96681927667463e-05, + "loss": 0.7887, + "step": 52013 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668180011773004e-05, + "loss": 1.145, + "step": 52014 + }, + { + "epoch": 0.05, + "learning_rate": 4.966816725655619e-05, + "loss": 1.0927, + "step": 52015 + }, + { + "epoch": 0.05, + "learning_rate": 4.966815450109587e-05, + "loss": 0.9112, + "step": 52016 + }, + { + "epoch": 0.05, + "learning_rate": 4.966814174539204e-05, + "loss": 1.4039, + "step": 52017 + }, + { + "epoch": 0.05, + "learning_rate": 4.966812898944469e-05, + "loss": 1.4523, + "step": 52018 + }, + { + "epoch": 0.05, + "learning_rate": 4.966811623325382e-05, + "loss": 0.9854, + "step": 52019 + }, + { + "epoch": 0.05, + "learning_rate": 4.966810347681945e-05, + "loss": 1.2305, + "step": 52020 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668090720141556e-05, + "loss": 1.5931, + "step": 52021 + }, + { + "epoch": 0.05, + "learning_rate": 4.966807796322016e-05, + "loss": 1.2519, + "step": 52022 + }, + { + "epoch": 0.05, + "learning_rate": 4.966806520605524e-05, + "loss": 0.9767, + "step": 52023 + }, + { + "epoch": 0.05, + "learning_rate": 4.966805244864682e-05, + "loss": 1.1565, + "step": 52024 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668039690994875e-05, + "loss": 1.7571, + "step": 52025 + }, + { + "epoch": 0.05, + "learning_rate": 4.966802693309942e-05, + "loss": 1.4884, + "step": 52026 + }, + { + "epoch": 0.05, + "learning_rate": 4.9668014174960453e-05, + "loss": 1.3462, + "step": 52027 + }, + { + "epoch": 0.05, + "learning_rate": 4.966800141657798e-05, + "loss": 0.9514, + "step": 52028 + }, + { + "epoch": 0.05, + "learning_rate": 4.966798865795199e-05, + "loss": 1.4721, + "step": 52029 + }, + { + "epoch": 0.05, + "learning_rate": 4.966797589908249e-05, + "loss": 1.1214, + "step": 52030 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667963139969475e-05, + "loss": 1.1559, + "step": 52031 + }, + { + "epoch": 0.05, + "learning_rate": 4.966795038061295e-05, + "loss": 1.1037, + "step": 52032 + }, + { + "epoch": 0.05, + "learning_rate": 4.966793762101292e-05, + "loss": 0.8786, + "step": 52033 + }, + { + "epoch": 0.05, + "learning_rate": 4.966792486116937e-05, + "loss": 0.9768, + "step": 52034 + }, + { + "epoch": 0.05, + "learning_rate": 4.96679121010823e-05, + "loss": 1.0437, + "step": 52035 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667899340751736e-05, + "loss": 1.3013, + "step": 52036 + }, + { + "epoch": 0.05, + "learning_rate": 4.966788658017766e-05, + "loss": 1.093, + "step": 52037 + }, + { + "epoch": 0.05, + "learning_rate": 4.966787381936006e-05, + "loss": 1.1951, + "step": 52038 + }, + { + "epoch": 0.05, + "learning_rate": 4.966786105829896e-05, + "loss": 1.206, + "step": 52039 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667848296994346e-05, + "loss": 1.1404, + "step": 52040 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667835535446214e-05, + "loss": 1.404, + "step": 52041 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667822773654584e-05, + "loss": 1.2113, + "step": 52042 + }, + { + "epoch": 0.05, + "learning_rate": 4.966781001161944e-05, + "loss": 1.4991, + "step": 52043 + }, + { + "epoch": 0.05, + "learning_rate": 4.966779724934079e-05, + "loss": 0.6577, + "step": 52044 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667784486818616e-05, + "loss": 0.3646, + "step": 52045 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667771724052945e-05, + "loss": 0.5426, + "step": 52046 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667758961043756e-05, + "loss": 1.0744, + "step": 52047 + }, + { + "epoch": 0.05, + "learning_rate": 4.966774619779106e-05, + "loss": 1.0151, + "step": 52048 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667733434294855e-05, + "loss": 1.1289, + "step": 52049 + }, + { + "epoch": 0.05, + "learning_rate": 4.966772067055514e-05, + "loss": 1.1372, + "step": 52050 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667707906571914e-05, + "loss": 1.061, + "step": 52051 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667695142345186e-05, + "loss": 1.3647, + "step": 52052 + }, + { + "epoch": 0.05, + "learning_rate": 4.966768237787493e-05, + "loss": 0.911, + "step": 52053 + }, + { + "epoch": 0.05, + "learning_rate": 4.966766961316119e-05, + "loss": 1.1869, + "step": 52054 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667656848203925e-05, + "loss": 1.2912, + "step": 52055 + }, + { + "epoch": 0.05, + "learning_rate": 4.966764408300316e-05, + "loss": 1.1705, + "step": 52056 + }, + { + "epoch": 0.05, + "learning_rate": 4.966763131755888e-05, + "loss": 1.31, + "step": 52057 + }, + { + "epoch": 0.05, + "learning_rate": 4.96676185518711e-05, + "loss": 1.1062, + "step": 52058 + }, + { + "epoch": 0.05, + "learning_rate": 4.96676057859398e-05, + "loss": 0.9691, + "step": 52059 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667593019765005e-05, + "loss": 1.5011, + "step": 52060 + }, + { + "epoch": 0.05, + "learning_rate": 4.966758025334669e-05, + "loss": 1.8592, + "step": 52061 + }, + { + "epoch": 0.05, + "learning_rate": 4.966756748668487e-05, + "loss": 1.8622, + "step": 52062 + }, + { + "epoch": 0.05, + "learning_rate": 4.966755471977954e-05, + "loss": 0.9812, + "step": 52063 + }, + { + "epoch": 0.05, + "learning_rate": 4.966754195263071e-05, + "loss": 0.6723, + "step": 52064 + }, + { + "epoch": 0.05, + "learning_rate": 4.966752918523837e-05, + "loss": 0.7784, + "step": 52065 + }, + { + "epoch": 0.05, + "learning_rate": 4.966751641760252e-05, + "loss": 1.0331, + "step": 52066 + }, + { + "epoch": 0.05, + "learning_rate": 4.966750364972316e-05, + "loss": 0.9092, + "step": 52067 + }, + { + "epoch": 0.05, + "learning_rate": 4.96674908816003e-05, + "loss": 0.8889, + "step": 52068 + }, + { + "epoch": 0.05, + "learning_rate": 4.966747811323393e-05, + "loss": 1.3366, + "step": 52069 + }, + { + "epoch": 0.05, + "learning_rate": 4.966746534462405e-05, + "loss": 1.1909, + "step": 52070 + }, + { + "epoch": 0.05, + "learning_rate": 4.966745257577067e-05, + "loss": 1.2708, + "step": 52071 + }, + { + "epoch": 0.05, + "learning_rate": 4.966743980667378e-05, + "loss": 1.0446, + "step": 52072 + }, + { + "epoch": 0.05, + "learning_rate": 4.966742703733338e-05, + "loss": 1.3055, + "step": 52073 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667414267749476e-05, + "loss": 0.9848, + "step": 52074 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667401497922064e-05, + "loss": 1.0932, + "step": 52075 + }, + { + "epoch": 0.05, + "learning_rate": 4.966738872785115e-05, + "loss": 3.5206, + "step": 52076 + }, + { + "epoch": 0.05, + "learning_rate": 4.966737595753672e-05, + "loss": 0.9491, + "step": 52077 + }, + { + "epoch": 0.05, + "learning_rate": 4.96673631869788e-05, + "loss": 0.4289, + "step": 52078 + }, + { + "epoch": 0.05, + "learning_rate": 4.966735041617736e-05, + "loss": 1.0219, + "step": 52079 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667337645132425e-05, + "loss": 0.7158, + "step": 52080 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667324873843976e-05, + "loss": 0.9519, + "step": 52081 + }, + { + "epoch": 0.05, + "learning_rate": 4.966731210231202e-05, + "loss": 1.0215, + "step": 52082 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667299330536564e-05, + "loss": 1.0709, + "step": 52083 + }, + { + "epoch": 0.05, + "learning_rate": 4.966728655851761e-05, + "loss": 0.8784, + "step": 52084 + }, + { + "epoch": 0.05, + "learning_rate": 4.966727378625514e-05, + "loss": 0.8212, + "step": 52085 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667261013749165e-05, + "loss": 0.9951, + "step": 52086 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667248240999694e-05, + "loss": 1.2297, + "step": 52087 + }, + { + "epoch": 0.05, + "learning_rate": 4.966723546800671e-05, + "loss": 1.1806, + "step": 52088 + }, + { + "epoch": 0.05, + "learning_rate": 4.966722269477022e-05, + "loss": 1.1241, + "step": 52089 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667209921290234e-05, + "loss": 0.8857, + "step": 52090 + }, + { + "epoch": 0.05, + "learning_rate": 4.966719714756674e-05, + "loss": 0.8648, + "step": 52091 + }, + { + "epoch": 0.05, + "learning_rate": 4.966718437359974e-05, + "loss": 0.8054, + "step": 52092 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667171599389236e-05, + "loss": 1.3571, + "step": 52093 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667158824935236e-05, + "loss": 1.521, + "step": 52094 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667146050237724e-05, + "loss": 0.9733, + "step": 52095 + }, + { + "epoch": 0.05, + "learning_rate": 4.966713327529671e-05, + "loss": 1.1643, + "step": 52096 + }, + { + "epoch": 0.05, + "learning_rate": 4.966712050011219e-05, + "loss": 0.8154, + "step": 52097 + }, + { + "epoch": 0.05, + "learning_rate": 4.966710772468417e-05, + "loss": 1.2312, + "step": 52098 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667094949012646e-05, + "loss": 0.9694, + "step": 52099 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667082173097615e-05, + "loss": 0.9098, + "step": 52100 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667069396939086e-05, + "loss": 0.874, + "step": 52101 + }, + { + "epoch": 0.05, + "learning_rate": 4.966705662053705e-05, + "loss": 0.8626, + "step": 52102 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667043843891514e-05, + "loss": 0.8463, + "step": 52103 + }, + { + "epoch": 0.05, + "learning_rate": 4.966703106700248e-05, + "loss": 0.8346, + "step": 52104 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667018289869935e-05, + "loss": 0.8508, + "step": 52105 + }, + { + "epoch": 0.05, + "learning_rate": 4.9667005512493894e-05, + "loss": 0.8486, + "step": 52106 + }, + { + "epoch": 0.05, + "learning_rate": 4.966699273487435e-05, + "loss": 0.8424, + "step": 52107 + }, + { + "epoch": 0.05, + "learning_rate": 4.966697995701129e-05, + "loss": 0.8382, + "step": 52108 + }, + { + "epoch": 0.05, + "learning_rate": 4.966696717890475e-05, + "loss": 0.8449, + "step": 52109 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666954400554696e-05, + "loss": 0.8236, + "step": 52110 + }, + { + "epoch": 0.05, + "learning_rate": 4.966694162196114e-05, + "loss": 0.8132, + "step": 52111 + }, + { + "epoch": 0.05, + "learning_rate": 4.966692884312408e-05, + "loss": 0.8249, + "step": 52112 + }, + { + "epoch": 0.05, + "learning_rate": 4.966691606404353e-05, + "loss": 0.8176, + "step": 52113 + }, + { + "epoch": 0.05, + "learning_rate": 4.966690328471947e-05, + "loss": 0.8056, + "step": 52114 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666890505151906e-05, + "loss": 0.8572, + "step": 52115 + }, + { + "epoch": 0.05, + "learning_rate": 4.966687772534085e-05, + "loss": 0.8293, + "step": 52116 + }, + { + "epoch": 0.05, + "learning_rate": 4.966686494528629e-05, + "loss": 0.8326, + "step": 52117 + }, + { + "epoch": 0.05, + "learning_rate": 4.966685216498823e-05, + "loss": 0.8097, + "step": 52118 + }, + { + "epoch": 0.05, + "learning_rate": 4.966683938444666e-05, + "loss": 0.8164, + "step": 52119 + }, + { + "epoch": 0.05, + "learning_rate": 4.96668266036616e-05, + "loss": 0.835, + "step": 52120 + }, + { + "epoch": 0.05, + "learning_rate": 4.966681382263303e-05, + "loss": 0.8242, + "step": 52121 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666801041360964e-05, + "loss": 0.8089, + "step": 52122 + }, + { + "epoch": 0.05, + "learning_rate": 4.96667882598454e-05, + "loss": 0.8099, + "step": 52123 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666775478086336e-05, + "loss": 0.8041, + "step": 52124 + }, + { + "epoch": 0.05, + "learning_rate": 4.966676269608377e-05, + "loss": 0.8352, + "step": 52125 + }, + { + "epoch": 0.05, + "learning_rate": 4.966674991383771e-05, + "loss": 0.8237, + "step": 52126 + }, + { + "epoch": 0.05, + "learning_rate": 4.966673713134814e-05, + "loss": 0.8232, + "step": 52127 + }, + { + "epoch": 0.05, + "learning_rate": 4.966672434861508e-05, + "loss": 0.8138, + "step": 52128 + }, + { + "epoch": 0.05, + "learning_rate": 4.966671156563851e-05, + "loss": 0.8221, + "step": 52129 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666698782418446e-05, + "loss": 0.8096, + "step": 52130 + }, + { + "epoch": 0.05, + "learning_rate": 4.966668599895489e-05, + "loss": 0.8267, + "step": 52131 + }, + { + "epoch": 0.05, + "learning_rate": 4.966667321524782e-05, + "loss": 0.8181, + "step": 52132 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666660431297265e-05, + "loss": 0.8229, + "step": 52133 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666647647103205e-05, + "loss": 0.8095, + "step": 52134 + }, + { + "epoch": 0.05, + "learning_rate": 4.966663486266564e-05, + "loss": 0.8243, + "step": 52135 + }, + { + "epoch": 0.05, + "learning_rate": 4.966662207798458e-05, + "loss": 0.8162, + "step": 52136 + }, + { + "epoch": 0.05, + "learning_rate": 4.966660929306003e-05, + "loss": 0.828, + "step": 52137 + }, + { + "epoch": 0.05, + "learning_rate": 4.966659650789197e-05, + "loss": 0.9173, + "step": 52138 + }, + { + "epoch": 0.05, + "learning_rate": 4.966658372248042e-05, + "loss": 1.3388, + "step": 52139 + }, + { + "epoch": 0.05, + "learning_rate": 4.966657093682537e-05, + "loss": 0.7117, + "step": 52140 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666558150926826e-05, + "loss": 0.3596, + "step": 52141 + }, + { + "epoch": 0.05, + "learning_rate": 4.966654536478478e-05, + "loss": 1.2451, + "step": 52142 + }, + { + "epoch": 0.05, + "learning_rate": 4.966653257839923e-05, + "loss": 1.1854, + "step": 52143 + }, + { + "epoch": 0.05, + "learning_rate": 4.966651979177019e-05, + "loss": 0.9259, + "step": 52144 + }, + { + "epoch": 0.05, + "learning_rate": 4.966650700489765e-05, + "loss": 0.9478, + "step": 52145 + }, + { + "epoch": 0.05, + "learning_rate": 4.966649421778161e-05, + "loss": 0.6892, + "step": 52146 + }, + { + "epoch": 0.05, + "learning_rate": 4.966648143042208e-05, + "loss": 1.1562, + "step": 52147 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666468642819045e-05, + "loss": 0.8988, + "step": 52148 + }, + { + "epoch": 0.05, + "learning_rate": 4.966645585497252e-05, + "loss": 1.2359, + "step": 52149 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666443066882494e-05, + "loss": 1.0505, + "step": 52150 + }, + { + "epoch": 0.05, + "learning_rate": 4.966643027854897e-05, + "loss": 1.071, + "step": 52151 + }, + { + "epoch": 0.05, + "learning_rate": 4.966641748997196e-05, + "loss": 1.3864, + "step": 52152 + }, + { + "epoch": 0.05, + "learning_rate": 4.966640470115144e-05, + "loss": 1.2015, + "step": 52153 + }, + { + "epoch": 0.05, + "learning_rate": 4.966639191208743e-05, + "loss": 1.3605, + "step": 52154 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666379122779925e-05, + "loss": 1.1425, + "step": 52155 + }, + { + "epoch": 0.05, + "learning_rate": 4.966636633322892e-05, + "loss": 1.2815, + "step": 52156 + }, + { + "epoch": 0.05, + "learning_rate": 4.966635354343442e-05, + "loss": 1.0937, + "step": 52157 + }, + { + "epoch": 0.05, + "learning_rate": 4.966634075339642e-05, + "loss": 1.4193, + "step": 52158 + }, + { + "epoch": 0.05, + "learning_rate": 4.966632796311493e-05, + "loss": 1.3705, + "step": 52159 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666315172589946e-05, + "loss": 1.3058, + "step": 52160 + }, + { + "epoch": 0.05, + "learning_rate": 4.966630238182146e-05, + "loss": 1.1835, + "step": 52161 + }, + { + "epoch": 0.05, + "learning_rate": 4.966628959080949e-05, + "loss": 0.7187, + "step": 52162 + }, + { + "epoch": 0.05, + "learning_rate": 4.966627679955401e-05, + "loss": 0.542, + "step": 52163 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666264008055043e-05, + "loss": 0.5875, + "step": 52164 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666251216312586e-05, + "loss": 0.5724, + "step": 52165 + }, + { + "epoch": 0.05, + "learning_rate": 4.966623842432663e-05, + "loss": 0.4874, + "step": 52166 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666225632097175e-05, + "loss": 0.482, + "step": 52167 + }, + { + "epoch": 0.05, + "learning_rate": 4.966621283962423e-05, + "loss": 0.3774, + "step": 52168 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666200046907786e-05, + "loss": 0.4349, + "step": 52169 + }, + { + "epoch": 0.05, + "learning_rate": 4.966618725394785e-05, + "loss": 0.378, + "step": 52170 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666174460744416e-05, + "loss": 0.4538, + "step": 52171 + }, + { + "epoch": 0.05, + "learning_rate": 4.96661616672975e-05, + "loss": 0.4397, + "step": 52172 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666148873607075e-05, + "loss": 0.4044, + "step": 52173 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666136079673166e-05, + "loss": 0.3852, + "step": 52174 + }, + { + "epoch": 0.05, + "learning_rate": 4.966612328549576e-05, + "loss": 0.4898, + "step": 52175 + }, + { + "epoch": 0.05, + "learning_rate": 4.966611049107486e-05, + "loss": 0.4931, + "step": 52176 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666097696410466e-05, + "loss": 0.4552, + "step": 52177 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666084901502585e-05, + "loss": 0.4384, + "step": 52178 + }, + { + "epoch": 0.05, + "learning_rate": 4.96660721063512e-05, + "loss": 0.3538, + "step": 52179 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666059310956335e-05, + "loss": 0.4165, + "step": 52180 + }, + { + "epoch": 0.05, + "learning_rate": 4.9666046515317966e-05, + "loss": 0.4181, + "step": 52181 + }, + { + "epoch": 0.05, + "learning_rate": 4.966603371943611e-05, + "loss": 0.4447, + "step": 52182 + }, + { + "epoch": 0.05, + "learning_rate": 4.966602092331076e-05, + "loss": 0.4237, + "step": 52183 + }, + { + "epoch": 0.05, + "learning_rate": 4.966600812694192e-05, + "loss": 0.4903, + "step": 52184 + }, + { + "epoch": 0.05, + "learning_rate": 4.966599533032958e-05, + "loss": 0.4106, + "step": 52185 + }, + { + "epoch": 0.05, + "learning_rate": 4.966598253347376e-05, + "loss": 0.3703, + "step": 52186 + }, + { + "epoch": 0.05, + "learning_rate": 4.966596973637443e-05, + "loss": 0.3948, + "step": 52187 + }, + { + "epoch": 0.05, + "learning_rate": 4.966595693903162e-05, + "loss": 0.4204, + "step": 52188 + }, + { + "epoch": 0.05, + "learning_rate": 4.966594414144532e-05, + "loss": 0.4035, + "step": 52189 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665931343615523e-05, + "loss": 0.354, + "step": 52190 + }, + { + "epoch": 0.05, + "learning_rate": 4.966591854554223e-05, + "loss": 0.3821, + "step": 52191 + }, + { + "epoch": 0.05, + "learning_rate": 4.966590574722546e-05, + "loss": 0.4056, + "step": 52192 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665892948665184e-05, + "loss": 0.419, + "step": 52193 + }, + { + "epoch": 0.05, + "learning_rate": 4.966588014986142e-05, + "loss": 0.3872, + "step": 52194 + }, + { + "epoch": 0.05, + "learning_rate": 4.966586735081417e-05, + "loss": 0.3611, + "step": 52195 + }, + { + "epoch": 0.05, + "learning_rate": 4.966585455152343e-05, + "loss": 0.3782, + "step": 52196 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665841751989197e-05, + "loss": 0.3899, + "step": 52197 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665828952211466e-05, + "loss": 0.4095, + "step": 52198 + }, + { + "epoch": 0.05, + "learning_rate": 4.966581615219025e-05, + "loss": 0.3554, + "step": 52199 + }, + { + "epoch": 0.05, + "learning_rate": 4.966580335192554e-05, + "loss": 0.3381, + "step": 52200 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665790551417345e-05, + "loss": 0.388, + "step": 52201 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665777750665655e-05, + "loss": 0.3247, + "step": 52202 + }, + { + "epoch": 0.05, + "learning_rate": 4.966576494967048e-05, + "loss": 0.3077, + "step": 52203 + }, + { + "epoch": 0.05, + "learning_rate": 4.966575214843181e-05, + "loss": 0.3215, + "step": 52204 + }, + { + "epoch": 0.05, + "learning_rate": 4.966573934694966e-05, + "loss": 0.3591, + "step": 52205 + }, + { + "epoch": 0.05, + "learning_rate": 4.966572654522401e-05, + "loss": 0.3954, + "step": 52206 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665713743254874e-05, + "loss": 0.3932, + "step": 52207 + }, + { + "epoch": 0.05, + "learning_rate": 4.966570094104224e-05, + "loss": 0.4128, + "step": 52208 + }, + { + "epoch": 0.05, + "learning_rate": 4.966568813858613e-05, + "loss": 0.397, + "step": 52209 + }, + { + "epoch": 0.05, + "learning_rate": 4.966567533588652e-05, + "loss": 0.4063, + "step": 52210 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665662532943424e-05, + "loss": 0.3902, + "step": 52211 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665649729756846e-05, + "loss": 0.367, + "step": 52212 + }, + { + "epoch": 0.05, + "learning_rate": 4.966563692632678e-05, + "loss": 0.3387, + "step": 52213 + }, + { + "epoch": 0.05, + "learning_rate": 4.966562412265321e-05, + "loss": 0.3726, + "step": 52214 + }, + { + "epoch": 0.05, + "learning_rate": 4.966561131873616e-05, + "loss": 0.2915, + "step": 52215 + }, + { + "epoch": 0.05, + "learning_rate": 4.966559851457562e-05, + "loss": 0.2401, + "step": 52216 + }, + { + "epoch": 0.05, + "learning_rate": 4.966558571017159e-05, + "loss": 0.3201, + "step": 52217 + }, + { + "epoch": 0.05, + "learning_rate": 4.966557290552408e-05, + "loss": 0.2775, + "step": 52218 + }, + { + "epoch": 0.05, + "learning_rate": 4.966556010063308e-05, + "loss": 0.3052, + "step": 52219 + }, + { + "epoch": 0.05, + "learning_rate": 4.966554729549858e-05, + "loss": 0.3583, + "step": 52220 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665534490120606e-05, + "loss": 0.3936, + "step": 52221 + }, + { + "epoch": 0.05, + "learning_rate": 4.966552168449914e-05, + "loss": 0.3901, + "step": 52222 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665508878634184e-05, + "loss": 1.2757, + "step": 52223 + }, + { + "epoch": 0.05, + "learning_rate": 4.966549607252574e-05, + "loss": 1.0228, + "step": 52224 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665483266173816e-05, + "loss": 1.0983, + "step": 52225 + }, + { + "epoch": 0.05, + "learning_rate": 4.96654704595784e-05, + "loss": 1.4193, + "step": 52226 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665457652739496e-05, + "loss": 1.0788, + "step": 52227 + }, + { + "epoch": 0.05, + "learning_rate": 4.96654448456571e-05, + "loss": 0.8148, + "step": 52228 + }, + { + "epoch": 0.05, + "learning_rate": 4.966543203833123e-05, + "loss": 1.2267, + "step": 52229 + }, + { + "epoch": 0.05, + "learning_rate": 4.966541923076186e-05, + "loss": 1.1171, + "step": 52230 + }, + { + "epoch": 0.05, + "learning_rate": 4.966540642294901e-05, + "loss": 1.5675, + "step": 52231 + }, + { + "epoch": 0.05, + "learning_rate": 4.966539361489267e-05, + "loss": 1.2103, + "step": 52232 + }, + { + "epoch": 0.05, + "learning_rate": 4.966538080659285e-05, + "loss": 1.313, + "step": 52233 + }, + { + "epoch": 0.05, + "learning_rate": 4.966536799804954e-05, + "loss": 1.3657, + "step": 52234 + }, + { + "epoch": 0.05, + "learning_rate": 4.966535518926274e-05, + "loss": 1.5412, + "step": 52235 + }, + { + "epoch": 0.05, + "learning_rate": 4.966534238023246e-05, + "loss": 1.2317, + "step": 52236 + }, + { + "epoch": 0.05, + "learning_rate": 4.966532957095869e-05, + "loss": 2.092, + "step": 52237 + }, + { + "epoch": 0.05, + "learning_rate": 4.966531676144144e-05, + "loss": 1.0445, + "step": 52238 + }, + { + "epoch": 0.05, + "learning_rate": 4.96653039516807e-05, + "loss": 1.2456, + "step": 52239 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665291141676475e-05, + "loss": 1.1981, + "step": 52240 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665278331428766e-05, + "loss": 1.3247, + "step": 52241 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665265520937566e-05, + "loss": 1.2126, + "step": 52242 + }, + { + "epoch": 0.05, + "learning_rate": 4.966525271020289e-05, + "loss": 1.0957, + "step": 52243 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665239899224726e-05, + "loss": 1.3608, + "step": 52244 + }, + { + "epoch": 0.05, + "learning_rate": 4.966522708800307e-05, + "loss": 1.1038, + "step": 52245 + }, + { + "epoch": 0.05, + "learning_rate": 4.966521427653793e-05, + "loss": 1.1411, + "step": 52246 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665201464829316e-05, + "loss": 1.3164, + "step": 52247 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665188652877214e-05, + "loss": 1.1648, + "step": 52248 + }, + { + "epoch": 0.05, + "learning_rate": 4.966517584068162e-05, + "loss": 1.1443, + "step": 52249 + }, + { + "epoch": 0.05, + "learning_rate": 4.966516302824255e-05, + "loss": 1.6526, + "step": 52250 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665150215559995e-05, + "loss": 1.3271, + "step": 52251 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665137402633955e-05, + "loss": 1.4214, + "step": 52252 + }, + { + "epoch": 0.05, + "learning_rate": 4.966512458946443e-05, + "loss": 1.1782, + "step": 52253 + }, + { + "epoch": 0.05, + "learning_rate": 4.966511177605142e-05, + "loss": 0.9843, + "step": 52254 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665098962394926e-05, + "loss": 0.9861, + "step": 52255 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665086148494954e-05, + "loss": 1.6322, + "step": 52256 + }, + { + "epoch": 0.05, + "learning_rate": 4.966507333435149e-05, + "loss": 1.1624, + "step": 52257 + }, + { + "epoch": 0.05, + "learning_rate": 4.966506051996455e-05, + "loss": 1.2777, + "step": 52258 + }, + { + "epoch": 0.05, + "learning_rate": 4.966504770533412e-05, + "loss": 1.4121, + "step": 52259 + }, + { + "epoch": 0.05, + "learning_rate": 4.966503489046022e-05, + "loss": 0.8739, + "step": 52260 + }, + { + "epoch": 0.05, + "learning_rate": 4.966502207534283e-05, + "loss": 0.7016, + "step": 52261 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665009259981955e-05, + "loss": 2.0736, + "step": 52262 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664996444377596e-05, + "loss": 2.2101, + "step": 52263 + }, + { + "epoch": 0.05, + "learning_rate": 4.966498362852976e-05, + "loss": 2.0734, + "step": 52264 + }, + { + "epoch": 0.05, + "learning_rate": 4.966497081243844e-05, + "loss": 1.5519, + "step": 52265 + }, + { + "epoch": 0.05, + "learning_rate": 4.966495799610363e-05, + "loss": 1.2127, + "step": 52266 + }, + { + "epoch": 0.05, + "learning_rate": 4.966494517952535e-05, + "loss": 1.0901, + "step": 52267 + }, + { + "epoch": 0.05, + "learning_rate": 4.966493236270358e-05, + "loss": 0.8855, + "step": 52268 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664919545638335e-05, + "loss": 1.4033, + "step": 52269 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664906728329604e-05, + "loss": 1.6805, + "step": 52270 + }, + { + "epoch": 0.05, + "learning_rate": 4.966489391077739e-05, + "loss": 0.8307, + "step": 52271 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664881092981694e-05, + "loss": 1.0869, + "step": 52272 + }, + { + "epoch": 0.05, + "learning_rate": 4.966486827494252e-05, + "loss": 0.7334, + "step": 52273 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664855456659866e-05, + "loss": 0.9782, + "step": 52274 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664842638133725e-05, + "loss": 1.4505, + "step": 52275 + }, + { + "epoch": 0.05, + "learning_rate": 4.966482981936411e-05, + "loss": 0.9972, + "step": 52276 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664817000351e-05, + "loss": 0.9202, + "step": 52277 + }, + { + "epoch": 0.05, + "learning_rate": 4.966480418109443e-05, + "loss": 1.2436, + "step": 52278 + }, + { + "epoch": 0.05, + "learning_rate": 4.966479136159436e-05, + "loss": 1.4621, + "step": 52279 + }, + { + "epoch": 0.05, + "learning_rate": 4.966477854185083e-05, + "loss": 1.0606, + "step": 52280 + }, + { + "epoch": 0.05, + "learning_rate": 4.96647657218638e-05, + "loss": 0.9117, + "step": 52281 + }, + { + "epoch": 0.05, + "learning_rate": 4.96647529016333e-05, + "loss": 1.2618, + "step": 52282 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664740081159324e-05, + "loss": 1.169, + "step": 52283 + }, + { + "epoch": 0.05, + "learning_rate": 4.966472726044186e-05, + "loss": 1.3812, + "step": 52284 + }, + { + "epoch": 0.05, + "learning_rate": 4.966471443948092e-05, + "loss": 1.0927, + "step": 52285 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664701618276496e-05, + "loss": 0.9917, + "step": 52286 + }, + { + "epoch": 0.05, + "learning_rate": 4.96646887968286e-05, + "loss": 0.8905, + "step": 52287 + }, + { + "epoch": 0.05, + "learning_rate": 4.966467597513722e-05, + "loss": 1.4075, + "step": 52288 + }, + { + "epoch": 0.05, + "learning_rate": 4.966466315320236e-05, + "loss": 1.165, + "step": 52289 + }, + { + "epoch": 0.05, + "learning_rate": 4.966465033102403e-05, + "loss": 1.4168, + "step": 52290 + }, + { + "epoch": 0.05, + "learning_rate": 4.966463750860221e-05, + "loss": 1.3754, + "step": 52291 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664624685936914e-05, + "loss": 1.1195, + "step": 52292 + }, + { + "epoch": 0.05, + "learning_rate": 4.966461186302814e-05, + "loss": 1.4041, + "step": 52293 + }, + { + "epoch": 0.05, + "learning_rate": 4.966459903987588e-05, + "loss": 1.0274, + "step": 52294 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664586216480155e-05, + "loss": 1.1988, + "step": 52295 + }, + { + "epoch": 0.05, + "learning_rate": 4.966457339284094e-05, + "loss": 0.9235, + "step": 52296 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664560568958254e-05, + "loss": 1.2991, + "step": 52297 + }, + { + "epoch": 0.05, + "learning_rate": 4.966454774483209e-05, + "loss": 1.316, + "step": 52298 + }, + { + "epoch": 0.05, + "learning_rate": 4.966453492046244e-05, + "loss": 1.1442, + "step": 52299 + }, + { + "epoch": 0.05, + "learning_rate": 4.966452209584932e-05, + "loss": 1.1229, + "step": 52300 + }, + { + "epoch": 0.05, + "learning_rate": 4.966450927099272e-05, + "loss": 1.1125, + "step": 52301 + }, + { + "epoch": 0.05, + "learning_rate": 4.966449644589264e-05, + "loss": 1.2735, + "step": 52302 + }, + { + "epoch": 0.05, + "learning_rate": 4.966448362054908e-05, + "loss": 1.2242, + "step": 52303 + }, + { + "epoch": 0.05, + "learning_rate": 4.966447079496205e-05, + "loss": 1.0751, + "step": 52304 + }, + { + "epoch": 0.05, + "learning_rate": 4.966445796913154e-05, + "loss": 0.986, + "step": 52305 + }, + { + "epoch": 0.05, + "learning_rate": 4.966444514305755e-05, + "loss": 1.0501, + "step": 52306 + }, + { + "epoch": 0.05, + "learning_rate": 4.966443231674009e-05, + "loss": 0.9181, + "step": 52307 + }, + { + "epoch": 0.05, + "learning_rate": 4.966441949017915e-05, + "loss": 0.7686, + "step": 52308 + }, + { + "epoch": 0.05, + "learning_rate": 4.966440666337473e-05, + "loss": 1.3423, + "step": 52309 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664393836326836e-05, + "loss": 1.0103, + "step": 52310 + }, + { + "epoch": 0.05, + "learning_rate": 4.966438100903546e-05, + "loss": 0.8665, + "step": 52311 + }, + { + "epoch": 0.05, + "learning_rate": 4.966436818150062e-05, + "loss": 0.9322, + "step": 52312 + }, + { + "epoch": 0.05, + "learning_rate": 4.966435535372229e-05, + "loss": 0.9754, + "step": 52313 + }, + { + "epoch": 0.05, + "learning_rate": 4.966434252570049e-05, + "loss": 1.2218, + "step": 52314 + }, + { + "epoch": 0.05, + "learning_rate": 4.966432969743522e-05, + "loss": 1.0827, + "step": 52315 + }, + { + "epoch": 0.05, + "learning_rate": 4.966431686892646e-05, + "loss": 1.1135, + "step": 52316 + }, + { + "epoch": 0.05, + "learning_rate": 4.966430404017424e-05, + "loss": 1.052, + "step": 52317 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664291211178534e-05, + "loss": 1.3317, + "step": 52318 + }, + { + "epoch": 0.05, + "learning_rate": 4.966427838193936e-05, + "loss": 1.39, + "step": 52319 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664265552456703e-05, + "loss": 1.1819, + "step": 52320 + }, + { + "epoch": 0.05, + "learning_rate": 4.966425272273058e-05, + "loss": 0.9536, + "step": 52321 + }, + { + "epoch": 0.05, + "learning_rate": 4.966423989276097e-05, + "loss": 0.9333, + "step": 52322 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664227062547886e-05, + "loss": 1.0831, + "step": 52323 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664214232091333e-05, + "loss": 0.8525, + "step": 52324 + }, + { + "epoch": 0.05, + "learning_rate": 4.966420140139131e-05, + "loss": 0.7367, + "step": 52325 + }, + { + "epoch": 0.05, + "learning_rate": 4.96641885704478e-05, + "loss": 1.1106, + "step": 52326 + }, + { + "epoch": 0.05, + "learning_rate": 4.966417573926082e-05, + "loss": 1.1206, + "step": 52327 + }, + { + "epoch": 0.05, + "learning_rate": 4.966416290783037e-05, + "loss": 0.9192, + "step": 52328 + }, + { + "epoch": 0.05, + "learning_rate": 4.966415007615645e-05, + "loss": 0.5169, + "step": 52329 + }, + { + "epoch": 0.05, + "learning_rate": 4.966413724423904e-05, + "loss": 1.0599, + "step": 52330 + }, + { + "epoch": 0.05, + "learning_rate": 4.966412441207817e-05, + "loss": 1.7012, + "step": 52331 + }, + { + "epoch": 0.05, + "learning_rate": 4.966411157967382e-05, + "loss": 0.6451, + "step": 52332 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664098747026e-05, + "loss": 1.0821, + "step": 52333 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664085914134704e-05, + "loss": 1.2234, + "step": 52334 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664073080999935e-05, + "loss": 1.0423, + "step": 52335 + }, + { + "epoch": 0.05, + "learning_rate": 4.966406024762169e-05, + "loss": 1.6216, + "step": 52336 + }, + { + "epoch": 0.05, + "learning_rate": 4.9664047413999977e-05, + "loss": 1.011, + "step": 52337 + }, + { + "epoch": 0.05, + "learning_rate": 4.966403458013479e-05, + "loss": 1.3298, + "step": 52338 + }, + { + "epoch": 0.05, + "learning_rate": 4.966402174602613e-05, + "loss": 1.2345, + "step": 52339 + }, + { + "epoch": 0.05, + "learning_rate": 4.966400891167399e-05, + "loss": 1.1194, + "step": 52340 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663996077078385e-05, + "loss": 1.6122, + "step": 52341 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663983242239305e-05, + "loss": 2.0209, + "step": 52342 + }, + { + "epoch": 0.05, + "learning_rate": 4.966397040715675e-05, + "loss": 1.6462, + "step": 52343 + }, + { + "epoch": 0.05, + "learning_rate": 4.966395757183073e-05, + "loss": 1.7798, + "step": 52344 + }, + { + "epoch": 0.05, + "learning_rate": 4.966394473626123e-05, + "loss": 1.7642, + "step": 52345 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663931900448257e-05, + "loss": 1.071, + "step": 52346 + }, + { + "epoch": 0.05, + "learning_rate": 4.966391906439182e-05, + "loss": 1.3141, + "step": 52347 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663906228091906e-05, + "loss": 1.3022, + "step": 52348 + }, + { + "epoch": 0.05, + "learning_rate": 4.966389339154852e-05, + "loss": 1.1469, + "step": 52349 + }, + { + "epoch": 0.05, + "learning_rate": 4.966388055476166e-05, + "loss": 1.0741, + "step": 52350 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663867717731335e-05, + "loss": 1.0519, + "step": 52351 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663854880457536e-05, + "loss": 1.4569, + "step": 52352 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663842042940266e-05, + "loss": 1.3563, + "step": 52353 + }, + { + "epoch": 0.05, + "learning_rate": 4.966382920517953e-05, + "loss": 0.8598, + "step": 52354 + }, + { + "epoch": 0.05, + "learning_rate": 4.966381636717532e-05, + "loss": 1.2605, + "step": 52355 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663803528927635e-05, + "loss": 0.9527, + "step": 52356 + }, + { + "epoch": 0.05, + "learning_rate": 4.966379069043649e-05, + "loss": 1.3508, + "step": 52357 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663777851701855e-05, + "loss": 1.0188, + "step": 52358 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663765012723764e-05, + "loss": 1.2323, + "step": 52359 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663752173502196e-05, + "loss": 1.0069, + "step": 52360 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663739334037164e-05, + "loss": 1.2482, + "step": 52361 + }, + { + "epoch": 0.05, + "learning_rate": 4.966372649432865e-05, + "loss": 1.6358, + "step": 52362 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663713654376685e-05, + "loss": 1.3663, + "step": 52363 + }, + { + "epoch": 0.05, + "learning_rate": 4.966370081418124e-05, + "loss": 0.9313, + "step": 52364 + }, + { + "epoch": 0.05, + "learning_rate": 4.966368797374232e-05, + "loss": 0.9837, + "step": 52365 + }, + { + "epoch": 0.05, + "learning_rate": 4.966367513305994e-05, + "loss": 1.197, + "step": 52366 + }, + { + "epoch": 0.05, + "learning_rate": 4.966366229213409e-05, + "loss": 1.2715, + "step": 52367 + }, + { + "epoch": 0.05, + "learning_rate": 4.966364945096476e-05, + "loss": 1.002, + "step": 52368 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663636609551975e-05, + "loss": 1.2011, + "step": 52369 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663623767895716e-05, + "loss": 1.1122, + "step": 52370 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663610925995985e-05, + "loss": 1.203, + "step": 52371 + }, + { + "epoch": 0.05, + "learning_rate": 4.966359808385278e-05, + "loss": 1.3143, + "step": 52372 + }, + { + "epoch": 0.05, + "learning_rate": 4.966358524146612e-05, + "loss": 1.1736, + "step": 52373 + }, + { + "epoch": 0.05, + "learning_rate": 4.966357239883599e-05, + "loss": 1.0603, + "step": 52374 + }, + { + "epoch": 0.05, + "learning_rate": 4.966355955596238e-05, + "loss": 0.9869, + "step": 52375 + }, + { + "epoch": 0.05, + "learning_rate": 4.966354671284531e-05, + "loss": 1.0473, + "step": 52376 + }, + { + "epoch": 0.05, + "learning_rate": 4.966353386948477e-05, + "loss": 1.0757, + "step": 52377 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663521025880753e-05, + "loss": 1.024, + "step": 52378 + }, + { + "epoch": 0.05, + "learning_rate": 4.966350818203328e-05, + "loss": 0.9148, + "step": 52379 + }, + { + "epoch": 0.05, + "learning_rate": 4.966349533794234e-05, + "loss": 1.0522, + "step": 52380 + }, + { + "epoch": 0.05, + "learning_rate": 4.966348249360793e-05, + "loss": 1.1035, + "step": 52381 + }, + { + "epoch": 0.05, + "learning_rate": 4.966346964903005e-05, + "loss": 1.2951, + "step": 52382 + }, + { + "epoch": 0.05, + "learning_rate": 4.96634568042087e-05, + "loss": 1.3964, + "step": 52383 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663443959143884e-05, + "loss": 1.2471, + "step": 52384 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663431113835605e-05, + "loss": 1.0191, + "step": 52385 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663418268283854e-05, + "loss": 0.6731, + "step": 52386 + }, + { + "epoch": 0.05, + "learning_rate": 4.966340542248864e-05, + "loss": 1.147, + "step": 52387 + }, + { + "epoch": 0.05, + "learning_rate": 4.966339257644996e-05, + "loss": 1.1394, + "step": 52388 + }, + { + "epoch": 0.05, + "learning_rate": 4.966337973016782e-05, + "loss": 1.0449, + "step": 52389 + }, + { + "epoch": 0.05, + "learning_rate": 4.96633668836422e-05, + "loss": 1.5423, + "step": 52390 + }, + { + "epoch": 0.05, + "learning_rate": 4.966335403687312e-05, + "loss": 1.1961, + "step": 52391 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663341189860566e-05, + "loss": 1.4979, + "step": 52392 + }, + { + "epoch": 0.05, + "learning_rate": 4.966332834260456e-05, + "loss": 1.0509, + "step": 52393 + }, + { + "epoch": 0.05, + "learning_rate": 4.966331549510508e-05, + "loss": 0.6138, + "step": 52394 + }, + { + "epoch": 0.05, + "learning_rate": 4.966330264736213e-05, + "loss": 1.0537, + "step": 52395 + }, + { + "epoch": 0.05, + "learning_rate": 4.966328979937572e-05, + "loss": 1.09, + "step": 52396 + }, + { + "epoch": 0.05, + "learning_rate": 4.966327695114584e-05, + "loss": 1.3453, + "step": 52397 + }, + { + "epoch": 0.05, + "learning_rate": 4.96632641026725e-05, + "loss": 1.0269, + "step": 52398 + }, + { + "epoch": 0.05, + "learning_rate": 4.96632512539557e-05, + "loss": 0.4969, + "step": 52399 + }, + { + "epoch": 0.05, + "learning_rate": 4.966323840499543e-05, + "loss": 1.4004, + "step": 52400 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663225555791684e-05, + "loss": 1.2236, + "step": 52401 + }, + { + "epoch": 0.05, + "learning_rate": 4.966321270634448e-05, + "loss": 1.2448, + "step": 52402 + }, + { + "epoch": 0.05, + "learning_rate": 4.966319985665382e-05, + "loss": 1.0684, + "step": 52403 + }, + { + "epoch": 0.05, + "learning_rate": 4.966318700671969e-05, + "loss": 1.1323, + "step": 52404 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663174156542095e-05, + "loss": 1.2076, + "step": 52405 + }, + { + "epoch": 0.05, + "learning_rate": 4.966316130612103e-05, + "loss": 1.0883, + "step": 52406 + }, + { + "epoch": 0.05, + "learning_rate": 4.96631484554565e-05, + "loss": 1.2781, + "step": 52407 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663135604548514e-05, + "loss": 0.7116, + "step": 52408 + }, + { + "epoch": 0.05, + "learning_rate": 4.966312275339706e-05, + "loss": 0.3162, + "step": 52409 + }, + { + "epoch": 0.05, + "learning_rate": 4.966310990200214e-05, + "loss": 0.3009, + "step": 52410 + }, + { + "epoch": 0.05, + "learning_rate": 4.966309705036376e-05, + "loss": 0.3397, + "step": 52411 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663084198481915e-05, + "loss": 0.2795, + "step": 52412 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663071346356607e-05, + "loss": 0.2088, + "step": 52413 + }, + { + "epoch": 0.05, + "learning_rate": 4.966305849398783e-05, + "loss": 0.3966, + "step": 52414 + }, + { + "epoch": 0.05, + "learning_rate": 4.9663045641375596e-05, + "loss": 1.1493, + "step": 52415 + }, + { + "epoch": 0.05, + "learning_rate": 4.96630327885199e-05, + "loss": 0.8201, + "step": 52416 + }, + { + "epoch": 0.05, + "learning_rate": 4.966301993542074e-05, + "loss": 1.2206, + "step": 52417 + }, + { + "epoch": 0.05, + "learning_rate": 4.966300708207811e-05, + "loss": 1.2574, + "step": 52418 + }, + { + "epoch": 0.05, + "learning_rate": 4.966299422849202e-05, + "loss": 0.3444, + "step": 52419 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662981374662476e-05, + "loss": 0.3989, + "step": 52420 + }, + { + "epoch": 0.05, + "learning_rate": 4.966296852058946e-05, + "loss": 1.1933, + "step": 52421 + }, + { + "epoch": 0.05, + "learning_rate": 4.966295566627298e-05, + "loss": 1.2475, + "step": 52422 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662942811713046e-05, + "loss": 1.1589, + "step": 52423 + }, + { + "epoch": 0.05, + "learning_rate": 4.966292995690965e-05, + "loss": 1.6768, + "step": 52424 + }, + { + "epoch": 0.05, + "learning_rate": 4.966291710186278e-05, + "loss": 1.1351, + "step": 52425 + }, + { + "epoch": 0.05, + "learning_rate": 4.966290424657246e-05, + "loss": 0.7844, + "step": 52426 + }, + { + "epoch": 0.05, + "learning_rate": 4.966289139103868e-05, + "loss": 1.1106, + "step": 52427 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662878535261426e-05, + "loss": 1.2516, + "step": 52428 + }, + { + "epoch": 0.05, + "learning_rate": 4.966286567924072e-05, + "loss": 1.2268, + "step": 52429 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662852822976545e-05, + "loss": 1.4871, + "step": 52430 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662839966468924e-05, + "loss": 1.2483, + "step": 52431 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662827109717826e-05, + "loss": 0.969, + "step": 52432 + }, + { + "epoch": 0.05, + "learning_rate": 4.966281425272327e-05, + "loss": 0.7329, + "step": 52433 + }, + { + "epoch": 0.05, + "learning_rate": 4.966280139548526e-05, + "loss": 0.6142, + "step": 52434 + }, + { + "epoch": 0.05, + "learning_rate": 4.966278853800378e-05, + "loss": 0.3351, + "step": 52435 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662775680278845e-05, + "loss": 1.0107, + "step": 52436 + }, + { + "epoch": 0.05, + "learning_rate": 4.966276282231045e-05, + "loss": 0.9821, + "step": 52437 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662749964098595e-05, + "loss": 1.2907, + "step": 52438 + }, + { + "epoch": 0.05, + "learning_rate": 4.966273710564327e-05, + "loss": 1.3155, + "step": 52439 + }, + { + "epoch": 0.05, + "learning_rate": 4.96627242469445e-05, + "loss": 1.0425, + "step": 52440 + }, + { + "epoch": 0.05, + "learning_rate": 4.966271138800226e-05, + "loss": 1.0594, + "step": 52441 + }, + { + "epoch": 0.05, + "learning_rate": 4.966269852881656e-05, + "loss": 1.5366, + "step": 52442 + }, + { + "epoch": 0.05, + "learning_rate": 4.966268566938741e-05, + "loss": 0.966, + "step": 52443 + }, + { + "epoch": 0.05, + "learning_rate": 4.966267280971479e-05, + "loss": 0.6685, + "step": 52444 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662659949798714e-05, + "loss": 1.6038, + "step": 52445 + }, + { + "epoch": 0.05, + "learning_rate": 4.966264708963918e-05, + "loss": 1.072, + "step": 52446 + }, + { + "epoch": 0.05, + "learning_rate": 4.966263422923618e-05, + "loss": 1.0134, + "step": 52447 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662621368589726e-05, + "loss": 1.1944, + "step": 52448 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662608507699815e-05, + "loss": 1.2265, + "step": 52449 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662595646566447e-05, + "loss": 1.08, + "step": 52450 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662582785189613e-05, + "loss": 1.1481, + "step": 52451 + }, + { + "epoch": 0.05, + "learning_rate": 4.966256992356932e-05, + "loss": 1.1634, + "step": 52452 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662557061705575e-05, + "loss": 1.1226, + "step": 52453 + }, + { + "epoch": 0.05, + "learning_rate": 4.966254419959837e-05, + "loss": 1.0723, + "step": 52454 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662531337247705e-05, + "loss": 1.1611, + "step": 52455 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662518474653584e-05, + "loss": 1.038, + "step": 52456 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662505611816e-05, + "loss": 1.3232, + "step": 52457 + }, + { + "epoch": 0.05, + "learning_rate": 4.966249274873496e-05, + "loss": 1.34, + "step": 52458 + }, + { + "epoch": 0.05, + "learning_rate": 4.966247988541047e-05, + "loss": 1.3859, + "step": 52459 + }, + { + "epoch": 0.05, + "learning_rate": 4.966246702184251e-05, + "loss": 1.2195, + "step": 52460 + }, + { + "epoch": 0.05, + "learning_rate": 4.96624541580311e-05, + "loss": 1.1647, + "step": 52461 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662441293976235e-05, + "loss": 1.0892, + "step": 52462 + }, + { + "epoch": 0.05, + "learning_rate": 4.966242842967791e-05, + "loss": 1.2389, + "step": 52463 + }, + { + "epoch": 0.05, + "learning_rate": 4.966241556513612e-05, + "loss": 1.1004, + "step": 52464 + }, + { + "epoch": 0.05, + "learning_rate": 4.966240270035088e-05, + "loss": 1.1097, + "step": 52465 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662389835322185e-05, + "loss": 1.0663, + "step": 52466 + }, + { + "epoch": 0.05, + "learning_rate": 4.966237697005003e-05, + "loss": 1.4602, + "step": 52467 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662364104534425e-05, + "loss": 1.2157, + "step": 52468 + }, + { + "epoch": 0.05, + "learning_rate": 4.966235123877536e-05, + "loss": 1.258, + "step": 52469 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662338372772835e-05, + "loss": 1.0472, + "step": 52470 + }, + { + "epoch": 0.05, + "learning_rate": 4.966232550652686e-05, + "loss": 0.965, + "step": 52471 + }, + { + "epoch": 0.05, + "learning_rate": 4.966231264003742e-05, + "loss": 1.72, + "step": 52472 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662299773304534e-05, + "loss": 1.3196, + "step": 52473 + }, + { + "epoch": 0.05, + "learning_rate": 4.966228690632818e-05, + "loss": 0.9164, + "step": 52474 + }, + { + "epoch": 0.05, + "learning_rate": 4.966227403910838e-05, + "loss": 1.2754, + "step": 52475 + }, + { + "epoch": 0.05, + "learning_rate": 4.966226117164512e-05, + "loss": 1.5255, + "step": 52476 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662248303938405e-05, + "loss": 1.2944, + "step": 52477 + }, + { + "epoch": 0.05, + "learning_rate": 4.966223543598824e-05, + "loss": 1.2525, + "step": 52478 + }, + { + "epoch": 0.05, + "learning_rate": 4.966222256779461e-05, + "loss": 0.9495, + "step": 52479 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662209699357535e-05, + "loss": 1.0673, + "step": 52480 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662196830677e-05, + "loss": 1.1379, + "step": 52481 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662183961753017e-05, + "loss": 0.7521, + "step": 52482 + }, + { + "epoch": 0.05, + "learning_rate": 4.966217109258557e-05, + "loss": 1.2565, + "step": 52483 + }, + { + "epoch": 0.05, + "learning_rate": 4.966215822317467e-05, + "loss": 1.0279, + "step": 52484 + }, + { + "epoch": 0.05, + "learning_rate": 4.966214535352032e-05, + "loss": 0.9, + "step": 52485 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662132483622515e-05, + "loss": 1.1155, + "step": 52486 + }, + { + "epoch": 0.05, + "learning_rate": 4.966211961348125e-05, + "loss": 0.7999, + "step": 52487 + }, + { + "epoch": 0.05, + "learning_rate": 4.966210674309654e-05, + "loss": 0.8942, + "step": 52488 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662093872468365e-05, + "loss": 1.1467, + "step": 52489 + }, + { + "epoch": 0.05, + "learning_rate": 4.966208100159675e-05, + "loss": 0.9167, + "step": 52490 + }, + { + "epoch": 0.05, + "learning_rate": 4.9662068130481664e-05, + "loss": 1.0602, + "step": 52491 + }, + { + "epoch": 0.05, + "learning_rate": 4.966205525912314e-05, + "loss": 1.3749, + "step": 52492 + }, + { + "epoch": 0.05, + "learning_rate": 4.966204238752115e-05, + "loss": 1.0849, + "step": 52493 + }, + { + "epoch": 0.05, + "learning_rate": 4.966202951567571e-05, + "loss": 0.9083, + "step": 52494 + }, + { + "epoch": 0.05, + "learning_rate": 4.966201664358683e-05, + "loss": 1.1379, + "step": 52495 + }, + { + "epoch": 0.05, + "learning_rate": 4.966200377125448e-05, + "loss": 1.1526, + "step": 52496 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661990898678687e-05, + "loss": 1.0743, + "step": 52497 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661978025859434e-05, + "loss": 1.2279, + "step": 52498 + }, + { + "epoch": 0.05, + "learning_rate": 4.966196515279674e-05, + "loss": 1.2296, + "step": 52499 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661952279490585e-05, + "loss": 0.9678, + "step": 52500 + }, + { + "epoch": 0.05, + "eval_loss": 1.0748170614242554, + "eval_runtime": 127.1145, + "eval_samples_per_second": 10.896, + "eval_steps_per_second": 5.452, + "step": 52500 + }, + { + "epoch": 0.05, + "learning_rate": 4.966193940594098e-05, + "loss": 0.9922, + "step": 52501 + }, + { + "epoch": 0.05, + "learning_rate": 4.966192653214792e-05, + "loss": 1.2711, + "step": 52502 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661913658111405e-05, + "loss": 1.2265, + "step": 52503 + }, + { + "epoch": 0.05, + "learning_rate": 4.966190078383144e-05, + "loss": 1.0425, + "step": 52504 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661887909308033e-05, + "loss": 0.8415, + "step": 52505 + }, + { + "epoch": 0.05, + "learning_rate": 4.966187503454116e-05, + "loss": 0.9677, + "step": 52506 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661862159530845e-05, + "loss": 1.5887, + "step": 52507 + }, + { + "epoch": 0.05, + "learning_rate": 4.966184928427707e-05, + "loss": 1.2484, + "step": 52508 + }, + { + "epoch": 0.05, + "learning_rate": 4.966183640877985e-05, + "loss": 1.1306, + "step": 52509 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661823533039185e-05, + "loss": 1.4165, + "step": 52510 + }, + { + "epoch": 0.05, + "learning_rate": 4.966181065705506e-05, + "loss": 1.0601, + "step": 52511 + }, + { + "epoch": 0.05, + "learning_rate": 4.966179778082749e-05, + "loss": 1.2317, + "step": 52512 + }, + { + "epoch": 0.05, + "learning_rate": 4.966178490435645e-05, + "loss": 1.1551, + "step": 52513 + }, + { + "epoch": 0.05, + "learning_rate": 4.966177202764198e-05, + "loss": 1.0647, + "step": 52514 + }, + { + "epoch": 0.05, + "learning_rate": 4.966175915068406e-05, + "loss": 1.1327, + "step": 52515 + }, + { + "epoch": 0.05, + "learning_rate": 4.966174627348268e-05, + "loss": 1.1203, + "step": 52516 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661733396037854e-05, + "loss": 1.514, + "step": 52517 + }, + { + "epoch": 0.05, + "learning_rate": 4.966172051834957e-05, + "loss": 1.4853, + "step": 52518 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661707640417845e-05, + "loss": 1.1107, + "step": 52519 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661694762242675e-05, + "loss": 1.1327, + "step": 52520 + }, + { + "epoch": 0.05, + "learning_rate": 4.966168188382404e-05, + "loss": 1.2935, + "step": 52521 + }, + { + "epoch": 0.05, + "learning_rate": 4.966166900516196e-05, + "loss": 1.1811, + "step": 52522 + }, + { + "epoch": 0.05, + "learning_rate": 4.966165612625644e-05, + "loss": 1.6893, + "step": 52523 + }, + { + "epoch": 0.05, + "learning_rate": 4.966164324710746e-05, + "loss": 0.9329, + "step": 52524 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661630367715035e-05, + "loss": 1.5871, + "step": 52525 + }, + { + "epoch": 0.05, + "learning_rate": 4.966161748807916e-05, + "loss": 0.9263, + "step": 52526 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661604608199834e-05, + "loss": 1.1587, + "step": 52527 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661591728077064e-05, + "loss": 1.4116, + "step": 52528 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661578847710843e-05, + "loss": 1.0867, + "step": 52529 + }, + { + "epoch": 0.05, + "learning_rate": 4.966156596710117e-05, + "loss": 1.1932, + "step": 52530 + }, + { + "epoch": 0.05, + "learning_rate": 4.966155308624805e-05, + "loss": 1.204, + "step": 52531 + }, + { + "epoch": 0.05, + "learning_rate": 4.966154020515148e-05, + "loss": 1.0563, + "step": 52532 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661527323811466e-05, + "loss": 1.1859, + "step": 52533 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661514442228004e-05, + "loss": 1.2327, + "step": 52534 + }, + { + "epoch": 0.05, + "learning_rate": 4.966150156040109e-05, + "loss": 1.2511, + "step": 52535 + }, + { + "epoch": 0.05, + "learning_rate": 4.966148867833072e-05, + "loss": 1.2779, + "step": 52536 + }, + { + "epoch": 0.05, + "learning_rate": 4.966147579601692e-05, + "loss": 1.3258, + "step": 52537 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661462913459665e-05, + "loss": 1.392, + "step": 52538 + }, + { + "epoch": 0.05, + "learning_rate": 4.966145003065896e-05, + "loss": 1.0949, + "step": 52539 + }, + { + "epoch": 0.05, + "learning_rate": 4.966143714761481e-05, + "loss": 1.1287, + "step": 52540 + }, + { + "epoch": 0.05, + "learning_rate": 4.966142426432721e-05, + "loss": 1.0563, + "step": 52541 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661411380796166e-05, + "loss": 1.1377, + "step": 52542 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661398497021674e-05, + "loss": 1.005, + "step": 52543 + }, + { + "epoch": 0.05, + "learning_rate": 4.966138561300374e-05, + "loss": 1.1503, + "step": 52544 + }, + { + "epoch": 0.05, + "learning_rate": 4.966137272874235e-05, + "loss": 1.2459, + "step": 52545 + }, + { + "epoch": 0.05, + "learning_rate": 4.966135984423752e-05, + "loss": 1.1333, + "step": 52546 + }, + { + "epoch": 0.05, + "learning_rate": 4.966134695948924e-05, + "loss": 1.2791, + "step": 52547 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661334074497515e-05, + "loss": 1.0147, + "step": 52548 + }, + { + "epoch": 0.05, + "learning_rate": 4.966132118926234e-05, + "loss": 1.5031, + "step": 52549 + }, + { + "epoch": 0.05, + "learning_rate": 4.966130830378372e-05, + "loss": 1.466, + "step": 52550 + }, + { + "epoch": 0.05, + "learning_rate": 4.966129541806166e-05, + "loss": 1.419, + "step": 52551 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661282532096146e-05, + "loss": 1.1791, + "step": 52552 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661269645887186e-05, + "loss": 1.1603, + "step": 52553 + }, + { + "epoch": 0.05, + "learning_rate": 4.966125675943479e-05, + "loss": 1.0217, + "step": 52554 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661243872738936e-05, + "loss": 1.2152, + "step": 52555 + }, + { + "epoch": 0.05, + "learning_rate": 4.966123098579965e-05, + "loss": 1.1455, + "step": 52556 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661218098616916e-05, + "loss": 1.6097, + "step": 52557 + }, + { + "epoch": 0.05, + "learning_rate": 4.966120521119073e-05, + "loss": 0.9347, + "step": 52558 + }, + { + "epoch": 0.05, + "learning_rate": 4.96611923235211e-05, + "loss": 1.4738, + "step": 52559 + }, + { + "epoch": 0.05, + "learning_rate": 4.966117943560803e-05, + "loss": 1.1777, + "step": 52560 + }, + { + "epoch": 0.05, + "learning_rate": 4.966116654745151e-05, + "loss": 0.9212, + "step": 52561 + }, + { + "epoch": 0.05, + "learning_rate": 4.966115365905155e-05, + "loss": 1.3055, + "step": 52562 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661140770408145e-05, + "loss": 1.0905, + "step": 52563 + }, + { + "epoch": 0.05, + "learning_rate": 4.966112788152129e-05, + "loss": 1.1611, + "step": 52564 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661114992390994e-05, + "loss": 1.2452, + "step": 52565 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661102103017255e-05, + "loss": 0.666, + "step": 52566 + }, + { + "epoch": 0.05, + "learning_rate": 4.966108921340007e-05, + "loss": 0.9874, + "step": 52567 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661076323539446e-05, + "loss": 1.446, + "step": 52568 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661063433435375e-05, + "loss": 1.0978, + "step": 52569 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661050543087854e-05, + "loss": 1.0455, + "step": 52570 + }, + { + "epoch": 0.05, + "learning_rate": 4.96610376524969e-05, + "loss": 1.4322, + "step": 52571 + }, + { + "epoch": 0.05, + "learning_rate": 4.96610247616625e-05, + "loss": 0.9937, + "step": 52572 + }, + { + "epoch": 0.05, + "learning_rate": 4.966101187058465e-05, + "loss": 1.1447, + "step": 52573 + }, + { + "epoch": 0.05, + "learning_rate": 4.966099897926336e-05, + "loss": 0.8781, + "step": 52574 + }, + { + "epoch": 0.05, + "learning_rate": 4.966098608769863e-05, + "loss": 0.9307, + "step": 52575 + }, + { + "epoch": 0.05, + "learning_rate": 4.966097319589046e-05, + "loss": 0.9124, + "step": 52576 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660960303838845e-05, + "loss": 0.9879, + "step": 52577 + }, + { + "epoch": 0.05, + "learning_rate": 4.966094741154378e-05, + "loss": 1.1222, + "step": 52578 + }, + { + "epoch": 0.05, + "learning_rate": 4.966093451900527e-05, + "loss": 1.0568, + "step": 52579 + }, + { + "epoch": 0.05, + "learning_rate": 4.966092162622333e-05, + "loss": 1.4597, + "step": 52580 + }, + { + "epoch": 0.05, + "learning_rate": 4.966090873319795e-05, + "loss": 1.7605, + "step": 52581 + }, + { + "epoch": 0.05, + "learning_rate": 4.966089583992912e-05, + "loss": 0.9986, + "step": 52582 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660882946416856e-05, + "loss": 0.7788, + "step": 52583 + }, + { + "epoch": 0.05, + "learning_rate": 4.966087005266113e-05, + "loss": 0.6569, + "step": 52584 + }, + { + "epoch": 0.05, + "learning_rate": 4.966085715866198e-05, + "loss": 0.9821, + "step": 52585 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660844264419384e-05, + "loss": 1.4081, + "step": 52586 + }, + { + "epoch": 0.05, + "learning_rate": 4.966083136993335e-05, + "loss": 1.0973, + "step": 52587 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660818475203874e-05, + "loss": 1.0675, + "step": 52588 + }, + { + "epoch": 0.05, + "learning_rate": 4.966080558023095e-05, + "loss": 0.9894, + "step": 52589 + }, + { + "epoch": 0.05, + "learning_rate": 4.966079268501459e-05, + "loss": 1.329, + "step": 52590 + }, + { + "epoch": 0.05, + "learning_rate": 4.966077978955479e-05, + "loss": 1.5998, + "step": 52591 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660766893851546e-05, + "loss": 1.1335, + "step": 52592 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660753997904865e-05, + "loss": 0.9855, + "step": 52593 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660741101714734e-05, + "loss": 1.2618, + "step": 52594 + }, + { + "epoch": 0.05, + "learning_rate": 4.966072820528118e-05, + "loss": 0.9893, + "step": 52595 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660715308604174e-05, + "loss": 0.9944, + "step": 52596 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660702411683724e-05, + "loss": 1.394, + "step": 52597 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660689514519844e-05, + "loss": 1.6166, + "step": 52598 + }, + { + "epoch": 0.05, + "learning_rate": 4.966067661711252e-05, + "loss": 1.2384, + "step": 52599 + }, + { + "epoch": 0.05, + "learning_rate": 4.966066371946175e-05, + "loss": 1.3295, + "step": 52600 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660650821567546e-05, + "loss": 1.2096, + "step": 52601 + }, + { + "epoch": 0.05, + "learning_rate": 4.96606379234299e-05, + "loss": 1.2938, + "step": 52602 + }, + { + "epoch": 0.05, + "learning_rate": 4.966062502504882e-05, + "loss": 1.0496, + "step": 52603 + }, + { + "epoch": 0.05, + "learning_rate": 4.96606121264243e-05, + "loss": 1.2039, + "step": 52604 + }, + { + "epoch": 0.05, + "learning_rate": 4.966059922755634e-05, + "loss": 1.1637, + "step": 52605 + }, + { + "epoch": 0.05, + "learning_rate": 4.966058632844494e-05, + "loss": 1.0569, + "step": 52606 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660573429090096e-05, + "loss": 1.1787, + "step": 52607 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660560529491816e-05, + "loss": 0.8765, + "step": 52608 + }, + { + "epoch": 0.05, + "learning_rate": 4.96605476296501e-05, + "loss": 1.0382, + "step": 52609 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660534729564944e-05, + "loss": 1.0548, + "step": 52610 + }, + { + "epoch": 0.05, + "learning_rate": 4.966052182923635e-05, + "loss": 1.0889, + "step": 52611 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660508928664316e-05, + "loss": 1.5536, + "step": 52612 + }, + { + "epoch": 0.05, + "learning_rate": 4.966049602784884e-05, + "loss": 1.0333, + "step": 52613 + }, + { + "epoch": 0.05, + "learning_rate": 4.966048312678994e-05, + "loss": 1.0421, + "step": 52614 + }, + { + "epoch": 0.05, + "learning_rate": 4.966047022548759e-05, + "loss": 1.147, + "step": 52615 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660457323941814e-05, + "loss": 1.0163, + "step": 52616 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660444422152585e-05, + "loss": 0.9954, + "step": 52617 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660431520119925e-05, + "loss": 1.123, + "step": 52618 + }, + { + "epoch": 0.05, + "learning_rate": 4.966041861784383e-05, + "loss": 1.3002, + "step": 52619 + }, + { + "epoch": 0.05, + "learning_rate": 4.96604057153243e-05, + "loss": 0.9984, + "step": 52620 + }, + { + "epoch": 0.05, + "learning_rate": 4.966039281256133e-05, + "loss": 1.3207, + "step": 52621 + }, + { + "epoch": 0.05, + "learning_rate": 4.966037990955492e-05, + "loss": 1.0732, + "step": 52622 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660367006305076e-05, + "loss": 1.0862, + "step": 52623 + }, + { + "epoch": 0.05, + "learning_rate": 4.966035410281179e-05, + "loss": 1.1369, + "step": 52624 + }, + { + "epoch": 0.05, + "learning_rate": 4.966034119907507e-05, + "loss": 1.072, + "step": 52625 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660328295094914e-05, + "loss": 1.105, + "step": 52626 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660315390871326e-05, + "loss": 1.3005, + "step": 52627 + }, + { + "epoch": 0.05, + "learning_rate": 4.96603024864043e-05, + "loss": 1.1881, + "step": 52628 + }, + { + "epoch": 0.05, + "learning_rate": 4.966028958169384e-05, + "loss": 1.3806, + "step": 52629 + }, + { + "epoch": 0.05, + "learning_rate": 4.966027667673994e-05, + "loss": 1.1643, + "step": 52630 + }, + { + "epoch": 0.05, + "learning_rate": 4.96602637715426e-05, + "loss": 1.1513, + "step": 52631 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660250866101834e-05, + "loss": 1.2069, + "step": 52632 + }, + { + "epoch": 0.05, + "learning_rate": 4.966023796041762e-05, + "loss": 1.1676, + "step": 52633 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660225054489986e-05, + "loss": 1.5451, + "step": 52634 + }, + { + "epoch": 0.05, + "learning_rate": 4.96602121483189e-05, + "loss": 1.2797, + "step": 52635 + }, + { + "epoch": 0.05, + "learning_rate": 4.966019924190439e-05, + "loss": 1.3519, + "step": 52636 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660186335246436e-05, + "loss": 1.9977, + "step": 52637 + }, + { + "epoch": 0.05, + "learning_rate": 4.966017342834506e-05, + "loss": 2.0619, + "step": 52638 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660160521200236e-05, + "loss": 1.203, + "step": 52639 + }, + { + "epoch": 0.05, + "learning_rate": 4.966014761381199e-05, + "loss": 1.2518, + "step": 52640 + }, + { + "epoch": 0.05, + "learning_rate": 4.96601347061803e-05, + "loss": 1.1858, + "step": 52641 + }, + { + "epoch": 0.05, + "learning_rate": 4.966012179830518e-05, + "loss": 1.3036, + "step": 52642 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660108890186624e-05, + "loss": 0.7339, + "step": 52643 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660095981824636e-05, + "loss": 1.1493, + "step": 52644 + }, + { + "epoch": 0.05, + "learning_rate": 4.966008307321921e-05, + "loss": 1.196, + "step": 52645 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660070164370356e-05, + "loss": 1.2627, + "step": 52646 + }, + { + "epoch": 0.05, + "learning_rate": 4.9660057255278056e-05, + "loss": 1.0744, + "step": 52647 + }, + { + "epoch": 0.05, + "learning_rate": 4.966004434594233e-05, + "loss": 1.2702, + "step": 52648 + }, + { + "epoch": 0.05, + "learning_rate": 4.966003143636317e-05, + "loss": 1.407, + "step": 52649 + }, + { + "epoch": 0.05, + "learning_rate": 4.966001852654059e-05, + "loss": 0.8651, + "step": 52650 + }, + { + "epoch": 0.05, + "learning_rate": 4.966000561647456e-05, + "loss": 0.8537, + "step": 52651 + }, + { + "epoch": 0.05, + "learning_rate": 4.96599927061651e-05, + "loss": 0.8522, + "step": 52652 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659979795612204e-05, + "loss": 1.299, + "step": 52653 + }, + { + "epoch": 0.05, + "learning_rate": 4.965996688481588e-05, + "loss": 1.958, + "step": 52654 + }, + { + "epoch": 0.05, + "learning_rate": 4.965995397377612e-05, + "loss": 1.7712, + "step": 52655 + }, + { + "epoch": 0.05, + "learning_rate": 4.965994106249293e-05, + "loss": 1.4916, + "step": 52656 + }, + { + "epoch": 0.05, + "learning_rate": 4.965992815096631e-05, + "loss": 1.4539, + "step": 52657 + }, + { + "epoch": 0.05, + "learning_rate": 4.965991523919625e-05, + "loss": 0.5085, + "step": 52658 + }, + { + "epoch": 0.05, + "learning_rate": 4.965990232718277e-05, + "loss": 1.252, + "step": 52659 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659889414925844e-05, + "loss": 0.716, + "step": 52660 + }, + { + "epoch": 0.05, + "learning_rate": 4.96598765024255e-05, + "loss": 0.883, + "step": 52661 + }, + { + "epoch": 0.05, + "learning_rate": 4.965986358968171e-05, + "loss": 0.8661, + "step": 52662 + }, + { + "epoch": 0.05, + "learning_rate": 4.96598506766945e-05, + "loss": 0.6722, + "step": 52663 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659837763463846e-05, + "loss": 0.6252, + "step": 52664 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659824849989764e-05, + "loss": 0.5045, + "step": 52665 + }, + { + "epoch": 0.05, + "learning_rate": 4.965981193627226e-05, + "loss": 0.7305, + "step": 52666 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659799022311314e-05, + "loss": 1.3441, + "step": 52667 + }, + { + "epoch": 0.05, + "learning_rate": 4.965978610810694e-05, + "loss": 0.9752, + "step": 52668 + }, + { + "epoch": 0.05, + "learning_rate": 4.965977319365914e-05, + "loss": 1.1049, + "step": 52669 + }, + { + "epoch": 0.05, + "learning_rate": 4.96597602789679e-05, + "loss": 0.8638, + "step": 52670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659747364033236e-05, + "loss": 0.9816, + "step": 52671 + }, + { + "epoch": 0.05, + "learning_rate": 4.965973444885515e-05, + "loss": 1.0423, + "step": 52672 + }, + { + "epoch": 0.05, + "learning_rate": 4.965972153343362e-05, + "loss": 1.2044, + "step": 52673 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659708617768664e-05, + "loss": 1.2894, + "step": 52674 + }, + { + "epoch": 0.05, + "learning_rate": 4.965969570186028e-05, + "loss": 1.1773, + "step": 52675 + }, + { + "epoch": 0.05, + "learning_rate": 4.965968278570846e-05, + "loss": 1.0832, + "step": 52676 + }, + { + "epoch": 0.05, + "learning_rate": 4.965966986931321e-05, + "loss": 1.2172, + "step": 52677 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659656952674525e-05, + "loss": 0.9128, + "step": 52678 + }, + { + "epoch": 0.05, + "learning_rate": 4.965964403579242e-05, + "loss": 0.8411, + "step": 52679 + }, + { + "epoch": 0.05, + "learning_rate": 4.965963111866689e-05, + "loss": 0.9171, + "step": 52680 + }, + { + "epoch": 0.05, + "learning_rate": 4.965961820129792e-05, + "loss": 1.1875, + "step": 52681 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659605283685525e-05, + "loss": 1.0353, + "step": 52682 + }, + { + "epoch": 0.05, + "learning_rate": 4.96595923658297e-05, + "loss": 0.9921, + "step": 52683 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659579447730446e-05, + "loss": 1.1975, + "step": 52684 + }, + { + "epoch": 0.05, + "learning_rate": 4.965956652938777e-05, + "loss": 1.4613, + "step": 52685 + }, + { + "epoch": 0.05, + "learning_rate": 4.965955361080166e-05, + "loss": 1.5579, + "step": 52686 + }, + { + "epoch": 0.05, + "learning_rate": 4.965954069197212e-05, + "loss": 0.905, + "step": 52687 + }, + { + "epoch": 0.05, + "learning_rate": 4.965952777289915e-05, + "loss": 1.1752, + "step": 52688 + }, + { + "epoch": 0.05, + "learning_rate": 4.965951485358275e-05, + "loss": 1.0645, + "step": 52689 + }, + { + "epoch": 0.05, + "learning_rate": 4.965950193402292e-05, + "loss": 1.0529, + "step": 52690 + }, + { + "epoch": 0.05, + "learning_rate": 4.965948901421967e-05, + "loss": 1.316, + "step": 52691 + }, + { + "epoch": 0.05, + "learning_rate": 4.965947609417298e-05, + "loss": 1.0407, + "step": 52692 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659463173882874e-05, + "loss": 0.9792, + "step": 52693 + }, + { + "epoch": 0.05, + "learning_rate": 4.965945025334934e-05, + "loss": 1.0193, + "step": 52694 + }, + { + "epoch": 0.05, + "learning_rate": 4.965943733257237e-05, + "loss": 1.3149, + "step": 52695 + }, + { + "epoch": 0.05, + "learning_rate": 4.965942441155198e-05, + "loss": 1.1756, + "step": 52696 + }, + { + "epoch": 0.05, + "learning_rate": 4.965941149028816e-05, + "loss": 0.9197, + "step": 52697 + }, + { + "epoch": 0.05, + "learning_rate": 4.965939856878091e-05, + "loss": 1.2866, + "step": 52698 + }, + { + "epoch": 0.05, + "learning_rate": 4.965938564703024e-05, + "loss": 1.4422, + "step": 52699 + }, + { + "epoch": 0.05, + "learning_rate": 4.965937272503614e-05, + "loss": 1.1593, + "step": 52700 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659359802798605e-05, + "loss": 1.2471, + "step": 52701 + }, + { + "epoch": 0.05, + "learning_rate": 4.965934688031765e-05, + "loss": 0.8797, + "step": 52702 + }, + { + "epoch": 0.05, + "learning_rate": 4.965933395759327e-05, + "loss": 1.2007, + "step": 52703 + }, + { + "epoch": 0.05, + "learning_rate": 4.965932103462546e-05, + "loss": 1.1505, + "step": 52704 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659308111414224e-05, + "loss": 0.6863, + "step": 52705 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659295187959566e-05, + "loss": 0.7197, + "step": 52706 + }, + { + "epoch": 0.05, + "learning_rate": 4.965928226426148e-05, + "loss": 1.0609, + "step": 52707 + }, + { + "epoch": 0.05, + "learning_rate": 4.965926934031996e-05, + "loss": 1.2162, + "step": 52708 + }, + { + "epoch": 0.05, + "learning_rate": 4.965925641613502e-05, + "loss": 1.1317, + "step": 52709 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659243491706656e-05, + "loss": 1.1046, + "step": 52710 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659230567034866e-05, + "loss": 1.0278, + "step": 52711 + }, + { + "epoch": 0.05, + "learning_rate": 4.965921764211965e-05, + "loss": 1.0267, + "step": 52712 + }, + { + "epoch": 0.05, + "learning_rate": 4.965920471696101e-05, + "loss": 0.9934, + "step": 52713 + }, + { + "epoch": 0.05, + "learning_rate": 4.965919179155893e-05, + "loss": 0.9159, + "step": 52714 + }, + { + "epoch": 0.05, + "learning_rate": 4.965917886591345e-05, + "loss": 0.9303, + "step": 52715 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659165940024525e-05, + "loss": 0.9199, + "step": 52716 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659153013892186e-05, + "loss": 0.8915, + "step": 52717 + }, + { + "epoch": 0.05, + "learning_rate": 4.965914008751642e-05, + "loss": 0.8685, + "step": 52718 + }, + { + "epoch": 0.05, + "learning_rate": 4.965912716089722e-05, + "loss": 0.8628, + "step": 52719 + }, + { + "epoch": 0.05, + "learning_rate": 4.965911423403461e-05, + "loss": 0.8268, + "step": 52720 + }, + { + "epoch": 0.05, + "learning_rate": 4.965910130692857e-05, + "loss": 0.8483, + "step": 52721 + }, + { + "epoch": 0.05, + "learning_rate": 4.96590883795791e-05, + "loss": 0.8217, + "step": 52722 + }, + { + "epoch": 0.05, + "learning_rate": 4.965907545198621e-05, + "loss": 0.8151, + "step": 52723 + }, + { + "epoch": 0.05, + "learning_rate": 4.96590625241499e-05, + "loss": 0.809, + "step": 52724 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659049596070164e-05, + "loss": 0.7924, + "step": 52725 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659036667747e-05, + "loss": 0.8102, + "step": 52726 + }, + { + "epoch": 0.05, + "learning_rate": 4.9659023739180415e-05, + "loss": 0.7898, + "step": 52727 + }, + { + "epoch": 0.05, + "learning_rate": 4.965901081037041e-05, + "loss": 0.8023, + "step": 52728 + }, + { + "epoch": 0.05, + "learning_rate": 4.965899788131698e-05, + "loss": 0.8087, + "step": 52729 + }, + { + "epoch": 0.05, + "learning_rate": 4.965898495202013e-05, + "loss": 0.8019, + "step": 52730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658972022479845e-05, + "loss": 0.8129, + "step": 52731 + }, + { + "epoch": 0.05, + "learning_rate": 4.965895909269614e-05, + "loss": 0.7811, + "step": 52732 + }, + { + "epoch": 0.05, + "learning_rate": 4.965894616266902e-05, + "loss": 0.767, + "step": 52733 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658933232398475e-05, + "loss": 0.7878, + "step": 52734 + }, + { + "epoch": 0.05, + "learning_rate": 4.96589203018845e-05, + "loss": 0.7556, + "step": 52735 + }, + { + "epoch": 0.05, + "learning_rate": 4.965890737112712e-05, + "loss": 0.7739, + "step": 52736 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658894440126305e-05, + "loss": 0.7014, + "step": 52737 + }, + { + "epoch": 0.05, + "learning_rate": 4.965888150888207e-05, + "loss": 0.7125, + "step": 52738 + }, + { + "epoch": 0.05, + "learning_rate": 4.965886857739441e-05, + "loss": 0.7595, + "step": 52739 + }, + { + "epoch": 0.05, + "learning_rate": 4.965885564566333e-05, + "loss": 0.7377, + "step": 52740 + }, + { + "epoch": 0.05, + "learning_rate": 4.965884271368883e-05, + "loss": 0.8228, + "step": 52741 + }, + { + "epoch": 0.05, + "learning_rate": 4.96588297814709e-05, + "loss": 0.7856, + "step": 52742 + }, + { + "epoch": 0.05, + "learning_rate": 4.965881684900956e-05, + "loss": 0.7648, + "step": 52743 + }, + { + "epoch": 0.05, + "learning_rate": 4.965880391630479e-05, + "loss": 0.7631, + "step": 52744 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658790983356604e-05, + "loss": 0.761, + "step": 52745 + }, + { + "epoch": 0.05, + "learning_rate": 4.965877805016499e-05, + "loss": 0.7564, + "step": 52746 + }, + { + "epoch": 0.05, + "learning_rate": 4.965876511672997e-05, + "loss": 0.7718, + "step": 52747 + }, + { + "epoch": 0.05, + "learning_rate": 4.965875218305151e-05, + "loss": 0.7897, + "step": 52748 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658739249129635e-05, + "loss": 0.7407, + "step": 52749 + }, + { + "epoch": 0.05, + "learning_rate": 4.965872631496434e-05, + "loss": 0.7603, + "step": 52750 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658713380555635e-05, + "loss": 0.7801, + "step": 52751 + }, + { + "epoch": 0.05, + "learning_rate": 4.96587004459035e-05, + "loss": 0.7447, + "step": 52752 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658687511007954e-05, + "loss": 0.7769, + "step": 52753 + }, + { + "epoch": 0.05, + "learning_rate": 4.965867457586897e-05, + "loss": 0.743, + "step": 52754 + }, + { + "epoch": 0.05, + "learning_rate": 4.965866164048658e-05, + "loss": 0.7226, + "step": 52755 + }, + { + "epoch": 0.05, + "learning_rate": 4.965864870486077e-05, + "loss": 0.7443, + "step": 52756 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658635768991534e-05, + "loss": 0.7516, + "step": 52757 + }, + { + "epoch": 0.05, + "learning_rate": 4.965862283287888e-05, + "loss": 0.7556, + "step": 52758 + }, + { + "epoch": 0.05, + "learning_rate": 4.965860989652281e-05, + "loss": 0.7302, + "step": 52759 + }, + { + "epoch": 0.05, + "learning_rate": 4.965859695992331e-05, + "loss": 0.6991, + "step": 52760 + }, + { + "epoch": 0.05, + "learning_rate": 4.96585840230804e-05, + "loss": 0.7649, + "step": 52761 + }, + { + "epoch": 0.05, + "learning_rate": 4.965857108599407e-05, + "loss": 0.757, + "step": 52762 + }, + { + "epoch": 0.05, + "learning_rate": 4.965855814866432e-05, + "loss": 0.7144, + "step": 52763 + }, + { + "epoch": 0.05, + "learning_rate": 4.965854521109115e-05, + "loss": 0.7103, + "step": 52764 + }, + { + "epoch": 0.05, + "learning_rate": 4.965853227327456e-05, + "loss": 0.7295, + "step": 52765 + }, + { + "epoch": 0.05, + "learning_rate": 4.965851933521456e-05, + "loss": 0.7439, + "step": 52766 + }, + { + "epoch": 0.05, + "learning_rate": 4.965850639691113e-05, + "loss": 0.7395, + "step": 52767 + }, + { + "epoch": 0.05, + "learning_rate": 4.965849345836429e-05, + "loss": 0.6893, + "step": 52768 + }, + { + "epoch": 0.05, + "learning_rate": 4.965848051957403e-05, + "loss": 0.8337, + "step": 52769 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658467580540345e-05, + "loss": 0.9888, + "step": 52770 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658454641263254e-05, + "loss": 1.3617, + "step": 52771 + }, + { + "epoch": 0.05, + "learning_rate": 4.965844170174274e-05, + "loss": 1.2492, + "step": 52772 + }, + { + "epoch": 0.05, + "learning_rate": 4.965842876197881e-05, + "loss": 0.6104, + "step": 52773 + }, + { + "epoch": 0.05, + "learning_rate": 4.965841582197145e-05, + "loss": 1.255, + "step": 52774 + }, + { + "epoch": 0.05, + "learning_rate": 4.965840288172069e-05, + "loss": 1.2801, + "step": 52775 + }, + { + "epoch": 0.05, + "learning_rate": 4.96583899412265e-05, + "loss": 0.7179, + "step": 52776 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658377000488896e-05, + "loss": 1.1391, + "step": 52777 + }, + { + "epoch": 0.05, + "learning_rate": 4.965836405950788e-05, + "loss": 0.8283, + "step": 52778 + }, + { + "epoch": 0.05, + "learning_rate": 4.965835111828344e-05, + "loss": 1.4846, + "step": 52779 + }, + { + "epoch": 0.05, + "learning_rate": 4.965833817681559e-05, + "loss": 1.0396, + "step": 52780 + }, + { + "epoch": 0.05, + "learning_rate": 4.965832523510432e-05, + "loss": 1.1025, + "step": 52781 + }, + { + "epoch": 0.05, + "learning_rate": 4.965831229314963e-05, + "loss": 1.3216, + "step": 52782 + }, + { + "epoch": 0.05, + "learning_rate": 4.965829935095152e-05, + "loss": 1.7823, + "step": 52783 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658286408510005e-05, + "loss": 1.147, + "step": 52784 + }, + { + "epoch": 0.05, + "learning_rate": 4.965827346582507e-05, + "loss": 1.1268, + "step": 52785 + }, + { + "epoch": 0.05, + "learning_rate": 4.965826052289672e-05, + "loss": 1.1589, + "step": 52786 + }, + { + "epoch": 0.05, + "learning_rate": 4.965824757972495e-05, + "loss": 1.1255, + "step": 52787 + }, + { + "epoch": 0.05, + "learning_rate": 4.965823463630976e-05, + "loss": 1.4913, + "step": 52788 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658221692651166e-05, + "loss": 1.1126, + "step": 52789 + }, + { + "epoch": 0.05, + "learning_rate": 4.965820874874915e-05, + "loss": 0.9338, + "step": 52790 + }, + { + "epoch": 0.05, + "learning_rate": 4.965819580460372e-05, + "loss": 0.9597, + "step": 52791 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658182860214876e-05, + "loss": 1.5599, + "step": 52792 + }, + { + "epoch": 0.05, + "learning_rate": 4.965816991558261e-05, + "loss": 1.1436, + "step": 52793 + }, + { + "epoch": 0.05, + "learning_rate": 4.965815697070694e-05, + "loss": 1.1504, + "step": 52794 + }, + { + "epoch": 0.05, + "learning_rate": 4.965814402558785e-05, + "loss": 1.1681, + "step": 52795 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658131080225346e-05, + "loss": 1.3185, + "step": 52796 + }, + { + "epoch": 0.05, + "learning_rate": 4.965811813461942e-05, + "loss": 2.0054, + "step": 52797 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658105188770086e-05, + "loss": 1.2655, + "step": 52798 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658092242677334e-05, + "loss": 1.0946, + "step": 52799 + }, + { + "epoch": 0.05, + "learning_rate": 4.965807929634117e-05, + "loss": 1.1241, + "step": 52800 + }, + { + "epoch": 0.05, + "learning_rate": 4.965806634976159e-05, + "loss": 0.7005, + "step": 52801 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658053402938604e-05, + "loss": 0.4864, + "step": 52802 + }, + { + "epoch": 0.05, + "learning_rate": 4.96580404558722e-05, + "loss": 0.3227, + "step": 52803 + }, + { + "epoch": 0.05, + "learning_rate": 4.9658027508562375e-05, + "loss": 0.3733, + "step": 52804 + }, + { + "epoch": 0.05, + "learning_rate": 4.965801456100915e-05, + "loss": 0.695, + "step": 52805 + }, + { + "epoch": 0.05, + "learning_rate": 4.96580016132125e-05, + "loss": 0.6983, + "step": 52806 + }, + { + "epoch": 0.05, + "learning_rate": 4.965798866517244e-05, + "loss": 0.6251, + "step": 52807 + }, + { + "epoch": 0.05, + "learning_rate": 4.965797571688896e-05, + "loss": 0.42, + "step": 52808 + }, + { + "epoch": 0.05, + "learning_rate": 4.965796276836208e-05, + "loss": 0.2931, + "step": 52809 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657949819591775e-05, + "loss": 0.6407, + "step": 52810 + }, + { + "epoch": 0.05, + "learning_rate": 4.965793687057807e-05, + "loss": 0.7012, + "step": 52811 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657923921320936e-05, + "loss": 0.5186, + "step": 52812 + }, + { + "epoch": 0.05, + "learning_rate": 4.96579109718204e-05, + "loss": 0.4672, + "step": 52813 + }, + { + "epoch": 0.05, + "learning_rate": 4.965789802207645e-05, + "loss": 0.3636, + "step": 52814 + }, + { + "epoch": 0.05, + "learning_rate": 4.965788507208908e-05, + "loss": 3.4605, + "step": 52815 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657872121858315e-05, + "loss": 4.5698, + "step": 52816 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657859171384126e-05, + "loss": 0.4874, + "step": 52817 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657846220666526e-05, + "loss": 0.7034, + "step": 52818 + }, + { + "epoch": 0.05, + "learning_rate": 4.965783326970551e-05, + "loss": 0.5194, + "step": 52819 + }, + { + "epoch": 0.05, + "learning_rate": 4.965782031850109e-05, + "loss": 0.3614, + "step": 52820 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657807367053254e-05, + "loss": 0.3457, + "step": 52821 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657794415362014e-05, + "loss": 0.3036, + "step": 52822 + }, + { + "epoch": 0.05, + "learning_rate": 4.965778146342736e-05, + "loss": 0.1829, + "step": 52823 + }, + { + "epoch": 0.05, + "learning_rate": 4.965776851124928e-05, + "loss": 0.1619, + "step": 52824 + }, + { + "epoch": 0.05, + "learning_rate": 4.96577555588278e-05, + "loss": 0.4828, + "step": 52825 + }, + { + "epoch": 0.05, + "learning_rate": 4.965774260616292e-05, + "loss": 0.5881, + "step": 52826 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657729653254614e-05, + "loss": 0.4118, + "step": 52827 + }, + { + "epoch": 0.05, + "learning_rate": 4.965771670010289e-05, + "loss": 0.414, + "step": 52828 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657703746707775e-05, + "loss": 0.6691, + "step": 52829 + }, + { + "epoch": 0.05, + "learning_rate": 4.965769079306925e-05, + "loss": 0.3277, + "step": 52830 + }, + { + "epoch": 0.05, + "learning_rate": 4.96576778391873e-05, + "loss": 0.4397, + "step": 52831 + }, + { + "epoch": 0.05, + "learning_rate": 4.965766488506195e-05, + "loss": 0.4395, + "step": 52832 + }, + { + "epoch": 0.05, + "learning_rate": 4.965765193069318e-05, + "loss": 0.8959, + "step": 52833 + }, + { + "epoch": 0.05, + "learning_rate": 4.965763897608101e-05, + "loss": 1.3052, + "step": 52834 + }, + { + "epoch": 0.05, + "learning_rate": 4.965762602122542e-05, + "loss": 1.263, + "step": 52835 + }, + { + "epoch": 0.05, + "learning_rate": 4.965761306612643e-05, + "loss": 1.2045, + "step": 52836 + }, + { + "epoch": 0.05, + "learning_rate": 4.965760011078403e-05, + "loss": 1.2566, + "step": 52837 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657587155198216e-05, + "loss": 1.3237, + "step": 52838 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657574199369e-05, + "loss": 1.4148, + "step": 52839 + }, + { + "epoch": 0.05, + "learning_rate": 4.965756124329636e-05, + "loss": 1.0998, + "step": 52840 + }, + { + "epoch": 0.05, + "learning_rate": 4.965754828698033e-05, + "loss": 1.0577, + "step": 52841 + }, + { + "epoch": 0.05, + "learning_rate": 4.965753533042087e-05, + "loss": 0.9079, + "step": 52842 + }, + { + "epoch": 0.05, + "learning_rate": 4.965752237361802e-05, + "loss": 0.8772, + "step": 52843 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657509416571754e-05, + "loss": 1.014, + "step": 52844 + }, + { + "epoch": 0.05, + "learning_rate": 4.965749645928208e-05, + "loss": 0.9325, + "step": 52845 + }, + { + "epoch": 0.05, + "learning_rate": 4.965748350174899e-05, + "loss": 1.0654, + "step": 52846 + }, + { + "epoch": 0.05, + "learning_rate": 4.96574705439725e-05, + "loss": 0.915, + "step": 52847 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657457585952605e-05, + "loss": 1.136, + "step": 52848 + }, + { + "epoch": 0.05, + "learning_rate": 4.965744462768929e-05, + "loss": 1.4024, + "step": 52849 + }, + { + "epoch": 0.05, + "learning_rate": 4.965743166918257e-05, + "loss": 1.684, + "step": 52850 + }, + { + "epoch": 0.05, + "learning_rate": 4.965741871043246e-05, + "loss": 1.2459, + "step": 52851 + }, + { + "epoch": 0.05, + "learning_rate": 4.965740575143893e-05, + "loss": 1.0749, + "step": 52852 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657392792201985e-05, + "loss": 0.7163, + "step": 52853 + }, + { + "epoch": 0.05, + "learning_rate": 4.965737983272164e-05, + "loss": 1.5596, + "step": 52854 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657366872997885e-05, + "loss": 1.5941, + "step": 52855 + }, + { + "epoch": 0.05, + "learning_rate": 4.965735391303073e-05, + "loss": 1.261, + "step": 52856 + }, + { + "epoch": 0.05, + "learning_rate": 4.965734095282016e-05, + "loss": 1.5457, + "step": 52857 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657327992366186e-05, + "loss": 1.2975, + "step": 52858 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657315031668804e-05, + "loss": 0.3285, + "step": 52859 + }, + { + "epoch": 0.05, + "learning_rate": 4.965730207072802e-05, + "loss": 0.7747, + "step": 52860 + }, + { + "epoch": 0.05, + "learning_rate": 4.965728910954382e-05, + "loss": 0.967, + "step": 52861 + }, + { + "epoch": 0.05, + "learning_rate": 4.965727614811622e-05, + "loss": 0.8019, + "step": 52862 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657263186445215e-05, + "loss": 1.2223, + "step": 52863 + }, + { + "epoch": 0.05, + "learning_rate": 4.96572502245308e-05, + "loss": 1.1725, + "step": 52864 + }, + { + "epoch": 0.05, + "learning_rate": 4.965723726237298e-05, + "loss": 1.2557, + "step": 52865 + }, + { + "epoch": 0.05, + "learning_rate": 4.965722429997176e-05, + "loss": 1.2069, + "step": 52866 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657211337327124e-05, + "loss": 1.702, + "step": 52867 + }, + { + "epoch": 0.05, + "learning_rate": 4.965719837443909e-05, + "loss": 1.3388, + "step": 52868 + }, + { + "epoch": 0.05, + "learning_rate": 4.965718541130765e-05, + "loss": 0.8551, + "step": 52869 + }, + { + "epoch": 0.05, + "learning_rate": 4.96571724479328e-05, + "loss": 0.9545, + "step": 52870 + }, + { + "epoch": 0.05, + "learning_rate": 4.965715948431455e-05, + "loss": 0.5908, + "step": 52871 + }, + { + "epoch": 0.05, + "learning_rate": 4.965714652045289e-05, + "loss": 0.9513, + "step": 52872 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657133556347834e-05, + "loss": 1.2006, + "step": 52873 + }, + { + "epoch": 0.05, + "learning_rate": 4.965712059199936e-05, + "loss": 1.3999, + "step": 52874 + }, + { + "epoch": 0.05, + "learning_rate": 4.96571076274075e-05, + "loss": 1.2954, + "step": 52875 + }, + { + "epoch": 0.05, + "learning_rate": 4.965709466257221e-05, + "loss": 1.2768, + "step": 52876 + }, + { + "epoch": 0.05, + "learning_rate": 4.965708169749353e-05, + "loss": 1.3392, + "step": 52877 + }, + { + "epoch": 0.05, + "learning_rate": 4.965706873217145e-05, + "loss": 1.1768, + "step": 52878 + }, + { + "epoch": 0.05, + "learning_rate": 4.965705576660596e-05, + "loss": 1.546, + "step": 52879 + }, + { + "epoch": 0.05, + "learning_rate": 4.965704280079707e-05, + "loss": 2.134, + "step": 52880 + }, + { + "epoch": 0.05, + "learning_rate": 4.965702983474477e-05, + "loss": 1.3711, + "step": 52881 + }, + { + "epoch": 0.05, + "learning_rate": 4.965701686844907e-05, + "loss": 1.216, + "step": 52882 + }, + { + "epoch": 0.05, + "learning_rate": 4.9657003901909966e-05, + "loss": 1.3095, + "step": 52883 + }, + { + "epoch": 0.05, + "learning_rate": 4.965699093512746e-05, + "loss": 1.0205, + "step": 52884 + }, + { + "epoch": 0.05, + "learning_rate": 4.965697796810155e-05, + "loss": 1.1242, + "step": 52885 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656965000832234e-05, + "loss": 1.1124, + "step": 52886 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656952033319516e-05, + "loss": 1.4525, + "step": 52887 + }, + { + "epoch": 0.05, + "learning_rate": 4.965693906556339e-05, + "loss": 0.7894, + "step": 52888 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656926097563864e-05, + "loss": 1.0865, + "step": 52889 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656913129320936e-05, + "loss": 1.0817, + "step": 52890 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656900160834605e-05, + "loss": 1.3402, + "step": 52891 + }, + { + "epoch": 0.05, + "learning_rate": 4.965688719210488e-05, + "loss": 1.134, + "step": 52892 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656874223131746e-05, + "loss": 1.2715, + "step": 52893 + }, + { + "epoch": 0.05, + "learning_rate": 4.965686125391521e-05, + "loss": 1.2782, + "step": 52894 + }, + { + "epoch": 0.05, + "learning_rate": 4.965684828445527e-05, + "loss": 1.3081, + "step": 52895 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656835314751925e-05, + "loss": 1.1604, + "step": 52896 + }, + { + "epoch": 0.05, + "learning_rate": 4.965682234480519e-05, + "loss": 1.5323, + "step": 52897 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656809374615046e-05, + "loss": 1.3819, + "step": 52898 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656796404181494e-05, + "loss": 1.3121, + "step": 52899 + }, + { + "epoch": 0.05, + "learning_rate": 4.965678343350455e-05, + "loss": 0.9717, + "step": 52900 + }, + { + "epoch": 0.05, + "learning_rate": 4.96567704625842e-05, + "loss": 1.0636, + "step": 52901 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656757491420446e-05, + "loss": 1.0193, + "step": 52902 + }, + { + "epoch": 0.05, + "learning_rate": 4.96567445200133e-05, + "loss": 1.178, + "step": 52903 + }, + { + "epoch": 0.05, + "learning_rate": 4.965673154836275e-05, + "loss": 1.2832, + "step": 52904 + }, + { + "epoch": 0.05, + "learning_rate": 4.965671857646879e-05, + "loss": 1.2163, + "step": 52905 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656705604331434e-05, + "loss": 0.9797, + "step": 52906 + }, + { + "epoch": 0.05, + "learning_rate": 4.965669263195068e-05, + "loss": 1.1569, + "step": 52907 + }, + { + "epoch": 0.05, + "learning_rate": 4.965667965932653e-05, + "loss": 1.1515, + "step": 52908 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656666686458976e-05, + "loss": 1.0096, + "step": 52909 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656653713348014e-05, + "loss": 1.1373, + "step": 52910 + }, + { + "epoch": 0.05, + "learning_rate": 4.965664073999366e-05, + "loss": 1.1082, + "step": 52911 + }, + { + "epoch": 0.05, + "learning_rate": 4.96566277663959e-05, + "loss": 0.8872, + "step": 52912 + }, + { + "epoch": 0.05, + "learning_rate": 4.965661479255474e-05, + "loss": 1.3254, + "step": 52913 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656601818470194e-05, + "loss": 1.1995, + "step": 52914 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656588844142236e-05, + "loss": 0.7632, + "step": 52915 + }, + { + "epoch": 0.05, + "learning_rate": 4.965657586957089e-05, + "loss": 0.8684, + "step": 52916 + }, + { + "epoch": 0.05, + "learning_rate": 4.965656289475613e-05, + "loss": 0.9951, + "step": 52917 + }, + { + "epoch": 0.05, + "learning_rate": 4.965654991969798e-05, + "loss": 1.2688, + "step": 52918 + }, + { + "epoch": 0.05, + "learning_rate": 4.965653694439642e-05, + "loss": 1.7789, + "step": 52919 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656523968851475e-05, + "loss": 1.1898, + "step": 52920 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656510993063123e-05, + "loss": 1.2944, + "step": 52921 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656498017031375e-05, + "loss": 1.493, + "step": 52922 + }, + { + "epoch": 0.05, + "learning_rate": 4.965648504075623e-05, + "loss": 0.9522, + "step": 52923 + }, + { + "epoch": 0.05, + "learning_rate": 4.965647206423768e-05, + "loss": 0.8703, + "step": 52924 + }, + { + "epoch": 0.05, + "learning_rate": 4.965645908747574e-05, + "loss": 1.3021, + "step": 52925 + }, + { + "epoch": 0.05, + "learning_rate": 4.96564461104704e-05, + "loss": 0.9336, + "step": 52926 + }, + { + "epoch": 0.05, + "learning_rate": 4.965643313322166e-05, + "loss": 0.627, + "step": 52927 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656420155729524e-05, + "loss": 0.6956, + "step": 52928 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656407177993986e-05, + "loss": 0.9962, + "step": 52929 + }, + { + "epoch": 0.05, + "learning_rate": 4.965639420001506e-05, + "loss": 1.063, + "step": 52930 + }, + { + "epoch": 0.05, + "learning_rate": 4.965638122179273e-05, + "loss": 1.1923, + "step": 52931 + }, + { + "epoch": 0.05, + "learning_rate": 4.965636824332699e-05, + "loss": 1.0973, + "step": 52932 + }, + { + "epoch": 0.05, + "learning_rate": 4.965635526461787e-05, + "loss": 1.077, + "step": 52933 + }, + { + "epoch": 0.05, + "learning_rate": 4.965634228566535e-05, + "loss": 1.2719, + "step": 52934 + }, + { + "epoch": 0.05, + "learning_rate": 4.965632930646943e-05, + "loss": 0.9668, + "step": 52935 + }, + { + "epoch": 0.05, + "learning_rate": 4.965631632703011e-05, + "loss": 1.0571, + "step": 52936 + }, + { + "epoch": 0.05, + "learning_rate": 4.96563033473474e-05, + "loss": 0.8367, + "step": 52937 + }, + { + "epoch": 0.05, + "learning_rate": 4.965629036742129e-05, + "loss": 1.4848, + "step": 52938 + }, + { + "epoch": 0.05, + "learning_rate": 4.965627738725178e-05, + "loss": 1.3378, + "step": 52939 + }, + { + "epoch": 0.05, + "learning_rate": 4.965626440683888e-05, + "loss": 1.1154, + "step": 52940 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656251426182574e-05, + "loss": 1.2499, + "step": 52941 + }, + { + "epoch": 0.05, + "learning_rate": 4.965623844528288e-05, + "loss": 1.3406, + "step": 52942 + }, + { + "epoch": 0.05, + "learning_rate": 4.96562254641398e-05, + "loss": 0.6351, + "step": 52943 + }, + { + "epoch": 0.05, + "learning_rate": 4.96562124827533e-05, + "loss": 0.8832, + "step": 52944 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656199501123426e-05, + "loss": 1.1156, + "step": 52945 + }, + { + "epoch": 0.05, + "learning_rate": 4.965618651925014e-05, + "loss": 1.347, + "step": 52946 + }, + { + "epoch": 0.05, + "learning_rate": 4.965617353713347e-05, + "loss": 1.1126, + "step": 52947 + }, + { + "epoch": 0.05, + "learning_rate": 4.96561605547734e-05, + "loss": 1.0564, + "step": 52948 + }, + { + "epoch": 0.05, + "learning_rate": 4.965614757216994e-05, + "loss": 1.1093, + "step": 52949 + }, + { + "epoch": 0.05, + "learning_rate": 4.965613458932308e-05, + "loss": 1.2028, + "step": 52950 + }, + { + "epoch": 0.05, + "learning_rate": 4.965612160623282e-05, + "loss": 1.0488, + "step": 52951 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656108622899175e-05, + "loss": 1.4638, + "step": 52952 + }, + { + "epoch": 0.05, + "learning_rate": 4.965609563932213e-05, + "loss": 1.3498, + "step": 52953 + }, + { + "epoch": 0.05, + "learning_rate": 4.965608265550169e-05, + "loss": 1.4222, + "step": 52954 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656069671437856e-05, + "loss": 0.6852, + "step": 52955 + }, + { + "epoch": 0.05, + "learning_rate": 4.965605668713062e-05, + "loss": 0.8749, + "step": 52956 + }, + { + "epoch": 0.05, + "learning_rate": 4.965604370258001e-05, + "loss": 0.9618, + "step": 52957 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656030717785995e-05, + "loss": 1.1342, + "step": 52958 + }, + { + "epoch": 0.05, + "learning_rate": 4.9656017732748585e-05, + "loss": 0.9014, + "step": 52959 + }, + { + "epoch": 0.05, + "learning_rate": 4.965600474746778e-05, + "loss": 0.8572, + "step": 52960 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655991761943584e-05, + "loss": 0.8862, + "step": 52961 + }, + { + "epoch": 0.05, + "learning_rate": 4.965597877617599e-05, + "loss": 0.7677, + "step": 52962 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655965790165e-05, + "loss": 1.3602, + "step": 52963 + }, + { + "epoch": 0.05, + "learning_rate": 4.965595280391063e-05, + "loss": 1.2945, + "step": 52964 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655939817412856e-05, + "loss": 1.3422, + "step": 52965 + }, + { + "epoch": 0.05, + "learning_rate": 4.96559268306717e-05, + "loss": 1.5799, + "step": 52966 + }, + { + "epoch": 0.05, + "learning_rate": 4.965591384368714e-05, + "loss": 1.0474, + "step": 52967 + }, + { + "epoch": 0.05, + "learning_rate": 4.965590085645919e-05, + "loss": 1.3369, + "step": 52968 + }, + { + "epoch": 0.05, + "learning_rate": 4.965588786898785e-05, + "loss": 1.3637, + "step": 52969 + }, + { + "epoch": 0.05, + "learning_rate": 4.965587488127311e-05, + "loss": 1.3163, + "step": 52970 + }, + { + "epoch": 0.05, + "learning_rate": 4.965586189331498e-05, + "loss": 1.1551, + "step": 52971 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655848905113466e-05, + "loss": 1.5121, + "step": 52972 + }, + { + "epoch": 0.05, + "learning_rate": 4.965583591666855e-05, + "loss": 1.4458, + "step": 52973 + }, + { + "epoch": 0.05, + "learning_rate": 4.965582292798025e-05, + "loss": 1.1531, + "step": 52974 + }, + { + "epoch": 0.05, + "learning_rate": 4.965580993904855e-05, + "loss": 1.0789, + "step": 52975 + }, + { + "epoch": 0.05, + "learning_rate": 4.965579694987347e-05, + "loss": 1.0917, + "step": 52976 + }, + { + "epoch": 0.05, + "learning_rate": 4.965578396045499e-05, + "loss": 0.8156, + "step": 52977 + }, + { + "epoch": 0.05, + "learning_rate": 4.965577097079312e-05, + "loss": 0.5572, + "step": 52978 + }, + { + "epoch": 0.05, + "learning_rate": 4.965575798088786e-05, + "loss": 1.2733, + "step": 52979 + }, + { + "epoch": 0.05, + "learning_rate": 4.965574499073921e-05, + "loss": 1.2591, + "step": 52980 + }, + { + "epoch": 0.05, + "learning_rate": 4.965573200034716e-05, + "loss": 0.9837, + "step": 52981 + }, + { + "epoch": 0.05, + "learning_rate": 4.965571900971172e-05, + "loss": 0.7394, + "step": 52982 + }, + { + "epoch": 0.05, + "learning_rate": 4.96557060188329e-05, + "loss": 1.2243, + "step": 52983 + }, + { + "epoch": 0.05, + "learning_rate": 4.965569302771068e-05, + "loss": 1.4155, + "step": 52984 + }, + { + "epoch": 0.05, + "learning_rate": 4.965568003634508e-05, + "loss": 1.3308, + "step": 52985 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655667044736075e-05, + "loss": 1.094, + "step": 52986 + }, + { + "epoch": 0.05, + "learning_rate": 4.965565405288369e-05, + "loss": 1.022, + "step": 52987 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655641060787914e-05, + "loss": 1.2374, + "step": 52988 + }, + { + "epoch": 0.05, + "learning_rate": 4.965562806844874e-05, + "loss": 1.3106, + "step": 52989 + }, + { + "epoch": 0.05, + "learning_rate": 4.965561507586618e-05, + "loss": 1.2101, + "step": 52990 + }, + { + "epoch": 0.05, + "learning_rate": 4.965560208304023e-05, + "loss": 1.2033, + "step": 52991 + }, + { + "epoch": 0.05, + "learning_rate": 4.96555890899709e-05, + "loss": 1.0466, + "step": 52992 + }, + { + "epoch": 0.05, + "learning_rate": 4.965557609665817e-05, + "loss": 0.3061, + "step": 52993 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655563103102055e-05, + "loss": 1.1072, + "step": 52994 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655550109302544e-05, + "loss": 1.2317, + "step": 52995 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655537115259656e-05, + "loss": 1.0075, + "step": 52996 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655524120973366e-05, + "loss": 0.975, + "step": 52997 + }, + { + "epoch": 0.05, + "learning_rate": 4.965551112644369e-05, + "loss": 1.0692, + "step": 52998 + }, + { + "epoch": 0.05, + "learning_rate": 4.965549813167063e-05, + "loss": 1.178, + "step": 52999 + }, + { + "epoch": 0.05, + "learning_rate": 4.965548513665418e-05, + "loss": 1.2086, + "step": 53000 + }, + { + "epoch": 0.05, + "eval_loss": 1.0795215368270874, + "eval_runtime": 125.2775, + "eval_samples_per_second": 11.055, + "eval_steps_per_second": 5.532, + "step": 53000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655472141394345e-05, + "loss": 1.0918, + "step": 53001 + }, + { + "epoch": 0.05, + "learning_rate": 4.965545914589111e-05, + "loss": 1.4648, + "step": 53002 + }, + { + "epoch": 0.05, + "learning_rate": 4.965544615014449e-05, + "loss": 1.5264, + "step": 53003 + }, + { + "epoch": 0.05, + "learning_rate": 4.965543315415449e-05, + "loss": 1.0094, + "step": 53004 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655420157921094e-05, + "loss": 0.7957, + "step": 53005 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655407161444315e-05, + "loss": 1.0061, + "step": 53006 + }, + { + "epoch": 0.05, + "learning_rate": 4.965539416472415e-05, + "loss": 1.3517, + "step": 53007 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655381167760595e-05, + "loss": 1.2169, + "step": 53008 + }, + { + "epoch": 0.05, + "learning_rate": 4.965536817055365e-05, + "loss": 1.1411, + "step": 53009 + }, + { + "epoch": 0.05, + "learning_rate": 4.965535517310332e-05, + "loss": 1.0667, + "step": 53010 + }, + { + "epoch": 0.05, + "learning_rate": 4.96553421754096e-05, + "loss": 0.7201, + "step": 53011 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655329177472496e-05, + "loss": 1.1017, + "step": 53012 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655316179292e-05, + "loss": 0.8591, + "step": 53013 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655303180868125e-05, + "loss": 1.125, + "step": 53014 + }, + { + "epoch": 0.05, + "learning_rate": 4.965529018220086e-05, + "loss": 1.185, + "step": 53015 + }, + { + "epoch": 0.05, + "learning_rate": 4.965527718329021e-05, + "loss": 1.264, + "step": 53016 + }, + { + "epoch": 0.05, + "learning_rate": 4.965526418413616e-05, + "loss": 1.1416, + "step": 53017 + }, + { + "epoch": 0.05, + "learning_rate": 4.965525118473874e-05, + "loss": 0.9915, + "step": 53018 + }, + { + "epoch": 0.05, + "learning_rate": 4.965523818509793e-05, + "loss": 1.058, + "step": 53019 + }, + { + "epoch": 0.05, + "learning_rate": 4.965522518521373e-05, + "loss": 1.1575, + "step": 53020 + }, + { + "epoch": 0.05, + "learning_rate": 4.965521218508615e-05, + "loss": 1.1177, + "step": 53021 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655199184715174e-05, + "loss": 1.0491, + "step": 53022 + }, + { + "epoch": 0.05, + "learning_rate": 4.965518618410082e-05, + "loss": 1.1381, + "step": 53023 + }, + { + "epoch": 0.05, + "learning_rate": 4.965517318324308e-05, + "loss": 1.0955, + "step": 53024 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655160182141956e-05, + "loss": 1.3482, + "step": 53025 + }, + { + "epoch": 0.05, + "learning_rate": 4.965514718079745e-05, + "loss": 1.206, + "step": 53026 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655134179209544e-05, + "loss": 1.1459, + "step": 53027 + }, + { + "epoch": 0.05, + "learning_rate": 4.965512117737826e-05, + "loss": 1.2255, + "step": 53028 + }, + { + "epoch": 0.05, + "learning_rate": 4.96551081753036e-05, + "loss": 1.0166, + "step": 53029 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655095172985546e-05, + "loss": 1.2725, + "step": 53030 + }, + { + "epoch": 0.05, + "learning_rate": 4.96550821704241e-05, + "loss": 1.0934, + "step": 53031 + }, + { + "epoch": 0.05, + "learning_rate": 4.965506916761928e-05, + "loss": 0.9668, + "step": 53032 + }, + { + "epoch": 0.05, + "learning_rate": 4.965505616457108e-05, + "loss": 1.2376, + "step": 53033 + }, + { + "epoch": 0.05, + "learning_rate": 4.9655043161279494e-05, + "loss": 0.9925, + "step": 53034 + }, + { + "epoch": 0.05, + "learning_rate": 4.965503015774451e-05, + "loss": 1.3103, + "step": 53035 + }, + { + "epoch": 0.05, + "learning_rate": 4.965501715396615e-05, + "loss": 0.8787, + "step": 53036 + }, + { + "epoch": 0.05, + "learning_rate": 4.965500414994442e-05, + "loss": 1.0273, + "step": 53037 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654991145679286e-05, + "loss": 1.075, + "step": 53038 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654978141170785e-05, + "loss": 1.2223, + "step": 53039 + }, + { + "epoch": 0.05, + "learning_rate": 4.965496513641889e-05, + "loss": 1.1786, + "step": 53040 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654952131423606e-05, + "loss": 0.9231, + "step": 53041 + }, + { + "epoch": 0.05, + "learning_rate": 4.965493912618495e-05, + "loss": 1.0355, + "step": 53042 + }, + { + "epoch": 0.05, + "learning_rate": 4.96549261207029e-05, + "loss": 1.0205, + "step": 53043 + }, + { + "epoch": 0.05, + "learning_rate": 4.965491311497747e-05, + "loss": 0.9292, + "step": 53044 + }, + { + "epoch": 0.05, + "learning_rate": 4.965490010900867e-05, + "loss": 0.3167, + "step": 53045 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654887102796477e-05, + "loss": 0.3081, + "step": 53046 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654874096340904e-05, + "loss": 0.3848, + "step": 53047 + }, + { + "epoch": 0.05, + "learning_rate": 4.965486108964195e-05, + "loss": 0.3763, + "step": 53048 + }, + { + "epoch": 0.05, + "learning_rate": 4.96548480826996e-05, + "loss": 0.2225, + "step": 53049 + }, + { + "epoch": 0.05, + "learning_rate": 4.965483507551388e-05, + "loss": 0.1422, + "step": 53050 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654822068084784e-05, + "loss": 0.1709, + "step": 53051 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654809060412296e-05, + "loss": 0.1831, + "step": 53052 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654796052496426e-05, + "loss": 0.1252, + "step": 53053 + }, + { + "epoch": 0.05, + "learning_rate": 4.965478304433718e-05, + "loss": 0.1843, + "step": 53054 + }, + { + "epoch": 0.05, + "learning_rate": 4.965477003593454e-05, + "loss": 0.2029, + "step": 53055 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654757027288535e-05, + "loss": 0.0846, + "step": 53056 + }, + { + "epoch": 0.05, + "learning_rate": 4.965474401839913e-05, + "loss": 0.11, + "step": 53057 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654731009266367e-05, + "loss": 0.1668, + "step": 53058 + }, + { + "epoch": 0.05, + "learning_rate": 4.965471799989021e-05, + "loss": 0.0909, + "step": 53059 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654704990270666e-05, + "loss": 0.1357, + "step": 53060 + }, + { + "epoch": 0.05, + "learning_rate": 4.965469198040775e-05, + "loss": 0.4276, + "step": 53061 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654678970301446e-05, + "loss": 1.3594, + "step": 53062 + }, + { + "epoch": 0.05, + "learning_rate": 4.965466595995177e-05, + "loss": 0.9351, + "step": 53063 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654652949358714e-05, + "loss": 1.1641, + "step": 53064 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654639938522274e-05, + "loss": 1.1929, + "step": 53065 + }, + { + "epoch": 0.05, + "learning_rate": 4.965462692744245e-05, + "loss": 1.3604, + "step": 53066 + }, + { + "epoch": 0.05, + "learning_rate": 4.965461391611925e-05, + "loss": 1.4407, + "step": 53067 + }, + { + "epoch": 0.05, + "learning_rate": 4.965460090455267e-05, + "loss": 1.3064, + "step": 53068 + }, + { + "epoch": 0.05, + "learning_rate": 4.965458789274271e-05, + "loss": 0.8301, + "step": 53069 + }, + { + "epoch": 0.05, + "learning_rate": 4.965457488068937e-05, + "loss": 1.4657, + "step": 53070 + }, + { + "epoch": 0.05, + "learning_rate": 4.965456186839265e-05, + "loss": 1.329, + "step": 53071 + }, + { + "epoch": 0.05, + "learning_rate": 4.965454885585255e-05, + "loss": 1.0141, + "step": 53072 + }, + { + "epoch": 0.05, + "learning_rate": 4.965453584306907e-05, + "loss": 1.1431, + "step": 53073 + }, + { + "epoch": 0.05, + "learning_rate": 4.965452283004221e-05, + "loss": 2.1085, + "step": 53074 + }, + { + "epoch": 0.05, + "learning_rate": 4.965450981677198e-05, + "loss": 1.0998, + "step": 53075 + }, + { + "epoch": 0.05, + "learning_rate": 4.965449680325836e-05, + "loss": 0.7222, + "step": 53076 + }, + { + "epoch": 0.05, + "learning_rate": 4.965448378950136e-05, + "loss": 1.1724, + "step": 53077 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654470775500996e-05, + "loss": 1.215, + "step": 53078 + }, + { + "epoch": 0.05, + "learning_rate": 4.965445776125724e-05, + "loss": 0.5172, + "step": 53079 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654444746770115e-05, + "loss": 0.96, + "step": 53080 + }, + { + "epoch": 0.05, + "learning_rate": 4.96544317320396e-05, + "loss": 1.0981, + "step": 53081 + }, + { + "epoch": 0.05, + "learning_rate": 4.965441871706571e-05, + "loss": 1.4621, + "step": 53082 + }, + { + "epoch": 0.05, + "learning_rate": 4.965440570184845e-05, + "loss": 1.0534, + "step": 53083 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654392686387804e-05, + "loss": 1.4694, + "step": 53084 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654379670683784e-05, + "loss": 1.8152, + "step": 53085 + }, + { + "epoch": 0.05, + "learning_rate": 4.965436665473639e-05, + "loss": 1.2454, + "step": 53086 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654353638545614e-05, + "loss": 1.0771, + "step": 53087 + }, + { + "epoch": 0.05, + "learning_rate": 4.965434062211146e-05, + "loss": 0.35, + "step": 53088 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654327605433927e-05, + "loss": 0.5211, + "step": 53089 + }, + { + "epoch": 0.05, + "learning_rate": 4.965431458851302e-05, + "loss": 0.9494, + "step": 53090 + }, + { + "epoch": 0.05, + "learning_rate": 4.965430157134874e-05, + "loss": 1.3897, + "step": 53091 + }, + { + "epoch": 0.05, + "learning_rate": 4.965428855394107e-05, + "loss": 1.0161, + "step": 53092 + }, + { + "epoch": 0.05, + "learning_rate": 4.965427553629004e-05, + "loss": 1.4859, + "step": 53093 + }, + { + "epoch": 0.05, + "learning_rate": 4.965426251839562e-05, + "loss": 1.2105, + "step": 53094 + }, + { + "epoch": 0.05, + "learning_rate": 4.965424950025783e-05, + "loss": 1.0042, + "step": 53095 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654236481876666e-05, + "loss": 1.0785, + "step": 53096 + }, + { + "epoch": 0.05, + "learning_rate": 4.965422346325212e-05, + "loss": 1.149, + "step": 53097 + }, + { + "epoch": 0.05, + "learning_rate": 4.96542104443842e-05, + "loss": 1.2495, + "step": 53098 + }, + { + "epoch": 0.05, + "learning_rate": 4.965419742527291e-05, + "loss": 0.9229, + "step": 53099 + }, + { + "epoch": 0.05, + "learning_rate": 4.965418440591824e-05, + "loss": 0.7052, + "step": 53100 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654171386320184e-05, + "loss": 0.9676, + "step": 53101 + }, + { + "epoch": 0.05, + "learning_rate": 4.965415836647877e-05, + "loss": 1.0949, + "step": 53102 + }, + { + "epoch": 0.05, + "learning_rate": 4.965414534639397e-05, + "loss": 1.2735, + "step": 53103 + }, + { + "epoch": 0.05, + "learning_rate": 4.96541323260658e-05, + "loss": 1.2502, + "step": 53104 + }, + { + "epoch": 0.05, + "learning_rate": 4.965411930549424e-05, + "loss": 1.0803, + "step": 53105 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654106284679316e-05, + "loss": 1.2496, + "step": 53106 + }, + { + "epoch": 0.05, + "learning_rate": 4.965409326362102e-05, + "loss": 1.4077, + "step": 53107 + }, + { + "epoch": 0.05, + "learning_rate": 4.965408024231934e-05, + "loss": 1.1196, + "step": 53108 + }, + { + "epoch": 0.05, + "learning_rate": 4.96540672207743e-05, + "loss": 1.3141, + "step": 53109 + }, + { + "epoch": 0.05, + "learning_rate": 4.965405419898587e-05, + "loss": 0.9451, + "step": 53110 + }, + { + "epoch": 0.05, + "learning_rate": 4.9654041176954075e-05, + "loss": 1.2035, + "step": 53111 + }, + { + "epoch": 0.05, + "learning_rate": 4.96540281546789e-05, + "loss": 1.0976, + "step": 53112 + }, + { + "epoch": 0.05, + "learning_rate": 4.965401513216036e-05, + "loss": 1.1791, + "step": 53113 + }, + { + "epoch": 0.05, + "learning_rate": 4.965400210939843e-05, + "loss": 0.9901, + "step": 53114 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653989086393145e-05, + "loss": 1.7498, + "step": 53115 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653976063144474e-05, + "loss": 1.1143, + "step": 53116 + }, + { + "epoch": 0.05, + "learning_rate": 4.965396303965243e-05, + "loss": 1.3279, + "step": 53117 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653950015917015e-05, + "loss": 1.2797, + "step": 53118 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653936991938235e-05, + "loss": 1.357, + "step": 53119 + }, + { + "epoch": 0.05, + "learning_rate": 4.965392396771607e-05, + "loss": 1.2874, + "step": 53120 + }, + { + "epoch": 0.05, + "learning_rate": 4.965391094325054e-05, + "loss": 1.1594, + "step": 53121 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653897918541624e-05, + "loss": 0.9687, + "step": 53122 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653884893589346e-05, + "loss": 1.1725, + "step": 53123 + }, + { + "epoch": 0.05, + "learning_rate": 4.965387186839369e-05, + "loss": 1.3107, + "step": 53124 + }, + { + "epoch": 0.05, + "learning_rate": 4.965385884295467e-05, + "loss": 0.9059, + "step": 53125 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653845817272274e-05, + "loss": 1.3024, + "step": 53126 + }, + { + "epoch": 0.05, + "learning_rate": 4.96538327913465e-05, + "loss": 1.108, + "step": 53127 + }, + { + "epoch": 0.05, + "learning_rate": 4.965381976517736e-05, + "loss": 1.1219, + "step": 53128 + }, + { + "epoch": 0.05, + "learning_rate": 4.965380673876484e-05, + "loss": 0.8853, + "step": 53129 + }, + { + "epoch": 0.05, + "learning_rate": 4.965379371210895e-05, + "loss": 1.1046, + "step": 53130 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653780685209696e-05, + "loss": 1.3432, + "step": 53131 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653767658067065e-05, + "loss": 0.619, + "step": 53132 + }, + { + "epoch": 0.05, + "learning_rate": 4.965375463068106e-05, + "loss": 0.9098, + "step": 53133 + }, + { + "epoch": 0.05, + "learning_rate": 4.965374160305169e-05, + "loss": 1.1448, + "step": 53134 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653728575178934e-05, + "loss": 1.4842, + "step": 53135 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653715547062825e-05, + "loss": 1.1595, + "step": 53136 + }, + { + "epoch": 0.05, + "learning_rate": 4.965370251870334e-05, + "loss": 1.2213, + "step": 53137 + }, + { + "epoch": 0.05, + "learning_rate": 4.965368949010047e-05, + "loss": 1.2282, + "step": 53138 + }, + { + "epoch": 0.05, + "learning_rate": 4.965367646125425e-05, + "loss": 1.1491, + "step": 53139 + }, + { + "epoch": 0.05, + "learning_rate": 4.965366343216464e-05, + "loss": 1.0519, + "step": 53140 + }, + { + "epoch": 0.05, + "learning_rate": 4.965365040283168e-05, + "loss": 1.1426, + "step": 53141 + }, + { + "epoch": 0.05, + "learning_rate": 4.965363737325534e-05, + "loss": 1.2439, + "step": 53142 + }, + { + "epoch": 0.05, + "learning_rate": 4.965362434343562e-05, + "loss": 0.7255, + "step": 53143 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653611313372536e-05, + "loss": 1.5725, + "step": 53144 + }, + { + "epoch": 0.05, + "learning_rate": 4.965359828306609e-05, + "loss": 1.0603, + "step": 53145 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653585252516264e-05, + "loss": 1.1895, + "step": 53146 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653572221723076e-05, + "loss": 1.0268, + "step": 53147 + }, + { + "epoch": 0.05, + "learning_rate": 4.965355919068651e-05, + "loss": 1.1062, + "step": 53148 + }, + { + "epoch": 0.05, + "learning_rate": 4.965354615940658e-05, + "loss": 1.0457, + "step": 53149 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653533127883285e-05, + "loss": 1.5248, + "step": 53150 + }, + { + "epoch": 0.05, + "learning_rate": 4.965352009611661e-05, + "loss": 1.1876, + "step": 53151 + }, + { + "epoch": 0.05, + "learning_rate": 4.965350706410658e-05, + "loss": 1.2426, + "step": 53152 + }, + { + "epoch": 0.05, + "learning_rate": 4.965349403185317e-05, + "loss": 1.1893, + "step": 53153 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653480999356393e-05, + "loss": 1.1679, + "step": 53154 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653467966616243e-05, + "loss": 1.2156, + "step": 53155 + }, + { + "epoch": 0.05, + "learning_rate": 4.965345493363273e-05, + "loss": 1.1117, + "step": 53156 + }, + { + "epoch": 0.05, + "learning_rate": 4.965344190040585e-05, + "loss": 1.0002, + "step": 53157 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653428866935604e-05, + "loss": 1.0671, + "step": 53158 + }, + { + "epoch": 0.05, + "learning_rate": 4.965341583322198e-05, + "loss": 1.2269, + "step": 53159 + }, + { + "epoch": 0.05, + "learning_rate": 4.965340279926499e-05, + "loss": 1.0846, + "step": 53160 + }, + { + "epoch": 0.05, + "learning_rate": 4.965338976506464e-05, + "loss": 1.0529, + "step": 53161 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653376730620916e-05, + "loss": 1.4735, + "step": 53162 + }, + { + "epoch": 0.05, + "learning_rate": 4.965336369593382e-05, + "loss": 1.1667, + "step": 53163 + }, + { + "epoch": 0.05, + "learning_rate": 4.965335066100336e-05, + "loss": 0.9316, + "step": 53164 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653337625829536e-05, + "loss": 1.0858, + "step": 53165 + }, + { + "epoch": 0.05, + "learning_rate": 4.965332459041234e-05, + "loss": 1.0452, + "step": 53166 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653311554751785e-05, + "loss": 1.0667, + "step": 53167 + }, + { + "epoch": 0.05, + "learning_rate": 4.965329851884786e-05, + "loss": 1.357, + "step": 53168 + }, + { + "epoch": 0.05, + "learning_rate": 4.965328548270056e-05, + "loss": 1.2924, + "step": 53169 + }, + { + "epoch": 0.05, + "learning_rate": 4.96532724463099e-05, + "loss": 1.1172, + "step": 53170 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653259409675876e-05, + "loss": 1.6051, + "step": 53171 + }, + { + "epoch": 0.05, + "learning_rate": 4.965324637279848e-05, + "loss": 1.5256, + "step": 53172 + }, + { + "epoch": 0.05, + "learning_rate": 4.965323333567772e-05, + "loss": 1.2752, + "step": 53173 + }, + { + "epoch": 0.05, + "learning_rate": 4.965322029831359e-05, + "loss": 1.0376, + "step": 53174 + }, + { + "epoch": 0.05, + "learning_rate": 4.965320726070609e-05, + "loss": 0.7585, + "step": 53175 + }, + { + "epoch": 0.05, + "learning_rate": 4.965319422285524e-05, + "loss": 0.7985, + "step": 53176 + }, + { + "epoch": 0.05, + "learning_rate": 4.965318118476101e-05, + "loss": 1.143, + "step": 53177 + }, + { + "epoch": 0.05, + "learning_rate": 4.965316814642342e-05, + "loss": 0.8395, + "step": 53178 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653155107842464e-05, + "loss": 1.0376, + "step": 53179 + }, + { + "epoch": 0.05, + "learning_rate": 4.965314206901814e-05, + "loss": 1.1177, + "step": 53180 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653129029950455e-05, + "loss": 0.9937, + "step": 53181 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653115990639396e-05, + "loss": 1.1624, + "step": 53182 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653102951084974e-05, + "loss": 1.0969, + "step": 53183 + }, + { + "epoch": 0.05, + "learning_rate": 4.965308991128719e-05, + "loss": 1.0646, + "step": 53184 + }, + { + "epoch": 0.05, + "learning_rate": 4.965307687124604e-05, + "loss": 1.0465, + "step": 53185 + }, + { + "epoch": 0.05, + "learning_rate": 4.965306383096153e-05, + "loss": 1.2556, + "step": 53186 + }, + { + "epoch": 0.05, + "learning_rate": 4.9653050790433654e-05, + "loss": 0.4969, + "step": 53187 + }, + { + "epoch": 0.05, + "learning_rate": 4.965303774966241e-05, + "loss": 0.765, + "step": 53188 + }, + { + "epoch": 0.05, + "learning_rate": 4.96530247086478e-05, + "loss": 1.006, + "step": 53189 + }, + { + "epoch": 0.05, + "learning_rate": 4.965301166738983e-05, + "loss": 1.3217, + "step": 53190 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652998625888496e-05, + "loss": 1.2746, + "step": 53191 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652985584143796e-05, + "loss": 1.2008, + "step": 53192 + }, + { + "epoch": 0.05, + "learning_rate": 4.965297254215573e-05, + "loss": 0.7246, + "step": 53193 + }, + { + "epoch": 0.05, + "learning_rate": 4.965295949992431e-05, + "loss": 1.2258, + "step": 53194 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652946457449514e-05, + "loss": 1.1334, + "step": 53195 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652933414731364e-05, + "loss": 1.0117, + "step": 53196 + }, + { + "epoch": 0.05, + "learning_rate": 4.965292037176984e-05, + "loss": 1.0925, + "step": 53197 + }, + { + "epoch": 0.05, + "learning_rate": 4.965290732856496e-05, + "loss": 1.3899, + "step": 53198 + }, + { + "epoch": 0.05, + "learning_rate": 4.965289428511672e-05, + "loss": 1.5564, + "step": 53199 + }, + { + "epoch": 0.05, + "learning_rate": 4.965288124142511e-05, + "loss": 1.156, + "step": 53200 + }, + { + "epoch": 0.05, + "learning_rate": 4.965286819749014e-05, + "loss": 1.4517, + "step": 53201 + }, + { + "epoch": 0.05, + "learning_rate": 4.965285515331181e-05, + "loss": 2.0287, + "step": 53202 + }, + { + "epoch": 0.05, + "learning_rate": 4.965284210889011e-05, + "loss": 1.3747, + "step": 53203 + }, + { + "epoch": 0.05, + "learning_rate": 4.965282906422506e-05, + "loss": 1.3385, + "step": 53204 + }, + { + "epoch": 0.05, + "learning_rate": 4.965281601931664e-05, + "loss": 1.3633, + "step": 53205 + }, + { + "epoch": 0.05, + "learning_rate": 4.965280297416485e-05, + "loss": 1.2381, + "step": 53206 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652789928769706e-05, + "loss": 1.4876, + "step": 53207 + }, + { + "epoch": 0.05, + "learning_rate": 4.96527768831312e-05, + "loss": 1.4048, + "step": 53208 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652763837249336e-05, + "loss": 1.3431, + "step": 53209 + }, + { + "epoch": 0.05, + "learning_rate": 4.96527507911241e-05, + "loss": 1.2417, + "step": 53210 + }, + { + "epoch": 0.05, + "learning_rate": 4.965273774475551e-05, + "loss": 1.4127, + "step": 53211 + }, + { + "epoch": 0.05, + "learning_rate": 4.965272469814356e-05, + "loss": 1.2553, + "step": 53212 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652711651288244e-05, + "loss": 1.1956, + "step": 53213 + }, + { + "epoch": 0.05, + "learning_rate": 4.965269860418957e-05, + "loss": 1.1004, + "step": 53214 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652685556847536e-05, + "loss": 1.1371, + "step": 53215 + }, + { + "epoch": 0.05, + "learning_rate": 4.965267250926214e-05, + "loss": 1.4847, + "step": 53216 + }, + { + "epoch": 0.05, + "learning_rate": 4.965265946143338e-05, + "loss": 1.2149, + "step": 53217 + }, + { + "epoch": 0.05, + "learning_rate": 4.965264641336126e-05, + "loss": 1.2932, + "step": 53218 + }, + { + "epoch": 0.05, + "learning_rate": 4.965263336504578e-05, + "loss": 1.2239, + "step": 53219 + }, + { + "epoch": 0.05, + "learning_rate": 4.965262031648694e-05, + "loss": 1.4982, + "step": 53220 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652607267684745e-05, + "loss": 1.0904, + "step": 53221 + }, + { + "epoch": 0.05, + "learning_rate": 4.965259421863918e-05, + "loss": 1.0879, + "step": 53222 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652581169350266e-05, + "loss": 1.1686, + "step": 53223 + }, + { + "epoch": 0.05, + "learning_rate": 4.965256811981798e-05, + "loss": 0.9726, + "step": 53224 + }, + { + "epoch": 0.05, + "learning_rate": 4.965255507004234e-05, + "loss": 1.0661, + "step": 53225 + }, + { + "epoch": 0.05, + "learning_rate": 4.965254202002334e-05, + "loss": 1.0883, + "step": 53226 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652528969760983e-05, + "loss": 0.9092, + "step": 53227 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652515919255266e-05, + "loss": 1.1259, + "step": 53228 + }, + { + "epoch": 0.05, + "learning_rate": 4.965250286850619e-05, + "loss": 1.2653, + "step": 53229 + }, + { + "epoch": 0.05, + "learning_rate": 4.965248981751375e-05, + "loss": 1.2713, + "step": 53230 + }, + { + "epoch": 0.05, + "learning_rate": 4.965247676627796e-05, + "loss": 1.2564, + "step": 53231 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652463714798803e-05, + "loss": 1.2612, + "step": 53232 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652450663076286e-05, + "loss": 1.135, + "step": 53233 + }, + { + "epoch": 0.05, + "learning_rate": 4.965243761111041e-05, + "loss": 1.1289, + "step": 53234 + }, + { + "epoch": 0.05, + "learning_rate": 4.965242455890118e-05, + "loss": 1.1935, + "step": 53235 + }, + { + "epoch": 0.05, + "learning_rate": 4.965241150644859e-05, + "loss": 1.0604, + "step": 53236 + }, + { + "epoch": 0.05, + "learning_rate": 4.965239845375264e-05, + "loss": 1.1751, + "step": 53237 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652385400813346e-05, + "loss": 1.3916, + "step": 53238 + }, + { + "epoch": 0.05, + "learning_rate": 4.965237234763068e-05, + "loss": 1.1844, + "step": 53239 + }, + { + "epoch": 0.05, + "learning_rate": 4.965235929420466e-05, + "loss": 1.1626, + "step": 53240 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652346240535285e-05, + "loss": 0.964, + "step": 53241 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652333186622543e-05, + "loss": 1.0133, + "step": 53242 + }, + { + "epoch": 0.05, + "learning_rate": 4.965232013246645e-05, + "loss": 1.0056, + "step": 53243 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652307078067e-05, + "loss": 1.018, + "step": 53244 + }, + { + "epoch": 0.05, + "learning_rate": 4.96522940234242e-05, + "loss": 0.9067, + "step": 53245 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652280968538025e-05, + "loss": 1.3766, + "step": 53246 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652267913408504e-05, + "loss": 1.0725, + "step": 53247 + }, + { + "epoch": 0.05, + "learning_rate": 4.965225485803563e-05, + "loss": 1.0633, + "step": 53248 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652241802419394e-05, + "loss": 0.9355, + "step": 53249 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652228746559805e-05, + "loss": 1.2827, + "step": 53250 + }, + { + "epoch": 0.05, + "learning_rate": 4.965221569045685e-05, + "loss": 1.0303, + "step": 53251 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652202634110546e-05, + "loss": 1.2783, + "step": 53252 + }, + { + "epoch": 0.05, + "learning_rate": 4.965218957752088e-05, + "loss": 1.3718, + "step": 53253 + }, + { + "epoch": 0.05, + "learning_rate": 4.965217652068786e-05, + "loss": 1.1512, + "step": 53254 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652163463611494e-05, + "loss": 1.197, + "step": 53255 + }, + { + "epoch": 0.05, + "learning_rate": 4.965215040629176e-05, + "loss": 0.9446, + "step": 53256 + }, + { + "epoch": 0.05, + "learning_rate": 4.965213734872868e-05, + "loss": 1.0328, + "step": 53257 + }, + { + "epoch": 0.05, + "learning_rate": 4.965212429092224e-05, + "loss": 0.4073, + "step": 53258 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652111232872446e-05, + "loss": 0.8353, + "step": 53259 + }, + { + "epoch": 0.05, + "learning_rate": 4.96520981745793e-05, + "loss": 1.1374, + "step": 53260 + }, + { + "epoch": 0.05, + "learning_rate": 4.965208511604279e-05, + "loss": 1.1039, + "step": 53261 + }, + { + "epoch": 0.05, + "learning_rate": 4.965207205726293e-05, + "loss": 1.6085, + "step": 53262 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652058998239716e-05, + "loss": 0.9535, + "step": 53263 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652045938973144e-05, + "loss": 1.0242, + "step": 53264 + }, + { + "epoch": 0.05, + "learning_rate": 4.965203287946322e-05, + "loss": 1.1574, + "step": 53265 + }, + { + "epoch": 0.05, + "learning_rate": 4.9652019819709945e-05, + "loss": 1.172, + "step": 53266 + }, + { + "epoch": 0.05, + "learning_rate": 4.965200675971331e-05, + "loss": 1.5505, + "step": 53267 + }, + { + "epoch": 0.05, + "learning_rate": 4.965199369947332e-05, + "loss": 1.0477, + "step": 53268 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651980638989984e-05, + "loss": 1.0185, + "step": 53269 + }, + { + "epoch": 0.05, + "learning_rate": 4.965196757826328e-05, + "loss": 1.2357, + "step": 53270 + }, + { + "epoch": 0.05, + "learning_rate": 4.965195451729323e-05, + "loss": 0.9918, + "step": 53271 + }, + { + "epoch": 0.05, + "learning_rate": 4.965194145607983e-05, + "loss": 0.9173, + "step": 53272 + }, + { + "epoch": 0.05, + "learning_rate": 4.965192839462307e-05, + "loss": 1.369, + "step": 53273 + }, + { + "epoch": 0.05, + "learning_rate": 4.965191533292296e-05, + "loss": 0.9896, + "step": 53274 + }, + { + "epoch": 0.05, + "learning_rate": 4.96519022709795e-05, + "loss": 1.113, + "step": 53275 + }, + { + "epoch": 0.05, + "learning_rate": 4.965188920879268e-05, + "loss": 1.0288, + "step": 53276 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651876146362515e-05, + "loss": 1.3298, + "step": 53277 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651863083688987e-05, + "loss": 1.0366, + "step": 53278 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651850020772116e-05, + "loss": 1.3824, + "step": 53279 + }, + { + "epoch": 0.05, + "learning_rate": 4.965183695761189e-05, + "loss": 1.0523, + "step": 53280 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651823894208306e-05, + "loss": 0.8083, + "step": 53281 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651810830561374e-05, + "loss": 0.8094, + "step": 53282 + }, + { + "epoch": 0.05, + "learning_rate": 4.965179776667108e-05, + "loss": 0.8013, + "step": 53283 + }, + { + "epoch": 0.05, + "learning_rate": 4.965178470253745e-05, + "loss": 0.6517, + "step": 53284 + }, + { + "epoch": 0.05, + "learning_rate": 4.965177163816046e-05, + "loss": 0.7045, + "step": 53285 + }, + { + "epoch": 0.05, + "learning_rate": 4.965175857354012e-05, + "loss": 0.455, + "step": 53286 + }, + { + "epoch": 0.05, + "learning_rate": 4.965174550867642e-05, + "loss": 0.6921, + "step": 53287 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651732443569374e-05, + "loss": 0.4191, + "step": 53288 + }, + { + "epoch": 0.05, + "learning_rate": 4.965171937821898e-05, + "loss": 0.5449, + "step": 53289 + }, + { + "epoch": 0.05, + "learning_rate": 4.965170631262523e-05, + "loss": 0.3782, + "step": 53290 + }, + { + "epoch": 0.05, + "learning_rate": 4.965169324678814e-05, + "loss": 0.3592, + "step": 53291 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651680180707686e-05, + "loss": 0.3977, + "step": 53292 + }, + { + "epoch": 0.05, + "learning_rate": 4.965166711438388e-05, + "loss": 0.3411, + "step": 53293 + }, + { + "epoch": 0.05, + "learning_rate": 4.965165404781673e-05, + "loss": 0.2928, + "step": 53294 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651640981006224e-05, + "loss": 0.2743, + "step": 53295 + }, + { + "epoch": 0.05, + "learning_rate": 4.965162791395237e-05, + "loss": 0.2434, + "step": 53296 + }, + { + "epoch": 0.05, + "learning_rate": 4.965161484665517e-05, + "loss": 0.4154, + "step": 53297 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651601779114613e-05, + "loss": 0.4264, + "step": 53298 + }, + { + "epoch": 0.05, + "learning_rate": 4.965158871133071e-05, + "loss": 1.1799, + "step": 53299 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651575643303454e-05, + "loss": 1.3861, + "step": 53300 + }, + { + "epoch": 0.05, + "learning_rate": 4.965156257503285e-05, + "loss": 1.2216, + "step": 53301 + }, + { + "epoch": 0.05, + "learning_rate": 4.96515495065189e-05, + "loss": 1.3466, + "step": 53302 + }, + { + "epoch": 0.05, + "learning_rate": 4.965153643776159e-05, + "loss": 1.1063, + "step": 53303 + }, + { + "epoch": 0.05, + "learning_rate": 4.965152336876094e-05, + "loss": 1.2642, + "step": 53304 + }, + { + "epoch": 0.05, + "learning_rate": 4.965151029951693e-05, + "loss": 1.1761, + "step": 53305 + }, + { + "epoch": 0.05, + "learning_rate": 4.965149723002958e-05, + "loss": 1.2504, + "step": 53306 + }, + { + "epoch": 0.05, + "learning_rate": 4.965148416029888e-05, + "loss": 1.1238, + "step": 53307 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651471090324833e-05, + "loss": 0.8288, + "step": 53308 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651458020107426e-05, + "loss": 1.2031, + "step": 53309 + }, + { + "epoch": 0.05, + "learning_rate": 4.965144494964668e-05, + "loss": 1.0686, + "step": 53310 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651431878942584e-05, + "loss": 0.9942, + "step": 53311 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651418807995136e-05, + "loss": 1.4284, + "step": 53312 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651405736804346e-05, + "loss": 1.3363, + "step": 53313 + }, + { + "epoch": 0.05, + "learning_rate": 4.96513926653702e-05, + "loss": 0.7364, + "step": 53314 + }, + { + "epoch": 0.05, + "learning_rate": 4.965137959369271e-05, + "loss": 1.0554, + "step": 53315 + }, + { + "epoch": 0.05, + "learning_rate": 4.965136652177187e-05, + "loss": 0.727, + "step": 53316 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651353449607686e-05, + "loss": 1.1455, + "step": 53317 + }, + { + "epoch": 0.05, + "learning_rate": 4.965134037720015e-05, + "loss": 1.2183, + "step": 53318 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651327304549264e-05, + "loss": 1.1313, + "step": 53319 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651314231655036e-05, + "loss": 1.1253, + "step": 53320 + }, + { + "epoch": 0.05, + "learning_rate": 4.965130115851746e-05, + "loss": 1.0025, + "step": 53321 + }, + { + "epoch": 0.05, + "learning_rate": 4.965128808513654e-05, + "loss": 1.1156, + "step": 53322 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651275011512264e-05, + "loss": 1.1353, + "step": 53323 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651261937644646e-05, + "loss": 1.4051, + "step": 53324 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651248863533686e-05, + "loss": 2.0279, + "step": 53325 + }, + { + "epoch": 0.05, + "learning_rate": 4.965123578917937e-05, + "loss": 1.0955, + "step": 53326 + }, + { + "epoch": 0.05, + "learning_rate": 4.965122271458171e-05, + "loss": 0.6125, + "step": 53327 + }, + { + "epoch": 0.05, + "learning_rate": 4.96512096397407e-05, + "loss": 0.9983, + "step": 53328 + }, + { + "epoch": 0.05, + "learning_rate": 4.965119656465635e-05, + "loss": 1.2834, + "step": 53329 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651183489328653e-05, + "loss": 1.1513, + "step": 53330 + }, + { + "epoch": 0.05, + "learning_rate": 4.965117041375761e-05, + "loss": 1.1778, + "step": 53331 + }, + { + "epoch": 0.05, + "learning_rate": 4.965115733794322e-05, + "loss": 1.0513, + "step": 53332 + }, + { + "epoch": 0.05, + "learning_rate": 4.965114426188549e-05, + "loss": 1.1943, + "step": 53333 + }, + { + "epoch": 0.05, + "learning_rate": 4.965113118558441e-05, + "loss": 1.3158, + "step": 53334 + }, + { + "epoch": 0.05, + "learning_rate": 4.965111810903998e-05, + "loss": 1.2732, + "step": 53335 + }, + { + "epoch": 0.05, + "learning_rate": 4.965110503225221e-05, + "loss": 1.153, + "step": 53336 + }, + { + "epoch": 0.05, + "learning_rate": 4.965109195522108e-05, + "loss": 1.3121, + "step": 53337 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651078877946624e-05, + "loss": 1.2115, + "step": 53338 + }, + { + "epoch": 0.05, + "learning_rate": 4.965106580042882e-05, + "loss": 1.336, + "step": 53339 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651052722667665e-05, + "loss": 1.7577, + "step": 53340 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651039644663166e-05, + "loss": 1.3294, + "step": 53341 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651026566415323e-05, + "loss": 0.8671, + "step": 53342 + }, + { + "epoch": 0.05, + "learning_rate": 4.965101348792414e-05, + "loss": 1.0814, + "step": 53343 + }, + { + "epoch": 0.05, + "learning_rate": 4.9651000409189605e-05, + "loss": 1.3998, + "step": 53344 + }, + { + "epoch": 0.05, + "learning_rate": 4.965098733021173e-05, + "loss": 1.1623, + "step": 53345 + }, + { + "epoch": 0.05, + "learning_rate": 4.965097425099051e-05, + "loss": 0.9759, + "step": 53346 + }, + { + "epoch": 0.05, + "learning_rate": 4.965096117152594e-05, + "loss": 1.0782, + "step": 53347 + }, + { + "epoch": 0.05, + "learning_rate": 4.965094809181804e-05, + "loss": 1.1268, + "step": 53348 + }, + { + "epoch": 0.05, + "learning_rate": 4.965093501186679e-05, + "loss": 0.9828, + "step": 53349 + }, + { + "epoch": 0.05, + "learning_rate": 4.965092193167219e-05, + "loss": 1.0102, + "step": 53350 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650908851234256e-05, + "loss": 1.366, + "step": 53351 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650895770552976e-05, + "loss": 0.9978, + "step": 53352 + }, + { + "epoch": 0.05, + "learning_rate": 4.965088268962835e-05, + "loss": 1.1448, + "step": 53353 + }, + { + "epoch": 0.05, + "learning_rate": 4.965086960846038e-05, + "loss": 0.5632, + "step": 53354 + }, + { + "epoch": 0.05, + "learning_rate": 4.965085652704907e-05, + "loss": 2.1856, + "step": 53355 + }, + { + "epoch": 0.05, + "learning_rate": 4.965084344539441e-05, + "loss": 0.8523, + "step": 53356 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650830363496415e-05, + "loss": 1.0147, + "step": 53357 + }, + { + "epoch": 0.05, + "learning_rate": 4.965081728135508e-05, + "loss": 1.291, + "step": 53358 + }, + { + "epoch": 0.05, + "learning_rate": 4.96508041989704e-05, + "loss": 1.2388, + "step": 53359 + }, + { + "epoch": 0.05, + "learning_rate": 4.965079111634237e-05, + "loss": 0.9936, + "step": 53360 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650778033471004e-05, + "loss": 0.7459, + "step": 53361 + }, + { + "epoch": 0.05, + "learning_rate": 4.96507649503563e-05, + "loss": 1.2244, + "step": 53362 + }, + { + "epoch": 0.05, + "learning_rate": 4.965075186699825e-05, + "loss": 1.4897, + "step": 53363 + }, + { + "epoch": 0.05, + "learning_rate": 4.965073878339685e-05, + "loss": 1.2613, + "step": 53364 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650725699552115e-05, + "loss": 1.5784, + "step": 53365 + }, + { + "epoch": 0.05, + "learning_rate": 4.965071261546405e-05, + "loss": 1.4486, + "step": 53366 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650699531132626e-05, + "loss": 0.9335, + "step": 53367 + }, + { + "epoch": 0.05, + "learning_rate": 4.965068644655787e-05, + "loss": 1.1896, + "step": 53368 + }, + { + "epoch": 0.05, + "learning_rate": 4.965067336173976e-05, + "loss": 1.226, + "step": 53369 + }, + { + "epoch": 0.05, + "learning_rate": 4.965066027667833e-05, + "loss": 1.3734, + "step": 53370 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650647191373546e-05, + "loss": 1.1724, + "step": 53371 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650634105825425e-05, + "loss": 1.3147, + "step": 53372 + }, + { + "epoch": 0.05, + "learning_rate": 4.965062102003396e-05, + "loss": 0.8169, + "step": 53373 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650607933999156e-05, + "loss": 0.4805, + "step": 53374 + }, + { + "epoch": 0.05, + "learning_rate": 4.965059484772101e-05, + "loss": 0.3922, + "step": 53375 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650581761199524e-05, + "loss": 0.5699, + "step": 53376 + }, + { + "epoch": 0.05, + "learning_rate": 4.96505686744347e-05, + "loss": 1.4413, + "step": 53377 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650555587426536e-05, + "loss": 1.4076, + "step": 53378 + }, + { + "epoch": 0.05, + "learning_rate": 4.965054250017503e-05, + "loss": 0.9276, + "step": 53379 + }, + { + "epoch": 0.05, + "learning_rate": 4.965052941268019e-05, + "loss": 1.4123, + "step": 53380 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650516324942004e-05, + "loss": 0.8831, + "step": 53381 + }, + { + "epoch": 0.05, + "learning_rate": 4.965050323696048e-05, + "loss": 1.1193, + "step": 53382 + }, + { + "epoch": 0.05, + "learning_rate": 4.965049014873562e-05, + "loss": 1.4213, + "step": 53383 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650477060267416e-05, + "loss": 1.0988, + "step": 53384 + }, + { + "epoch": 0.05, + "learning_rate": 4.965046397155587e-05, + "loss": 1.5921, + "step": 53385 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650450882601e-05, + "loss": 0.455, + "step": 53386 + }, + { + "epoch": 0.05, + "learning_rate": 4.965043779340277e-05, + "loss": 0.8456, + "step": 53387 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650424703961216e-05, + "loss": 1.3148, + "step": 53388 + }, + { + "epoch": 0.05, + "learning_rate": 4.965041161427632e-05, + "loss": 1.274, + "step": 53389 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650398524348085e-05, + "loss": 1.0808, + "step": 53390 + }, + { + "epoch": 0.05, + "learning_rate": 4.965038543417652e-05, + "loss": 1.5877, + "step": 53391 + }, + { + "epoch": 0.05, + "learning_rate": 4.96503723437616e-05, + "loss": 1.6189, + "step": 53392 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650359253103354e-05, + "loss": 1.3348, + "step": 53393 + }, + { + "epoch": 0.05, + "learning_rate": 4.965034616220177e-05, + "loss": 1.2192, + "step": 53394 + }, + { + "epoch": 0.05, + "learning_rate": 4.965033307105684e-05, + "loss": 1.1583, + "step": 53395 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650319979668576e-05, + "loss": 1.3498, + "step": 53396 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650306888036976e-05, + "loss": 1.1378, + "step": 53397 + }, + { + "epoch": 0.05, + "learning_rate": 4.965029379616204e-05, + "loss": 1.6397, + "step": 53398 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650280704043764e-05, + "loss": 1.3773, + "step": 53399 + }, + { + "epoch": 0.05, + "learning_rate": 4.965026761168215e-05, + "loss": 1.0581, + "step": 53400 + }, + { + "epoch": 0.05, + "learning_rate": 4.96502545190772e-05, + "loss": 0.8439, + "step": 53401 + }, + { + "epoch": 0.05, + "learning_rate": 4.965024142622892e-05, + "loss": 0.9932, + "step": 53402 + }, + { + "epoch": 0.05, + "learning_rate": 4.965022833313729e-05, + "loss": 1.1995, + "step": 53403 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650215239802334e-05, + "loss": 1.1866, + "step": 53404 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650202146224036e-05, + "loss": 0.789, + "step": 53405 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650189052402396e-05, + "loss": 1.2928, + "step": 53406 + }, + { + "epoch": 0.05, + "learning_rate": 4.965017595833743e-05, + "loss": 1.13, + "step": 53407 + }, + { + "epoch": 0.05, + "learning_rate": 4.965016286402913e-05, + "loss": 1.5606, + "step": 53408 + }, + { + "epoch": 0.05, + "learning_rate": 4.965014976947748e-05, + "loss": 1.4316, + "step": 53409 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650136674682504e-05, + "loss": 1.0732, + "step": 53410 + }, + { + "epoch": 0.05, + "learning_rate": 4.965012357964419e-05, + "loss": 0.8635, + "step": 53411 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650110484362545e-05, + "loss": 0.6968, + "step": 53412 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650097388837555e-05, + "loss": 1.2507, + "step": 53413 + }, + { + "epoch": 0.05, + "learning_rate": 4.965008429306924e-05, + "loss": 1.2112, + "step": 53414 + }, + { + "epoch": 0.05, + "learning_rate": 4.965007119705759e-05, + "loss": 0.8854, + "step": 53415 + }, + { + "epoch": 0.05, + "learning_rate": 4.96500581008026e-05, + "loss": 1.2242, + "step": 53416 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650045004304266e-05, + "loss": 1.4097, + "step": 53417 + }, + { + "epoch": 0.05, + "learning_rate": 4.965003190756261e-05, + "loss": 0.8556, + "step": 53418 + }, + { + "epoch": 0.05, + "learning_rate": 4.9650018810577615e-05, + "loss": 1.0677, + "step": 53419 + }, + { + "epoch": 0.05, + "learning_rate": 4.965000571334929e-05, + "loss": 0.8459, + "step": 53420 + }, + { + "epoch": 0.05, + "learning_rate": 4.964999261587763e-05, + "loss": 0.6918, + "step": 53421 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649979518162624e-05, + "loss": 1.1071, + "step": 53422 + }, + { + "epoch": 0.05, + "learning_rate": 4.96499664202043e-05, + "loss": 0.2128, + "step": 53423 + }, + { + "epoch": 0.05, + "learning_rate": 4.964995332200263e-05, + "loss": 0.1865, + "step": 53424 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649940223557626e-05, + "loss": 0.58, + "step": 53425 + }, + { + "epoch": 0.05, + "learning_rate": 4.964992712486929e-05, + "loss": 0.8398, + "step": 53426 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649914025937625e-05, + "loss": 1.4104, + "step": 53427 + }, + { + "epoch": 0.05, + "learning_rate": 4.964990092676263e-05, + "loss": 1.3822, + "step": 53428 + }, + { + "epoch": 0.05, + "learning_rate": 4.964988782734429e-05, + "loss": 0.9715, + "step": 53429 + }, + { + "epoch": 0.05, + "learning_rate": 4.964987472768262e-05, + "loss": 1.3089, + "step": 53430 + }, + { + "epoch": 0.05, + "learning_rate": 4.964986162777762e-05, + "loss": 1.3317, + "step": 53431 + }, + { + "epoch": 0.05, + "learning_rate": 4.964984852762928e-05, + "loss": 1.027, + "step": 53432 + }, + { + "epoch": 0.05, + "learning_rate": 4.964983542723762e-05, + "loss": 1.7388, + "step": 53433 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649822326602623e-05, + "loss": 2.7404, + "step": 53434 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649809225724284e-05, + "loss": 1.2421, + "step": 53435 + }, + { + "epoch": 0.05, + "learning_rate": 4.964979612460262e-05, + "loss": 1.1585, + "step": 53436 + }, + { + "epoch": 0.05, + "learning_rate": 4.964978302323763e-05, + "loss": 1.0153, + "step": 53437 + }, + { + "epoch": 0.05, + "learning_rate": 4.96497699216293e-05, + "loss": 1.0342, + "step": 53438 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649756819777636e-05, + "loss": 1.2083, + "step": 53439 + }, + { + "epoch": 0.05, + "learning_rate": 4.964974371768264e-05, + "loss": 1.0403, + "step": 53440 + }, + { + "epoch": 0.05, + "learning_rate": 4.964973061534431e-05, + "loss": 1.0746, + "step": 53441 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649717512762657e-05, + "loss": 1.1292, + "step": 53442 + }, + { + "epoch": 0.05, + "learning_rate": 4.964970440993767e-05, + "loss": 1.2352, + "step": 53443 + }, + { + "epoch": 0.05, + "learning_rate": 4.964969130686935e-05, + "loss": 0.9307, + "step": 53444 + }, + { + "epoch": 0.05, + "learning_rate": 4.964967820355769e-05, + "loss": 1.1609, + "step": 53445 + }, + { + "epoch": 0.05, + "learning_rate": 4.964966510000271e-05, + "loss": 1.0361, + "step": 53446 + }, + { + "epoch": 0.05, + "learning_rate": 4.964965199620439e-05, + "loss": 1.0579, + "step": 53447 + }, + { + "epoch": 0.05, + "learning_rate": 4.964963889216275e-05, + "loss": 1.0572, + "step": 53448 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649625787877774e-05, + "loss": 0.9948, + "step": 53449 + }, + { + "epoch": 0.05, + "learning_rate": 4.964961268334947e-05, + "loss": 0.9593, + "step": 53450 + }, + { + "epoch": 0.05, + "learning_rate": 4.964959957857783e-05, + "loss": 0.9208, + "step": 53451 + }, + { + "epoch": 0.05, + "learning_rate": 4.964958647356287e-05, + "loss": 1.5734, + "step": 53452 + }, + { + "epoch": 0.05, + "learning_rate": 4.964957336830456e-05, + "loss": 1.2935, + "step": 53453 + }, + { + "epoch": 0.05, + "learning_rate": 4.964956026280293e-05, + "loss": 1.0484, + "step": 53454 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649547157057975e-05, + "loss": 1.0802, + "step": 53455 + }, + { + "epoch": 0.05, + "learning_rate": 4.964953405106969e-05, + "loss": 0.7419, + "step": 53456 + }, + { + "epoch": 0.05, + "learning_rate": 4.964952094483807e-05, + "loss": 1.2038, + "step": 53457 + }, + { + "epoch": 0.05, + "learning_rate": 4.964950783836312e-05, + "loss": 1.0615, + "step": 53458 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649494731644844e-05, + "loss": 1.4757, + "step": 53459 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649481624683235e-05, + "loss": 0.6666, + "step": 53460 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649468517478305e-05, + "loss": 1.2464, + "step": 53461 + }, + { + "epoch": 0.05, + "learning_rate": 4.964945541003004e-05, + "loss": 0.8573, + "step": 53462 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649442302338444e-05, + "loss": 0.8307, + "step": 53463 + }, + { + "epoch": 0.05, + "learning_rate": 4.964942919440352e-05, + "loss": 1.2441, + "step": 53464 + }, + { + "epoch": 0.05, + "learning_rate": 4.964941608622527e-05, + "loss": 1.3275, + "step": 53465 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649402977803685e-05, + "loss": 1.1877, + "step": 53466 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649389869138775e-05, + "loss": 1.2296, + "step": 53467 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649376760230536e-05, + "loss": 1.1287, + "step": 53468 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649363651078975e-05, + "loss": 1.1117, + "step": 53469 + }, + { + "epoch": 0.05, + "learning_rate": 4.964935054168408e-05, + "loss": 1.3034, + "step": 53470 + }, + { + "epoch": 0.05, + "learning_rate": 4.964933743204586e-05, + "loss": 1.117, + "step": 53471 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649324322164304e-05, + "loss": 1.5013, + "step": 53472 + }, + { + "epoch": 0.05, + "learning_rate": 4.964931121203943e-05, + "loss": 1.1563, + "step": 53473 + }, + { + "epoch": 0.05, + "learning_rate": 4.964929810167123e-05, + "loss": 1.2608, + "step": 53474 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649284991059694e-05, + "loss": 1.2248, + "step": 53475 + }, + { + "epoch": 0.05, + "learning_rate": 4.964927188020483e-05, + "loss": 1.3571, + "step": 53476 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649258769106645e-05, + "loss": 1.1941, + "step": 53477 + }, + { + "epoch": 0.05, + "learning_rate": 4.964924565776513e-05, + "loss": 1.0989, + "step": 53478 + }, + { + "epoch": 0.05, + "learning_rate": 4.964923254618029e-05, + "loss": 1.1462, + "step": 53479 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649219434352116e-05, + "loss": 1.3468, + "step": 53480 + }, + { + "epoch": 0.05, + "learning_rate": 4.964920632228062e-05, + "loss": 1.4275, + "step": 53481 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649193209965806e-05, + "loss": 1.24, + "step": 53482 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649180097407655e-05, + "loss": 1.1106, + "step": 53483 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649166984606174e-05, + "loss": 0.8835, + "step": 53484 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649153871561385e-05, + "loss": 1.1966, + "step": 53485 + }, + { + "epoch": 0.05, + "learning_rate": 4.964914075827325e-05, + "loss": 1.0938, + "step": 53486 + }, + { + "epoch": 0.05, + "learning_rate": 4.96491276447418e-05, + "loss": 1.4593, + "step": 53487 + }, + { + "epoch": 0.05, + "learning_rate": 4.9649114530967025e-05, + "loss": 1.0211, + "step": 53488 + }, + { + "epoch": 0.05, + "learning_rate": 4.964910141694892e-05, + "loss": 1.1989, + "step": 53489 + }, + { + "epoch": 0.05, + "learning_rate": 4.964908830268749e-05, + "loss": 0.9258, + "step": 53490 + }, + { + "epoch": 0.05, + "learning_rate": 4.964907518818274e-05, + "loss": 1.0004, + "step": 53491 + }, + { + "epoch": 0.05, + "learning_rate": 4.964906207343465e-05, + "loss": 1.0698, + "step": 53492 + }, + { + "epoch": 0.05, + "learning_rate": 4.964904895844325e-05, + "loss": 1.5022, + "step": 53493 + }, + { + "epoch": 0.05, + "learning_rate": 4.964903584320851e-05, + "loss": 1.1452, + "step": 53494 + }, + { + "epoch": 0.05, + "learning_rate": 4.964902272773046e-05, + "loss": 1.3497, + "step": 53495 + }, + { + "epoch": 0.05, + "learning_rate": 4.964900961200908e-05, + "loss": 1.2659, + "step": 53496 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648996496044375e-05, + "loss": 0.6762, + "step": 53497 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648983379836345e-05, + "loss": 1.0466, + "step": 53498 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648970263385e-05, + "loss": 1.4191, + "step": 53499 + }, + { + "epoch": 0.05, + "learning_rate": 4.964895714669032e-05, + "loss": 1.4758, + "step": 53500 + }, + { + "epoch": 0.05, + "eval_loss": 1.0732150077819824, + "eval_runtime": 128.3448, + "eval_samples_per_second": 10.791, + "eval_steps_per_second": 5.4, + "step": 53500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648944029752315e-05, + "loss": 0.9445, + "step": 53501 + }, + { + "epoch": 0.05, + "learning_rate": 4.964893091257099e-05, + "loss": 0.7266, + "step": 53502 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648917795146336e-05, + "loss": 0.8277, + "step": 53503 + }, + { + "epoch": 0.05, + "learning_rate": 4.964890467747837e-05, + "loss": 1.3445, + "step": 53504 + }, + { + "epoch": 0.05, + "learning_rate": 4.964889155956707e-05, + "loss": 1.1832, + "step": 53505 + }, + { + "epoch": 0.05, + "learning_rate": 4.964887844141245e-05, + "loss": 0.9986, + "step": 53506 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648865323014506e-05, + "loss": 0.8222, + "step": 53507 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648852204373235e-05, + "loss": 1.1422, + "step": 53508 + }, + { + "epoch": 0.05, + "learning_rate": 4.964883908548865e-05, + "loss": 1.1903, + "step": 53509 + }, + { + "epoch": 0.05, + "learning_rate": 4.964882596636073e-05, + "loss": 0.7375, + "step": 53510 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648812846989495e-05, + "loss": 2.1387, + "step": 53511 + }, + { + "epoch": 0.05, + "learning_rate": 4.964879972737494e-05, + "loss": 1.1294, + "step": 53512 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648786607517054e-05, + "loss": 1.2348, + "step": 53513 + }, + { + "epoch": 0.05, + "learning_rate": 4.964877348741586e-05, + "loss": 1.0015, + "step": 53514 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648760367071325e-05, + "loss": 1.2099, + "step": 53515 + }, + { + "epoch": 0.05, + "learning_rate": 4.964874724648348e-05, + "loss": 0.9007, + "step": 53516 + }, + { + "epoch": 0.05, + "learning_rate": 4.964873412565231e-05, + "loss": 0.9535, + "step": 53517 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648721004577815e-05, + "loss": 0.8274, + "step": 53518 + }, + { + "epoch": 0.05, + "learning_rate": 4.964870788326e-05, + "loss": 1.3481, + "step": 53519 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648694761698865e-05, + "loss": 1.2123, + "step": 53520 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648681639894414e-05, + "loss": 1.4581, + "step": 53521 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648668517846634e-05, + "loss": 1.3828, + "step": 53522 + }, + { + "epoch": 0.05, + "learning_rate": 4.964865539555553e-05, + "loss": 1.3851, + "step": 53523 + }, + { + "epoch": 0.05, + "learning_rate": 4.964864227302111e-05, + "loss": 1.1952, + "step": 53524 + }, + { + "epoch": 0.05, + "learning_rate": 4.964862915024337e-05, + "loss": 1.2249, + "step": 53525 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648616027222305e-05, + "loss": 1.3056, + "step": 53526 + }, + { + "epoch": 0.05, + "learning_rate": 4.964860290395792e-05, + "loss": 0.9062, + "step": 53527 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648589780450216e-05, + "loss": 0.8244, + "step": 53528 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648576656699195e-05, + "loss": 0.9901, + "step": 53529 + }, + { + "epoch": 0.05, + "learning_rate": 4.964856353270485e-05, + "loss": 1.0981, + "step": 53530 + }, + { + "epoch": 0.05, + "learning_rate": 4.964855040846718e-05, + "loss": 1.0267, + "step": 53531 + }, + { + "epoch": 0.05, + "learning_rate": 4.96485372839862e-05, + "loss": 1.1889, + "step": 53532 + }, + { + "epoch": 0.05, + "learning_rate": 4.964852415926189e-05, + "loss": 1.1327, + "step": 53533 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648511034294264e-05, + "loss": 1.0918, + "step": 53534 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648497909083324e-05, + "loss": 0.8724, + "step": 53535 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648484783629055e-05, + "loss": 0.9584, + "step": 53536 + }, + { + "epoch": 0.05, + "learning_rate": 4.964847165793147e-05, + "loss": 1.135, + "step": 53537 + }, + { + "epoch": 0.05, + "learning_rate": 4.964845853199057e-05, + "loss": 1.3751, + "step": 53538 + }, + { + "epoch": 0.05, + "learning_rate": 4.964844540580634e-05, + "loss": 1.0032, + "step": 53539 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648432279378806e-05, + "loss": 1.201, + "step": 53540 + }, + { + "epoch": 0.05, + "learning_rate": 4.964841915270795e-05, + "loss": 1.1408, + "step": 53541 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648406025793766e-05, + "loss": 1.2378, + "step": 53542 + }, + { + "epoch": 0.05, + "learning_rate": 4.964839289863626e-05, + "loss": 1.1345, + "step": 53543 + }, + { + "epoch": 0.05, + "learning_rate": 4.964837977123545e-05, + "loss": 0.7726, + "step": 53544 + }, + { + "epoch": 0.05, + "learning_rate": 4.964836664359132e-05, + "loss": 1.0088, + "step": 53545 + }, + { + "epoch": 0.05, + "learning_rate": 4.964835351570386e-05, + "loss": 0.6479, + "step": 53546 + }, + { + "epoch": 0.05, + "learning_rate": 4.964834038757309e-05, + "loss": 1.221, + "step": 53547 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648327259199e-05, + "loss": 1.1097, + "step": 53548 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648314130581596e-05, + "loss": 1.4635, + "step": 53549 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648301001720866e-05, + "loss": 0.9433, + "step": 53550 + }, + { + "epoch": 0.05, + "learning_rate": 4.964828787261683e-05, + "loss": 1.1199, + "step": 53551 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648274743269466e-05, + "loss": 0.9827, + "step": 53552 + }, + { + "epoch": 0.05, + "learning_rate": 4.964826161367879e-05, + "loss": 1.2483, + "step": 53553 + }, + { + "epoch": 0.05, + "learning_rate": 4.96482484838448e-05, + "loss": 0.9408, + "step": 53554 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648235353767484e-05, + "loss": 0.8752, + "step": 53555 + }, + { + "epoch": 0.05, + "learning_rate": 4.964822222344685e-05, + "loss": 0.8584, + "step": 53556 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648209092882904e-05, + "loss": 0.932, + "step": 53557 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648195962075644e-05, + "loss": 1.1189, + "step": 53558 + }, + { + "epoch": 0.05, + "learning_rate": 4.964818283102506e-05, + "loss": 2.0638, + "step": 53559 + }, + { + "epoch": 0.05, + "learning_rate": 4.964816969973117e-05, + "loss": 1.43, + "step": 53560 + }, + { + "epoch": 0.05, + "learning_rate": 4.964815656819396e-05, + "loss": 1.4726, + "step": 53561 + }, + { + "epoch": 0.05, + "learning_rate": 4.964814343641343e-05, + "loss": 1.2545, + "step": 53562 + }, + { + "epoch": 0.05, + "learning_rate": 4.964813030438958e-05, + "loss": 1.4676, + "step": 53563 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648117172122424e-05, + "loss": 1.112, + "step": 53564 + }, + { + "epoch": 0.05, + "learning_rate": 4.964810403961195e-05, + "loss": 1.1126, + "step": 53565 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648090906858155e-05, + "loss": 1.0902, + "step": 53566 + }, + { + "epoch": 0.05, + "learning_rate": 4.9648077773861044e-05, + "loss": 1.2213, + "step": 53567 + }, + { + "epoch": 0.05, + "learning_rate": 4.964806464062063e-05, + "loss": 1.2061, + "step": 53568 + }, + { + "epoch": 0.05, + "learning_rate": 4.964805150713688e-05, + "loss": 1.3248, + "step": 53569 + }, + { + "epoch": 0.05, + "learning_rate": 4.964803837340983e-05, + "loss": 0.7229, + "step": 53570 + }, + { + "epoch": 0.05, + "learning_rate": 4.964802523943947e-05, + "loss": 0.9528, + "step": 53571 + }, + { + "epoch": 0.05, + "learning_rate": 4.964801210522578e-05, + "loss": 1.0722, + "step": 53572 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647998970768785e-05, + "loss": 1.1501, + "step": 53573 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647985836068474e-05, + "loss": 1.2962, + "step": 53574 + }, + { + "epoch": 0.05, + "learning_rate": 4.964797270112485e-05, + "loss": 1.226, + "step": 53575 + }, + { + "epoch": 0.05, + "learning_rate": 4.96479595659379e-05, + "loss": 1.2196, + "step": 53576 + }, + { + "epoch": 0.05, + "learning_rate": 4.964794643050765e-05, + "loss": 0.9531, + "step": 53577 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647933294834076e-05, + "loss": 1.1579, + "step": 53578 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647920158917195e-05, + "loss": 0.7328, + "step": 53579 + }, + { + "epoch": 0.05, + "learning_rate": 4.964790702275699e-05, + "loss": 0.8047, + "step": 53580 + }, + { + "epoch": 0.05, + "learning_rate": 4.964789388635348e-05, + "loss": 1.1028, + "step": 53581 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647880749706654e-05, + "loss": 0.9733, + "step": 53582 + }, + { + "epoch": 0.05, + "learning_rate": 4.964786761281652e-05, + "loss": 1.0767, + "step": 53583 + }, + { + "epoch": 0.05, + "learning_rate": 4.964785447568306e-05, + "loss": 1.1157, + "step": 53584 + }, + { + "epoch": 0.05, + "learning_rate": 4.96478413383063e-05, + "loss": 0.7339, + "step": 53585 + }, + { + "epoch": 0.05, + "learning_rate": 4.964782820068622e-05, + "loss": 1.0277, + "step": 53586 + }, + { + "epoch": 0.05, + "learning_rate": 4.964781506282283e-05, + "loss": 1.3039, + "step": 53587 + }, + { + "epoch": 0.05, + "learning_rate": 4.964780192471613e-05, + "loss": 1.1862, + "step": 53588 + }, + { + "epoch": 0.05, + "learning_rate": 4.964778878636611e-05, + "loss": 1.1229, + "step": 53589 + }, + { + "epoch": 0.05, + "learning_rate": 4.964777564777278e-05, + "loss": 1.2639, + "step": 53590 + }, + { + "epoch": 0.05, + "learning_rate": 4.964776250893613e-05, + "loss": 1.1428, + "step": 53591 + }, + { + "epoch": 0.05, + "learning_rate": 4.964774936985618e-05, + "loss": 1.2236, + "step": 53592 + }, + { + "epoch": 0.05, + "learning_rate": 4.964773623053291e-05, + "loss": 1.423, + "step": 53593 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647723090966336e-05, + "loss": 1.2869, + "step": 53594 + }, + { + "epoch": 0.05, + "learning_rate": 4.964770995115645e-05, + "loss": 1.5588, + "step": 53595 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647696811103244e-05, + "loss": 1.134, + "step": 53596 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647683670806735e-05, + "loss": 1.0572, + "step": 53597 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647670530266905e-05, + "loss": 2.0288, + "step": 53598 + }, + { + "epoch": 0.05, + "learning_rate": 4.964765738948377e-05, + "loss": 1.1733, + "step": 53599 + }, + { + "epoch": 0.05, + "learning_rate": 4.964764424845732e-05, + "loss": 1.2516, + "step": 53600 + }, + { + "epoch": 0.05, + "learning_rate": 4.964763110718757e-05, + "loss": 0.8472, + "step": 53601 + }, + { + "epoch": 0.05, + "learning_rate": 4.964761796567449e-05, + "loss": 0.932, + "step": 53602 + }, + { + "epoch": 0.05, + "learning_rate": 4.964760482391811e-05, + "loss": 0.8923, + "step": 53603 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647591681918414e-05, + "loss": 1.4513, + "step": 53604 + }, + { + "epoch": 0.05, + "learning_rate": 4.964757853967541e-05, + "loss": 1.3391, + "step": 53605 + }, + { + "epoch": 0.05, + "learning_rate": 4.96475653971891e-05, + "loss": 1.0197, + "step": 53606 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647552254459476e-05, + "loss": 0.7182, + "step": 53607 + }, + { + "epoch": 0.05, + "learning_rate": 4.964753911148654e-05, + "loss": 0.9987, + "step": 53608 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647525968270295e-05, + "loss": 0.8713, + "step": 53609 + }, + { + "epoch": 0.05, + "learning_rate": 4.964751282481074e-05, + "loss": 1.5749, + "step": 53610 + }, + { + "epoch": 0.05, + "learning_rate": 4.964749968110788e-05, + "loss": 1.1103, + "step": 53611 + }, + { + "epoch": 0.05, + "learning_rate": 4.96474865371617e-05, + "loss": 1.1516, + "step": 53612 + }, + { + "epoch": 0.05, + "learning_rate": 4.964747339297222e-05, + "loss": 1.5528, + "step": 53613 + }, + { + "epoch": 0.05, + "learning_rate": 4.964746024853943e-05, + "loss": 1.519, + "step": 53614 + }, + { + "epoch": 0.05, + "learning_rate": 4.964744710386333e-05, + "loss": 0.6871, + "step": 53615 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647433958943915e-05, + "loss": 1.1476, + "step": 53616 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647420813781195e-05, + "loss": 1.4705, + "step": 53617 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647407668375166e-05, + "loss": 1.3695, + "step": 53618 + }, + { + "epoch": 0.05, + "learning_rate": 4.964739452272583e-05, + "loss": 1.4015, + "step": 53619 + }, + { + "epoch": 0.05, + "learning_rate": 4.964738137683318e-05, + "loss": 1.0395, + "step": 53620 + }, + { + "epoch": 0.05, + "learning_rate": 4.964736823069723e-05, + "loss": 0.4603, + "step": 53621 + }, + { + "epoch": 0.05, + "learning_rate": 4.964735508431796e-05, + "loss": 0.5323, + "step": 53622 + }, + { + "epoch": 0.05, + "learning_rate": 4.964734193769539e-05, + "loss": 0.6964, + "step": 53623 + }, + { + "epoch": 0.05, + "learning_rate": 4.964732879082951e-05, + "loss": 1.6326, + "step": 53624 + }, + { + "epoch": 0.05, + "learning_rate": 4.964731564372032e-05, + "loss": 1.4376, + "step": 53625 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647302496367826e-05, + "loss": 1.2346, + "step": 53626 + }, + { + "epoch": 0.05, + "learning_rate": 4.964728934877202e-05, + "loss": 0.8531, + "step": 53627 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647276200932914e-05, + "loss": 0.7909, + "step": 53628 + }, + { + "epoch": 0.05, + "learning_rate": 4.964726305285049e-05, + "loss": 1.1049, + "step": 53629 + }, + { + "epoch": 0.05, + "learning_rate": 4.964724990452476e-05, + "loss": 1.3088, + "step": 53630 + }, + { + "epoch": 0.05, + "learning_rate": 4.964723675595573e-05, + "loss": 1.1045, + "step": 53631 + }, + { + "epoch": 0.05, + "learning_rate": 4.964722360714339e-05, + "loss": 1.3116, + "step": 53632 + }, + { + "epoch": 0.05, + "learning_rate": 4.964721045808774e-05, + "loss": 1.4276, + "step": 53633 + }, + { + "epoch": 0.05, + "learning_rate": 4.964719730878878e-05, + "loss": 1.2076, + "step": 53634 + }, + { + "epoch": 0.05, + "learning_rate": 4.964718415924652e-05, + "loss": 1.1472, + "step": 53635 + }, + { + "epoch": 0.05, + "learning_rate": 4.964717100946096e-05, + "loss": 1.41, + "step": 53636 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647157859432084e-05, + "loss": 1.1339, + "step": 53637 + }, + { + "epoch": 0.05, + "learning_rate": 4.96471447091599e-05, + "loss": 1.2681, + "step": 53638 + }, + { + "epoch": 0.05, + "learning_rate": 4.964713155864441e-05, + "loss": 0.8322, + "step": 53639 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647118407885616e-05, + "loss": 1.1735, + "step": 53640 + }, + { + "epoch": 0.05, + "learning_rate": 4.964710525688352e-05, + "loss": 0.9869, + "step": 53641 + }, + { + "epoch": 0.05, + "learning_rate": 4.964709210563811e-05, + "loss": 1.2504, + "step": 53642 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647078954149406e-05, + "loss": 1.4052, + "step": 53643 + }, + { + "epoch": 0.05, + "learning_rate": 4.964706580241739e-05, + "loss": 1.2207, + "step": 53644 + }, + { + "epoch": 0.05, + "learning_rate": 4.964705265044206e-05, + "loss": 1.1052, + "step": 53645 + }, + { + "epoch": 0.05, + "learning_rate": 4.964703949822344e-05, + "loss": 0.899, + "step": 53646 + }, + { + "epoch": 0.05, + "learning_rate": 4.9647026345761504e-05, + "loss": 1.0529, + "step": 53647 + }, + { + "epoch": 0.05, + "learning_rate": 4.964701319305627e-05, + "loss": 0.9516, + "step": 53648 + }, + { + "epoch": 0.05, + "learning_rate": 4.964700004010773e-05, + "loss": 1.0315, + "step": 53649 + }, + { + "epoch": 0.05, + "learning_rate": 4.964698688691588e-05, + "loss": 1.1384, + "step": 53650 + }, + { + "epoch": 0.05, + "learning_rate": 4.964697373348073e-05, + "loss": 0.6509, + "step": 53651 + }, + { + "epoch": 0.05, + "learning_rate": 4.964696057980227e-05, + "loss": 1.264, + "step": 53652 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646947425880505e-05, + "loss": 1.3241, + "step": 53653 + }, + { + "epoch": 0.05, + "learning_rate": 4.964693427171544e-05, + "loss": 1.1309, + "step": 53654 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646921117307075e-05, + "loss": 0.9645, + "step": 53655 + }, + { + "epoch": 0.05, + "learning_rate": 4.96469079626554e-05, + "loss": 1.261, + "step": 53656 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646894807760425e-05, + "loss": 1.2591, + "step": 53657 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646881652622144e-05, + "loss": 1.1691, + "step": 53658 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646868497240554e-05, + "loss": 0.9919, + "step": 53659 + }, + { + "epoch": 0.05, + "learning_rate": 4.964685534161567e-05, + "loss": 0.9989, + "step": 53660 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646842185747477e-05, + "loss": 1.0905, + "step": 53661 + }, + { + "epoch": 0.05, + "learning_rate": 4.964682902963598e-05, + "loss": 1.2065, + "step": 53662 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646815873281185e-05, + "loss": 1.1167, + "step": 53663 + }, + { + "epoch": 0.05, + "learning_rate": 4.964680271668308e-05, + "loss": 1.2462, + "step": 53664 + }, + { + "epoch": 0.05, + "learning_rate": 4.964678955984168e-05, + "loss": 0.8905, + "step": 53665 + }, + { + "epoch": 0.05, + "learning_rate": 4.964677640275698e-05, + "loss": 0.4813, + "step": 53666 + }, + { + "epoch": 0.05, + "learning_rate": 4.964676324542896e-05, + "loss": 0.5339, + "step": 53667 + }, + { + "epoch": 0.05, + "learning_rate": 4.964675008785765e-05, + "loss": 0.5664, + "step": 53668 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646736930043035e-05, + "loss": 0.6133, + "step": 53669 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646723771985127e-05, + "loss": 1.2583, + "step": 53670 + }, + { + "epoch": 0.05, + "learning_rate": 4.96467106136839e-05, + "loss": 1.2892, + "step": 53671 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646697455139383e-05, + "loss": 1.1444, + "step": 53672 + }, + { + "epoch": 0.05, + "learning_rate": 4.964668429635156e-05, + "loss": 1.1628, + "step": 53673 + }, + { + "epoch": 0.05, + "learning_rate": 4.964667113732045e-05, + "loss": 1.1409, + "step": 53674 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646657978046016e-05, + "loss": 1.6138, + "step": 53675 + }, + { + "epoch": 0.05, + "learning_rate": 4.964664481852829e-05, + "loss": 1.5815, + "step": 53676 + }, + { + "epoch": 0.05, + "learning_rate": 4.964663165876726e-05, + "loss": 1.236, + "step": 53677 + }, + { + "epoch": 0.05, + "learning_rate": 4.964661849876293e-05, + "loss": 1.305, + "step": 53678 + }, + { + "epoch": 0.05, + "learning_rate": 4.964660533851531e-05, + "loss": 0.8162, + "step": 53679 + }, + { + "epoch": 0.05, + "learning_rate": 4.964659217802437e-05, + "loss": 1.1754, + "step": 53680 + }, + { + "epoch": 0.05, + "learning_rate": 4.964657901729014e-05, + "loss": 1.6576, + "step": 53681 + }, + { + "epoch": 0.05, + "learning_rate": 4.964656585631261e-05, + "loss": 1.5637, + "step": 53682 + }, + { + "epoch": 0.05, + "learning_rate": 4.964655269509177e-05, + "loss": 1.5125, + "step": 53683 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646539533627646e-05, + "loss": 1.5124, + "step": 53684 + }, + { + "epoch": 0.05, + "learning_rate": 4.964652637192021e-05, + "loss": 1.4581, + "step": 53685 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646513209969475e-05, + "loss": 1.3815, + "step": 53686 + }, + { + "epoch": 0.05, + "learning_rate": 4.964650004777544e-05, + "loss": 1.3752, + "step": 53687 + }, + { + "epoch": 0.05, + "learning_rate": 4.96464868853381e-05, + "loss": 1.4857, + "step": 53688 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646473722657475e-05, + "loss": 1.4073, + "step": 53689 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646460559733544e-05, + "loss": 1.3232, + "step": 53690 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646447396566306e-05, + "loss": 1.2923, + "step": 53691 + }, + { + "epoch": 0.05, + "learning_rate": 4.964643423315578e-05, + "loss": 1.2166, + "step": 53692 + }, + { + "epoch": 0.05, + "learning_rate": 4.964642106950195e-05, + "loss": 1.1668, + "step": 53693 + }, + { + "epoch": 0.05, + "learning_rate": 4.964640790560482e-05, + "loss": 1.4523, + "step": 53694 + }, + { + "epoch": 0.05, + "learning_rate": 4.964639474146439e-05, + "loss": 1.7908, + "step": 53695 + }, + { + "epoch": 0.05, + "learning_rate": 4.964638157708066e-05, + "loss": 1.8521, + "step": 53696 + }, + { + "epoch": 0.05, + "learning_rate": 4.964636841245364e-05, + "loss": 1.2084, + "step": 53697 + }, + { + "epoch": 0.05, + "learning_rate": 4.964635524758331e-05, + "loss": 1.0226, + "step": 53698 + }, + { + "epoch": 0.05, + "learning_rate": 4.964634208246969e-05, + "loss": 1.2601, + "step": 53699 + }, + { + "epoch": 0.05, + "learning_rate": 4.964632891711276e-05, + "loss": 1.1726, + "step": 53700 + }, + { + "epoch": 0.05, + "learning_rate": 4.964631575151255e-05, + "loss": 1.0158, + "step": 53701 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646302585669024e-05, + "loss": 1.102, + "step": 53702 + }, + { + "epoch": 0.05, + "learning_rate": 4.964628941958221e-05, + "loss": 0.9349, + "step": 53703 + }, + { + "epoch": 0.05, + "learning_rate": 4.96462762532521e-05, + "loss": 0.8835, + "step": 53704 + }, + { + "epoch": 0.05, + "learning_rate": 4.964626308667868e-05, + "loss": 0.847, + "step": 53705 + }, + { + "epoch": 0.05, + "learning_rate": 4.964624991986198e-05, + "loss": 0.7605, + "step": 53706 + }, + { + "epoch": 0.05, + "learning_rate": 4.964623675280197e-05, + "loss": 0.852, + "step": 53707 + }, + { + "epoch": 0.05, + "learning_rate": 4.964622358549867e-05, + "loss": 0.6911, + "step": 53708 + }, + { + "epoch": 0.05, + "learning_rate": 4.964621041795206e-05, + "loss": 1.4987, + "step": 53709 + }, + { + "epoch": 0.05, + "learning_rate": 4.964619725016217e-05, + "loss": 1.1238, + "step": 53710 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646184082128974e-05, + "loss": 1.1569, + "step": 53711 + }, + { + "epoch": 0.05, + "learning_rate": 4.964617091385249e-05, + "loss": 1.1149, + "step": 53712 + }, + { + "epoch": 0.05, + "learning_rate": 4.96461577453327e-05, + "loss": 0.7421, + "step": 53713 + }, + { + "epoch": 0.05, + "learning_rate": 4.964614457656961e-05, + "loss": 1.0674, + "step": 53714 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646131407563235e-05, + "loss": 1.4945, + "step": 53715 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646118238313555e-05, + "loss": 1.1999, + "step": 53716 + }, + { + "epoch": 0.05, + "learning_rate": 4.964610506882058e-05, + "loss": 1.246, + "step": 53717 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646091899084315e-05, + "loss": 1.3247, + "step": 53718 + }, + { + "epoch": 0.05, + "learning_rate": 4.964607872910475e-05, + "loss": 1.2337, + "step": 53719 + }, + { + "epoch": 0.05, + "learning_rate": 4.964606555888189e-05, + "loss": 0.9453, + "step": 53720 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646052388415734e-05, + "loss": 1.1312, + "step": 53721 + }, + { + "epoch": 0.05, + "learning_rate": 4.9646039217706284e-05, + "loss": 1.1009, + "step": 53722 + }, + { + "epoch": 0.05, + "learning_rate": 4.964602604675354e-05, + "loss": 1.16, + "step": 53723 + }, + { + "epoch": 0.05, + "learning_rate": 4.964601287555749e-05, + "loss": 0.6375, + "step": 53724 + }, + { + "epoch": 0.05, + "learning_rate": 4.964599970411815e-05, + "loss": 0.8474, + "step": 53725 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645986532435526e-05, + "loss": 1.5428, + "step": 53726 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645973360509594e-05, + "loss": 1.5092, + "step": 53727 + }, + { + "epoch": 0.05, + "learning_rate": 4.964596018834038e-05, + "loss": 1.417, + "step": 53728 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645947015927865e-05, + "loss": 1.26, + "step": 53729 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645933843272055e-05, + "loss": 1.3668, + "step": 53730 + }, + { + "epoch": 0.05, + "learning_rate": 4.964592067037295e-05, + "loss": 1.3591, + "step": 53731 + }, + { + "epoch": 0.05, + "learning_rate": 4.964590749723055e-05, + "loss": 1.2914, + "step": 53732 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645894323844855e-05, + "loss": 1.2059, + "step": 53733 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645881150215865e-05, + "loss": 1.2937, + "step": 53734 + }, + { + "epoch": 0.05, + "learning_rate": 4.964586797634359e-05, + "loss": 1.2522, + "step": 53735 + }, + { + "epoch": 0.05, + "learning_rate": 4.964585480222801e-05, + "loss": 1.2714, + "step": 53736 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645841627869146e-05, + "loss": 1.1234, + "step": 53737 + }, + { + "epoch": 0.05, + "learning_rate": 4.964582845326698e-05, + "loss": 1.2199, + "step": 53738 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645815278421525e-05, + "loss": 1.2759, + "step": 53739 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645802103332785e-05, + "loss": 1.0915, + "step": 53740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645788928000744e-05, + "loss": 1.2433, + "step": 53741 + }, + { + "epoch": 0.05, + "learning_rate": 4.964577575242541e-05, + "loss": 1.2447, + "step": 53742 + }, + { + "epoch": 0.05, + "learning_rate": 4.964576257660678e-05, + "loss": 1.2759, + "step": 53743 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645749400544864e-05, + "loss": 1.2502, + "step": 53744 + }, + { + "epoch": 0.05, + "learning_rate": 4.964573622423966e-05, + "loss": 1.2475, + "step": 53745 + }, + { + "epoch": 0.05, + "learning_rate": 4.964572304769115e-05, + "loss": 1.2174, + "step": 53746 + }, + { + "epoch": 0.05, + "learning_rate": 4.964570987089936e-05, + "loss": 1.1893, + "step": 53747 + }, + { + "epoch": 0.05, + "learning_rate": 4.964569669386427e-05, + "loss": 1.2181, + "step": 53748 + }, + { + "epoch": 0.05, + "learning_rate": 4.964568351658589e-05, + "loss": 1.2078, + "step": 53749 + }, + { + "epoch": 0.05, + "learning_rate": 4.964567033906422e-05, + "loss": 1.1958, + "step": 53750 + }, + { + "epoch": 0.05, + "learning_rate": 4.964565716129925e-05, + "loss": 1.2544, + "step": 53751 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645643983291e-05, + "loss": 1.2431, + "step": 53752 + }, + { + "epoch": 0.05, + "learning_rate": 4.964563080503946e-05, + "loss": 1.1869, + "step": 53753 + }, + { + "epoch": 0.05, + "learning_rate": 4.964561762654462e-05, + "loss": 1.2166, + "step": 53754 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645604447806494e-05, + "loss": 1.1412, + "step": 53755 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645591268825076e-05, + "loss": 1.2292, + "step": 53756 + }, + { + "epoch": 0.05, + "learning_rate": 4.964557808960036e-05, + "loss": 0.8164, + "step": 53757 + }, + { + "epoch": 0.05, + "learning_rate": 4.964556491013236e-05, + "loss": 1.1411, + "step": 53758 + }, + { + "epoch": 0.05, + "learning_rate": 4.964555173042107e-05, + "loss": 0.875, + "step": 53759 + }, + { + "epoch": 0.05, + "learning_rate": 4.964553855046649e-05, + "loss": 0.9388, + "step": 53760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645525370268614e-05, + "loss": 1.074, + "step": 53761 + }, + { + "epoch": 0.05, + "learning_rate": 4.964551218982745e-05, + "loss": 1.1788, + "step": 53762 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645499009143004e-05, + "loss": 1.0452, + "step": 53763 + }, + { + "epoch": 0.05, + "learning_rate": 4.964548582821525e-05, + "loss": 1.171, + "step": 53764 + }, + { + "epoch": 0.05, + "learning_rate": 4.964547264704422e-05, + "loss": 0.9714, + "step": 53765 + }, + { + "epoch": 0.05, + "learning_rate": 4.964545946562991e-05, + "loss": 1.0021, + "step": 53766 + }, + { + "epoch": 0.05, + "learning_rate": 4.964544628397229e-05, + "loss": 1.1416, + "step": 53767 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645433102071395e-05, + "loss": 1.1449, + "step": 53768 + }, + { + "epoch": 0.05, + "learning_rate": 4.96454199199272e-05, + "loss": 1.1113, + "step": 53769 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645406737539716e-05, + "loss": 1.1737, + "step": 53770 + }, + { + "epoch": 0.05, + "learning_rate": 4.964539355490895e-05, + "loss": 1.0182, + "step": 53771 + }, + { + "epoch": 0.05, + "learning_rate": 4.964538037203489e-05, + "loss": 1.0311, + "step": 53772 + }, + { + "epoch": 0.05, + "learning_rate": 4.964536718891755e-05, + "loss": 1.1444, + "step": 53773 + }, + { + "epoch": 0.05, + "learning_rate": 4.964535400555691e-05, + "loss": 1.073, + "step": 53774 + }, + { + "epoch": 0.05, + "learning_rate": 4.964534082195299e-05, + "loss": 1.231, + "step": 53775 + }, + { + "epoch": 0.05, + "learning_rate": 4.964532763810578e-05, + "loss": 0.9005, + "step": 53776 + }, + { + "epoch": 0.05, + "learning_rate": 4.964531445401527e-05, + "loss": 1.0512, + "step": 53777 + }, + { + "epoch": 0.05, + "learning_rate": 4.964530126968149e-05, + "loss": 1.0012, + "step": 53778 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645288085104407e-05, + "loss": 0.987, + "step": 53779 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645274900284045e-05, + "loss": 0.8449, + "step": 53780 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645261715220394e-05, + "loss": 0.8957, + "step": 53781 + }, + { + "epoch": 0.05, + "learning_rate": 4.964524852991345e-05, + "loss": 1.1314, + "step": 53782 + }, + { + "epoch": 0.05, + "learning_rate": 4.964523534436323e-05, + "loss": 1.2063, + "step": 53783 + }, + { + "epoch": 0.05, + "learning_rate": 4.964522215856971e-05, + "loss": 1.2102, + "step": 53784 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645208972532905e-05, + "loss": 1.2218, + "step": 53785 + }, + { + "epoch": 0.05, + "learning_rate": 4.964519578625282e-05, + "loss": 1.2674, + "step": 53786 + }, + { + "epoch": 0.05, + "learning_rate": 4.964518259972945e-05, + "loss": 1.2373, + "step": 53787 + }, + { + "epoch": 0.05, + "learning_rate": 4.964516941296278e-05, + "loss": 1.1518, + "step": 53788 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645156225952825e-05, + "loss": 1.1984, + "step": 53789 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645143038699595e-05, + "loss": 1.0841, + "step": 53790 + }, + { + "epoch": 0.05, + "learning_rate": 4.964512985120307e-05, + "loss": 1.1224, + "step": 53791 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645116663463256e-05, + "loss": 1.1583, + "step": 53792 + }, + { + "epoch": 0.05, + "learning_rate": 4.964510347548017e-05, + "loss": 1.0779, + "step": 53793 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645090287253785e-05, + "loss": 1.1002, + "step": 53794 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645077098784113e-05, + "loss": 1.1071, + "step": 53795 + }, + { + "epoch": 0.05, + "learning_rate": 4.964506391007117e-05, + "loss": 1.0668, + "step": 53796 + }, + { + "epoch": 0.05, + "learning_rate": 4.964505072111492e-05, + "loss": 1.171, + "step": 53797 + }, + { + "epoch": 0.05, + "learning_rate": 4.96450375319154e-05, + "loss": 1.0161, + "step": 53798 + }, + { + "epoch": 0.05, + "learning_rate": 4.964502434247259e-05, + "loss": 1.1038, + "step": 53799 + }, + { + "epoch": 0.05, + "learning_rate": 4.964501115278649e-05, + "loss": 1.1086, + "step": 53800 + }, + { + "epoch": 0.05, + "learning_rate": 4.964499796285711e-05, + "loss": 1.0297, + "step": 53801 + }, + { + "epoch": 0.05, + "learning_rate": 4.964498477268445e-05, + "loss": 1.1192, + "step": 53802 + }, + { + "epoch": 0.05, + "learning_rate": 4.96449715822685e-05, + "loss": 1.007, + "step": 53803 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644958391609266e-05, + "loss": 1.0949, + "step": 53804 + }, + { + "epoch": 0.05, + "learning_rate": 4.964494520070674e-05, + "loss": 1.212, + "step": 53805 + }, + { + "epoch": 0.05, + "learning_rate": 4.964493200956094e-05, + "loss": 1.1708, + "step": 53806 + }, + { + "epoch": 0.05, + "learning_rate": 4.964491881817185e-05, + "loss": 1.1998, + "step": 53807 + }, + { + "epoch": 0.05, + "learning_rate": 4.964490562653947e-05, + "loss": 1.1092, + "step": 53808 + }, + { + "epoch": 0.05, + "learning_rate": 4.964489243466382e-05, + "loss": 1.2021, + "step": 53809 + }, + { + "epoch": 0.05, + "learning_rate": 4.964487924254488e-05, + "loss": 1.2138, + "step": 53810 + }, + { + "epoch": 0.05, + "learning_rate": 4.964486605018265e-05, + "loss": 1.2099, + "step": 53811 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644852857577144e-05, + "loss": 1.2339, + "step": 53812 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644839664728346e-05, + "loss": 1.2403, + "step": 53813 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644826471636274e-05, + "loss": 1.2321, + "step": 53814 + }, + { + "epoch": 0.05, + "learning_rate": 4.964481327830092e-05, + "loss": 1.2562, + "step": 53815 + }, + { + "epoch": 0.05, + "learning_rate": 4.964480008472228e-05, + "loss": 1.194, + "step": 53816 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644786890900354e-05, + "loss": 1.2321, + "step": 53817 + }, + { + "epoch": 0.05, + "learning_rate": 4.964477369683514e-05, + "loss": 1.1841, + "step": 53818 + }, + { + "epoch": 0.05, + "learning_rate": 4.964476050252665e-05, + "loss": 1.2004, + "step": 53819 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644747307974875e-05, + "loss": 1.1949, + "step": 53820 + }, + { + "epoch": 0.05, + "learning_rate": 4.964473411317982e-05, + "loss": 1.2633, + "step": 53821 + }, + { + "epoch": 0.05, + "learning_rate": 4.964472091814148e-05, + "loss": 1.1828, + "step": 53822 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644707722859864e-05, + "loss": 1.1927, + "step": 53823 + }, + { + "epoch": 0.05, + "learning_rate": 4.964469452733496e-05, + "loss": 1.2113, + "step": 53824 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644681331566775e-05, + "loss": 1.2015, + "step": 53825 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644668135555306e-05, + "loss": 1.2308, + "step": 53826 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644654939300555e-05, + "loss": 1.2116, + "step": 53827 + }, + { + "epoch": 0.05, + "learning_rate": 4.964464174280252e-05, + "loss": 1.2007, + "step": 53828 + }, + { + "epoch": 0.05, + "learning_rate": 4.964462854606121e-05, + "loss": 1.2339, + "step": 53829 + }, + { + "epoch": 0.05, + "learning_rate": 4.964461534907662e-05, + "loss": 1.2624, + "step": 53830 + }, + { + "epoch": 0.05, + "learning_rate": 4.964460215184874e-05, + "loss": 1.2797, + "step": 53831 + }, + { + "epoch": 0.05, + "learning_rate": 4.964458895437759e-05, + "loss": 1.1687, + "step": 53832 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644575756663145e-05, + "loss": 1.2374, + "step": 53833 + }, + { + "epoch": 0.05, + "learning_rate": 4.964456255870543e-05, + "loss": 1.1314, + "step": 53834 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644549360504435e-05, + "loss": 1.2012, + "step": 53835 + }, + { + "epoch": 0.05, + "learning_rate": 4.964453616206016e-05, + "loss": 1.1628, + "step": 53836 + }, + { + "epoch": 0.05, + "learning_rate": 4.96445229633726e-05, + "loss": 1.1028, + "step": 53837 + }, + { + "epoch": 0.05, + "learning_rate": 4.964450976444175e-05, + "loss": 1.2326, + "step": 53838 + }, + { + "epoch": 0.05, + "learning_rate": 4.964449656526764e-05, + "loss": 1.2253, + "step": 53839 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644483365850236e-05, + "loss": 1.2103, + "step": 53840 + }, + { + "epoch": 0.05, + "learning_rate": 4.964447016618956e-05, + "loss": 1.1029, + "step": 53841 + }, + { + "epoch": 0.05, + "learning_rate": 4.96444569662856e-05, + "loss": 1.1688, + "step": 53842 + }, + { + "epoch": 0.05, + "learning_rate": 4.964444376613835e-05, + "loss": 1.1077, + "step": 53843 + }, + { + "epoch": 0.05, + "learning_rate": 4.964443056574784e-05, + "loss": 1.1176, + "step": 53844 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644417365114035e-05, + "loss": 1.1019, + "step": 53845 + }, + { + "epoch": 0.05, + "learning_rate": 4.964440416423696e-05, + "loss": 1.1188, + "step": 53846 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644390963116605e-05, + "loss": 1.0891, + "step": 53847 + }, + { + "epoch": 0.05, + "learning_rate": 4.964437776175297e-05, + "loss": 1.1446, + "step": 53848 + }, + { + "epoch": 0.05, + "learning_rate": 4.964436456014605e-05, + "loss": 1.1895, + "step": 53849 + }, + { + "epoch": 0.05, + "learning_rate": 4.964435135829586e-05, + "loss": 1.1657, + "step": 53850 + }, + { + "epoch": 0.05, + "learning_rate": 4.964433815620239e-05, + "loss": 1.0445, + "step": 53851 + }, + { + "epoch": 0.05, + "learning_rate": 4.964432495386563e-05, + "loss": 1.0582, + "step": 53852 + }, + { + "epoch": 0.05, + "learning_rate": 4.96443117512856e-05, + "loss": 1.0769, + "step": 53853 + }, + { + "epoch": 0.05, + "learning_rate": 4.96442985484623e-05, + "loss": 1.1081, + "step": 53854 + }, + { + "epoch": 0.05, + "learning_rate": 4.964428534539571e-05, + "loss": 1.1368, + "step": 53855 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644272142085845e-05, + "loss": 1.1088, + "step": 53856 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644258938532706e-05, + "loss": 1.1353, + "step": 53857 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644245734736285e-05, + "loss": 1.1116, + "step": 53858 + }, + { + "epoch": 0.05, + "learning_rate": 4.964423253069659e-05, + "loss": 1.0412, + "step": 53859 + }, + { + "epoch": 0.05, + "learning_rate": 4.964421932641361e-05, + "loss": 1.0115, + "step": 53860 + }, + { + "epoch": 0.05, + "learning_rate": 4.964420612188736e-05, + "loss": 1.0986, + "step": 53861 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644192917117835e-05, + "loss": 1.1231, + "step": 53862 + }, + { + "epoch": 0.05, + "learning_rate": 4.964417971210502e-05, + "loss": 1.1229, + "step": 53863 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644166506848946e-05, + "loss": 1.1147, + "step": 53864 + }, + { + "epoch": 0.05, + "learning_rate": 4.964415330134958e-05, + "loss": 1.1242, + "step": 53865 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644140095606944e-05, + "loss": 1.1286, + "step": 53866 + }, + { + "epoch": 0.05, + "learning_rate": 4.964412688962103e-05, + "loss": 1.0941, + "step": 53867 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644113683391844e-05, + "loss": 1.1661, + "step": 53868 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644100476919375e-05, + "loss": 1.1328, + "step": 53869 + }, + { + "epoch": 0.05, + "learning_rate": 4.964408727020363e-05, + "loss": 1.177, + "step": 53870 + }, + { + "epoch": 0.05, + "learning_rate": 4.964407406324462e-05, + "loss": 1.1299, + "step": 53871 + }, + { + "epoch": 0.05, + "learning_rate": 4.964406085604232e-05, + "loss": 1.0977, + "step": 53872 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644047648596746e-05, + "loss": 1.1044, + "step": 53873 + }, + { + "epoch": 0.05, + "learning_rate": 4.9644034440907904e-05, + "loss": 1.4144, + "step": 53874 + }, + { + "epoch": 0.05, + "learning_rate": 4.964402123297579e-05, + "loss": 0.6967, + "step": 53875 + }, + { + "epoch": 0.05, + "learning_rate": 4.964400802480039e-05, + "loss": 0.99, + "step": 53876 + }, + { + "epoch": 0.05, + "learning_rate": 4.964399481638171e-05, + "loss": 1.2586, + "step": 53877 + }, + { + "epoch": 0.05, + "learning_rate": 4.964398160771977e-05, + "loss": 1.2022, + "step": 53878 + }, + { + "epoch": 0.05, + "learning_rate": 4.964396839881455e-05, + "loss": 0.6351, + "step": 53879 + }, + { + "epoch": 0.05, + "learning_rate": 4.964395518966605e-05, + "loss": 1.129, + "step": 53880 + }, + { + "epoch": 0.05, + "learning_rate": 4.964394198027428e-05, + "loss": 0.7227, + "step": 53881 + }, + { + "epoch": 0.05, + "learning_rate": 4.964392877063923e-05, + "loss": 0.944, + "step": 53882 + }, + { + "epoch": 0.05, + "learning_rate": 4.964391556076091e-05, + "loss": 1.6016, + "step": 53883 + }, + { + "epoch": 0.05, + "learning_rate": 4.964390235063932e-05, + "loss": 1.5338, + "step": 53884 + }, + { + "epoch": 0.05, + "learning_rate": 4.964388914027445e-05, + "loss": 1.2701, + "step": 53885 + }, + { + "epoch": 0.05, + "learning_rate": 4.96438759296663e-05, + "loss": 0.8623, + "step": 53886 + }, + { + "epoch": 0.05, + "learning_rate": 4.964386271881489e-05, + "loss": 1.0996, + "step": 53887 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643849507720196e-05, + "loss": 1.0355, + "step": 53888 + }, + { + "epoch": 0.05, + "learning_rate": 4.964383629638223e-05, + "loss": 0.8134, + "step": 53889 + }, + { + "epoch": 0.05, + "learning_rate": 4.964382308480099e-05, + "loss": 1.2602, + "step": 53890 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643809872976474e-05, + "loss": 1.3567, + "step": 53891 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643796660908695e-05, + "loss": 1.747, + "step": 53892 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643783448597635e-05, + "loss": 1.0121, + "step": 53893 + }, + { + "epoch": 0.05, + "learning_rate": 4.964377023604331e-05, + "loss": 1.0559, + "step": 53894 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643757023245704e-05, + "loss": 1.1787, + "step": 53895 + }, + { + "epoch": 0.05, + "learning_rate": 4.964374381020483e-05, + "loss": 1.2982, + "step": 53896 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643730596920675e-05, + "loss": 1.284, + "step": 53897 + }, + { + "epoch": 0.05, + "learning_rate": 4.964371738339325e-05, + "loss": 1.0011, + "step": 53898 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643704169622554e-05, + "loss": 1.0428, + "step": 53899 + }, + { + "epoch": 0.05, + "learning_rate": 4.964369095560859e-05, + "loss": 0.7237, + "step": 53900 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643677741351354e-05, + "loss": 1.1944, + "step": 53901 + }, + { + "epoch": 0.05, + "learning_rate": 4.964366452685084e-05, + "loss": 1.3355, + "step": 53902 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643651312107056e-05, + "loss": 1.2254, + "step": 53903 + }, + { + "epoch": 0.05, + "learning_rate": 4.964363809712e-05, + "loss": 1.2851, + "step": 53904 + }, + { + "epoch": 0.05, + "learning_rate": 4.964362488188968e-05, + "loss": 1.2758, + "step": 53905 + }, + { + "epoch": 0.05, + "learning_rate": 4.964361166641608e-05, + "loss": 1.2002, + "step": 53906 + }, + { + "epoch": 0.05, + "learning_rate": 4.964359845069921e-05, + "loss": 1.0318, + "step": 53907 + }, + { + "epoch": 0.05, + "learning_rate": 4.964358523473907e-05, + "loss": 0.7966, + "step": 53908 + }, + { + "epoch": 0.05, + "learning_rate": 4.964357201853565e-05, + "loss": 0.5439, + "step": 53909 + }, + { + "epoch": 0.05, + "learning_rate": 4.964355880208897e-05, + "loss": 0.5151, + "step": 53910 + }, + { + "epoch": 0.05, + "learning_rate": 4.964354558539902e-05, + "loss": 1.3153, + "step": 53911 + }, + { + "epoch": 0.05, + "learning_rate": 4.96435323684658e-05, + "loss": 1.0018, + "step": 53912 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643519151289305e-05, + "loss": 1.2452, + "step": 53913 + }, + { + "epoch": 0.05, + "learning_rate": 4.964350593386954e-05, + "loss": 1.1092, + "step": 53914 + }, + { + "epoch": 0.05, + "learning_rate": 4.964349271620651e-05, + "loss": 1.416, + "step": 53915 + }, + { + "epoch": 0.05, + "learning_rate": 4.96434794983002e-05, + "loss": 1.3117, + "step": 53916 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643466280150625e-05, + "loss": 0.8795, + "step": 53917 + }, + { + "epoch": 0.05, + "learning_rate": 4.964345306175778e-05, + "loss": 1.0044, + "step": 53918 + }, + { + "epoch": 0.05, + "learning_rate": 4.964343984312167e-05, + "loss": 1.3521, + "step": 53919 + }, + { + "epoch": 0.05, + "learning_rate": 4.964342662424228e-05, + "loss": 0.9931, + "step": 53920 + }, + { + "epoch": 0.05, + "learning_rate": 4.964341340511962e-05, + "loss": 1.0882, + "step": 53921 + }, + { + "epoch": 0.05, + "learning_rate": 4.96434001857537e-05, + "loss": 1.3741, + "step": 53922 + }, + { + "epoch": 0.05, + "learning_rate": 4.964338696614451e-05, + "loss": 1.3277, + "step": 53923 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643373746292045e-05, + "loss": 1.1233, + "step": 53924 + }, + { + "epoch": 0.05, + "learning_rate": 4.964336052619631e-05, + "loss": 1.0361, + "step": 53925 + }, + { + "epoch": 0.05, + "learning_rate": 4.964334730585731e-05, + "loss": 1.0865, + "step": 53926 + }, + { + "epoch": 0.05, + "learning_rate": 4.964333408527505e-05, + "loss": 0.1805, + "step": 53927 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643320864449504e-05, + "loss": 0.1625, + "step": 53928 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643307643380706e-05, + "loss": 0.0606, + "step": 53929 + }, + { + "epoch": 0.05, + "learning_rate": 4.964329442206863e-05, + "loss": 0.051, + "step": 53930 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643281200513286e-05, + "loss": 0.0436, + "step": 53931 + }, + { + "epoch": 0.05, + "learning_rate": 4.964326797871468e-05, + "loss": 0.0344, + "step": 53932 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643254756672794e-05, + "loss": 0.1071, + "step": 53933 + }, + { + "epoch": 0.05, + "learning_rate": 4.964324153438765e-05, + "loss": 0.3521, + "step": 53934 + }, + { + "epoch": 0.05, + "learning_rate": 4.964322831185924e-05, + "loss": 0.4284, + "step": 53935 + }, + { + "epoch": 0.05, + "learning_rate": 4.964321508908756e-05, + "loss": 0.533, + "step": 53936 + }, + { + "epoch": 0.05, + "learning_rate": 4.964320186607261e-05, + "loss": 0.4411, + "step": 53937 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643188642814394e-05, + "loss": 0.4249, + "step": 53938 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643175419312904e-05, + "loss": 0.3615, + "step": 53939 + }, + { + "epoch": 0.05, + "learning_rate": 4.964316219556816e-05, + "loss": 0.3791, + "step": 53940 + }, + { + "epoch": 0.05, + "learning_rate": 4.964314897158014e-05, + "loss": 0.4001, + "step": 53941 + }, + { + "epoch": 0.05, + "learning_rate": 4.964313574734886e-05, + "loss": 0.3927, + "step": 53942 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643122522874305e-05, + "loss": 0.2305, + "step": 53943 + }, + { + "epoch": 0.05, + "learning_rate": 4.964310929815649e-05, + "loss": 0.2833, + "step": 53944 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643096073195405e-05, + "loss": 0.3345, + "step": 53945 + }, + { + "epoch": 0.05, + "learning_rate": 4.964308284799105e-05, + "loss": 0.4515, + "step": 53946 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643069622543434e-05, + "loss": 0.3511, + "step": 53947 + }, + { + "epoch": 0.05, + "learning_rate": 4.964305639685255e-05, + "loss": 0.327, + "step": 53948 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643043170918404e-05, + "loss": 0.346, + "step": 53949 + }, + { + "epoch": 0.05, + "learning_rate": 4.9643029944740995e-05, + "loss": 0.3175, + "step": 53950 + }, + { + "epoch": 0.05, + "learning_rate": 4.964301671832031e-05, + "loss": 0.6805, + "step": 53951 + }, + { + "epoch": 0.05, + "learning_rate": 4.964300349165637e-05, + "loss": 0.6152, + "step": 53952 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642990264749165e-05, + "loss": 1.0577, + "step": 53953 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642977037598684e-05, + "loss": 0.688, + "step": 53954 + }, + { + "epoch": 0.05, + "learning_rate": 4.964296381020495e-05, + "loss": 0.5958, + "step": 53955 + }, + { + "epoch": 0.05, + "learning_rate": 4.964295058256794e-05, + "loss": 0.6153, + "step": 53956 + }, + { + "epoch": 0.05, + "learning_rate": 4.964293735468767e-05, + "loss": 0.7644, + "step": 53957 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642924126564135e-05, + "loss": 1.3497, + "step": 53958 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642910898197335e-05, + "loss": 1.112, + "step": 53959 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642897669587274e-05, + "loss": 1.5144, + "step": 53960 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642884440733945e-05, + "loss": 1.3592, + "step": 53961 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642871211637354e-05, + "loss": 1.7695, + "step": 53962 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642857982297496e-05, + "loss": 1.335, + "step": 53963 + }, + { + "epoch": 0.05, + "learning_rate": 4.964284475271437e-05, + "loss": 1.0159, + "step": 53964 + }, + { + "epoch": 0.05, + "learning_rate": 4.964283152288799e-05, + "loss": 0.8385, + "step": 53965 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642818292818335e-05, + "loss": 0.7601, + "step": 53966 + }, + { + "epoch": 0.05, + "learning_rate": 4.964280506250543e-05, + "loss": 0.656, + "step": 53967 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642791831949256e-05, + "loss": 0.9335, + "step": 53968 + }, + { + "epoch": 0.05, + "learning_rate": 4.964277860114982e-05, + "loss": 0.9768, + "step": 53969 + }, + { + "epoch": 0.05, + "learning_rate": 4.964276537010711e-05, + "loss": 0.9773, + "step": 53970 + }, + { + "epoch": 0.05, + "learning_rate": 4.964275213882115e-05, + "loss": 1.6289, + "step": 53971 + }, + { + "epoch": 0.05, + "learning_rate": 4.964273890729193e-05, + "loss": 1.6798, + "step": 53972 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642725675519434e-05, + "loss": 1.5534, + "step": 53973 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642712443503684e-05, + "loss": 1.3942, + "step": 53974 + }, + { + "epoch": 0.05, + "learning_rate": 4.964269921124467e-05, + "loss": 1.1652, + "step": 53975 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642685978742394e-05, + "loss": 1.1445, + "step": 53976 + }, + { + "epoch": 0.05, + "learning_rate": 4.964267274599685e-05, + "loss": 1.1203, + "step": 53977 + }, + { + "epoch": 0.05, + "learning_rate": 4.964265951300805e-05, + "loss": 0.8496, + "step": 53978 + }, + { + "epoch": 0.05, + "learning_rate": 4.964264627977599e-05, + "loss": 1.0156, + "step": 53979 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642633046300666e-05, + "loss": 0.876, + "step": 53980 + }, + { + "epoch": 0.05, + "learning_rate": 4.964261981258208e-05, + "loss": 0.996, + "step": 53981 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642606578620235e-05, + "loss": 0.6997, + "step": 53982 + }, + { + "epoch": 0.05, + "learning_rate": 4.964259334441512e-05, + "loss": 0.7348, + "step": 53983 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642580109966754e-05, + "loss": 0.5917, + "step": 53984 + }, + { + "epoch": 0.05, + "learning_rate": 4.964256687527512e-05, + "loss": 1.1449, + "step": 53985 + }, + { + "epoch": 0.05, + "learning_rate": 4.964255364034022e-05, + "loss": 0.6371, + "step": 53986 + }, + { + "epoch": 0.05, + "learning_rate": 4.964254040516207e-05, + "loss": 0.7489, + "step": 53987 + }, + { + "epoch": 0.05, + "learning_rate": 4.964252716974066e-05, + "loss": 0.6036, + "step": 53988 + }, + { + "epoch": 0.05, + "learning_rate": 4.964251393407598e-05, + "loss": 0.7176, + "step": 53989 + }, + { + "epoch": 0.05, + "learning_rate": 4.964250069816804e-05, + "loss": 0.7779, + "step": 53990 + }, + { + "epoch": 0.05, + "learning_rate": 4.964248746201685e-05, + "loss": 0.8524, + "step": 53991 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642474225622384e-05, + "loss": 0.8996, + "step": 53992 + }, + { + "epoch": 0.05, + "learning_rate": 4.964246098898467e-05, + "loss": 0.6988, + "step": 53993 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642447752103694e-05, + "loss": 0.572, + "step": 53994 + }, + { + "epoch": 0.05, + "learning_rate": 4.964243451497946e-05, + "loss": 0.4776, + "step": 53995 + }, + { + "epoch": 0.05, + "learning_rate": 4.964242127761196e-05, + "loss": 0.7553, + "step": 53996 + }, + { + "epoch": 0.05, + "learning_rate": 4.964240804000121e-05, + "loss": 1.0465, + "step": 53997 + }, + { + "epoch": 0.05, + "learning_rate": 4.964239480214719e-05, + "loss": 1.2774, + "step": 53998 + }, + { + "epoch": 0.05, + "learning_rate": 4.964238156404992e-05, + "loss": 1.539, + "step": 53999 + }, + { + "epoch": 0.05, + "learning_rate": 4.964236832570938e-05, + "loss": 1.5903, + "step": 54000 + }, + { + "epoch": 0.05, + "eval_loss": 1.0967680215835571, + "eval_runtime": 127.9544, + "eval_samples_per_second": 10.824, + "eval_steps_per_second": 5.416, + "step": 54000 + }, + { + "epoch": 0.05, + "learning_rate": 4.964235508712559e-05, + "loss": 1.0501, + "step": 54001 + }, + { + "epoch": 0.05, + "learning_rate": 4.964234184829853e-05, + "loss": 1.1754, + "step": 54002 + }, + { + "epoch": 0.05, + "learning_rate": 4.964232860922823e-05, + "loss": 1.689, + "step": 54003 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642315369914655e-05, + "loss": 1.1196, + "step": 54004 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642302130357825e-05, + "loss": 1.0868, + "step": 54005 + }, + { + "epoch": 0.05, + "learning_rate": 4.964228889055774e-05, + "loss": 1.0159, + "step": 54006 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642275650514394e-05, + "loss": 1.0501, + "step": 54007 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642262410227794e-05, + "loss": 1.1647, + "step": 54008 + }, + { + "epoch": 0.05, + "learning_rate": 4.964224916969793e-05, + "loss": 1.2549, + "step": 54009 + }, + { + "epoch": 0.05, + "learning_rate": 4.964223592892481e-05, + "loss": 1.1494, + "step": 54010 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642222687908434e-05, + "loss": 1.3756, + "step": 54011 + }, + { + "epoch": 0.05, + "learning_rate": 4.96422094466488e-05, + "loss": 1.1749, + "step": 54012 + }, + { + "epoch": 0.05, + "learning_rate": 4.964219620514591e-05, + "loss": 0.923, + "step": 54013 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642182963399765e-05, + "loss": 0.9811, + "step": 54014 + }, + { + "epoch": 0.05, + "learning_rate": 4.964216972141036e-05, + "loss": 1.1156, + "step": 54015 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642156479177696e-05, + "loss": 0.8178, + "step": 54016 + }, + { + "epoch": 0.05, + "learning_rate": 4.964214323670177e-05, + "loss": 1.0367, + "step": 54017 + }, + { + "epoch": 0.05, + "learning_rate": 4.964212999398259e-05, + "loss": 0.9807, + "step": 54018 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642116751020164e-05, + "loss": 1.1115, + "step": 54019 + }, + { + "epoch": 0.05, + "learning_rate": 4.964210350781447e-05, + "loss": 0.7433, + "step": 54020 + }, + { + "epoch": 0.05, + "learning_rate": 4.964209026436552e-05, + "loss": 0.2843, + "step": 54021 + }, + { + "epoch": 0.05, + "learning_rate": 4.964207702067332e-05, + "loss": 0.803, + "step": 54022 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642063776737855e-05, + "loss": 1.21, + "step": 54023 + }, + { + "epoch": 0.05, + "learning_rate": 4.9642050532559145e-05, + "loss": 1.4708, + "step": 54024 + }, + { + "epoch": 0.05, + "learning_rate": 4.964203728813717e-05, + "loss": 1.4619, + "step": 54025 + }, + { + "epoch": 0.05, + "learning_rate": 4.964202404347195e-05, + "loss": 0.9924, + "step": 54026 + }, + { + "epoch": 0.05, + "learning_rate": 4.964201079856346e-05, + "loss": 1.6853, + "step": 54027 + }, + { + "epoch": 0.05, + "learning_rate": 4.964199755341172e-05, + "loss": 1.0608, + "step": 54028 + }, + { + "epoch": 0.05, + "learning_rate": 4.964198430801673e-05, + "loss": 1.207, + "step": 54029 + }, + { + "epoch": 0.05, + "learning_rate": 4.964197106237848e-05, + "loss": 1.1666, + "step": 54030 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641957816496975e-05, + "loss": 0.8857, + "step": 54031 + }, + { + "epoch": 0.05, + "learning_rate": 4.964194457037221e-05, + "loss": 0.9532, + "step": 54032 + }, + { + "epoch": 0.05, + "learning_rate": 4.96419313240042e-05, + "loss": 1.0557, + "step": 54033 + }, + { + "epoch": 0.05, + "learning_rate": 4.964191807739293e-05, + "loss": 1.1217, + "step": 54034 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641904830538404e-05, + "loss": 1.1299, + "step": 54035 + }, + { + "epoch": 0.05, + "learning_rate": 4.964189158344062e-05, + "loss": 1.5318, + "step": 54036 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641878336099587e-05, + "loss": 1.1455, + "step": 54037 + }, + { + "epoch": 0.05, + "learning_rate": 4.96418650885153e-05, + "loss": 1.3287, + "step": 54038 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641851840687765e-05, + "loss": 0.8211, + "step": 54039 + }, + { + "epoch": 0.05, + "learning_rate": 4.964183859261697e-05, + "loss": 1.0983, + "step": 54040 + }, + { + "epoch": 0.05, + "learning_rate": 4.964182534430292e-05, + "loss": 1.2692, + "step": 54041 + }, + { + "epoch": 0.05, + "learning_rate": 4.964181209574562e-05, + "loss": 1.4209, + "step": 54042 + }, + { + "epoch": 0.05, + "learning_rate": 4.964179884694506e-05, + "loss": 1.2166, + "step": 54043 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641785597901254e-05, + "loss": 1.1683, + "step": 54044 + }, + { + "epoch": 0.05, + "learning_rate": 4.964177234861419e-05, + "loss": 1.2068, + "step": 54045 + }, + { + "epoch": 0.05, + "learning_rate": 4.964175909908387e-05, + "loss": 1.0514, + "step": 54046 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641745849310306e-05, + "loss": 1.035, + "step": 54047 + }, + { + "epoch": 0.05, + "learning_rate": 4.964173259929348e-05, + "loss": 0.9463, + "step": 54048 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641719349033405e-05, + "loss": 1.326, + "step": 54049 + }, + { + "epoch": 0.05, + "learning_rate": 4.964170609853008e-05, + "loss": 0.9728, + "step": 54050 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641692847783494e-05, + "loss": 0.9568, + "step": 54051 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641679596793663e-05, + "loss": 0.8532, + "step": 54052 + }, + { + "epoch": 0.05, + "learning_rate": 4.964166634556058e-05, + "loss": 0.9976, + "step": 54053 + }, + { + "epoch": 0.05, + "learning_rate": 4.964165309408424e-05, + "loss": 0.7573, + "step": 54054 + }, + { + "epoch": 0.05, + "learning_rate": 4.964163984236465e-05, + "loss": 0.9779, + "step": 54055 + }, + { + "epoch": 0.05, + "learning_rate": 4.964162659040181e-05, + "loss": 1.357, + "step": 54056 + }, + { + "epoch": 0.05, + "learning_rate": 4.964161333819572e-05, + "loss": 1.047, + "step": 54057 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641600085746374e-05, + "loss": 1.0067, + "step": 54058 + }, + { + "epoch": 0.05, + "learning_rate": 4.964158683305378e-05, + "loss": 0.9555, + "step": 54059 + }, + { + "epoch": 0.05, + "learning_rate": 4.964157358011793e-05, + "loss": 1.4225, + "step": 54060 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641560326938834e-05, + "loss": 0.9615, + "step": 54061 + }, + { + "epoch": 0.05, + "learning_rate": 4.964154707351648e-05, + "loss": 0.4593, + "step": 54062 + }, + { + "epoch": 0.05, + "learning_rate": 4.964153381985088e-05, + "loss": 0.3505, + "step": 54063 + }, + { + "epoch": 0.05, + "learning_rate": 4.964152056594203e-05, + "loss": 0.4813, + "step": 54064 + }, + { + "epoch": 0.05, + "learning_rate": 4.964150731178993e-05, + "loss": 1.1797, + "step": 54065 + }, + { + "epoch": 0.05, + "learning_rate": 4.964149405739458e-05, + "loss": 1.2668, + "step": 54066 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641480802755975e-05, + "loss": 1.0092, + "step": 54067 + }, + { + "epoch": 0.05, + "learning_rate": 4.964146754787412e-05, + "loss": 1.1013, + "step": 54068 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641454292749016e-05, + "loss": 0.3802, + "step": 54069 + }, + { + "epoch": 0.05, + "learning_rate": 4.964144103738066e-05, + "loss": 0.332, + "step": 54070 + }, + { + "epoch": 0.05, + "learning_rate": 4.964142778176906e-05, + "loss": 0.4423, + "step": 54071 + }, + { + "epoch": 0.05, + "learning_rate": 4.964141452591421e-05, + "loss": 0.4122, + "step": 54072 + }, + { + "epoch": 0.05, + "learning_rate": 4.96414012698161e-05, + "loss": 0.3289, + "step": 54073 + }, + { + "epoch": 0.05, + "learning_rate": 4.964138801347475e-05, + "loss": 0.2983, + "step": 54074 + }, + { + "epoch": 0.05, + "learning_rate": 4.964137475689015e-05, + "loss": 0.219, + "step": 54075 + }, + { + "epoch": 0.05, + "learning_rate": 4.96413615000623e-05, + "loss": 0.2719, + "step": 54076 + }, + { + "epoch": 0.05, + "learning_rate": 4.96413482429912e-05, + "loss": 0.2346, + "step": 54077 + }, + { + "epoch": 0.05, + "learning_rate": 4.964133498567685e-05, + "loss": 0.1692, + "step": 54078 + }, + { + "epoch": 0.05, + "learning_rate": 4.964132172811925e-05, + "loss": 0.2337, + "step": 54079 + }, + { + "epoch": 0.05, + "learning_rate": 4.96413084703184e-05, + "loss": 0.2495, + "step": 54080 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641295212274305e-05, + "loss": 0.2117, + "step": 54081 + }, + { + "epoch": 0.05, + "learning_rate": 4.964128195398696e-05, + "loss": 0.734, + "step": 54082 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641268695456375e-05, + "loss": 0.987, + "step": 54083 + }, + { + "epoch": 0.05, + "learning_rate": 4.964125543668253e-05, + "loss": 1.2432, + "step": 54084 + }, + { + "epoch": 0.05, + "learning_rate": 4.964124217766544e-05, + "loss": 1.2284, + "step": 54085 + }, + { + "epoch": 0.05, + "learning_rate": 4.96412289184051e-05, + "loss": 1.0046, + "step": 54086 + }, + { + "epoch": 0.05, + "learning_rate": 4.964121565890152e-05, + "loss": 1.3075, + "step": 54087 + }, + { + "epoch": 0.05, + "learning_rate": 4.964120239915468e-05, + "loss": 1.0227, + "step": 54088 + }, + { + "epoch": 0.05, + "learning_rate": 4.964118913916461e-05, + "loss": 0.9271, + "step": 54089 + }, + { + "epoch": 0.05, + "learning_rate": 4.964117587893128e-05, + "loss": 1.3558, + "step": 54090 + }, + { + "epoch": 0.05, + "learning_rate": 4.964116261845471e-05, + "loss": 1.4228, + "step": 54091 + }, + { + "epoch": 0.05, + "learning_rate": 4.964114935773488e-05, + "loss": 1.4265, + "step": 54092 + }, + { + "epoch": 0.05, + "learning_rate": 4.964113609677182e-05, + "loss": 1.4813, + "step": 54093 + }, + { + "epoch": 0.05, + "learning_rate": 4.96411228355655e-05, + "loss": 1.3662, + "step": 54094 + }, + { + "epoch": 0.05, + "learning_rate": 4.964110957411594e-05, + "loss": 1.1968, + "step": 54095 + }, + { + "epoch": 0.05, + "learning_rate": 4.964109631242313e-05, + "loss": 1.0854, + "step": 54096 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641083050487075e-05, + "loss": 1.1618, + "step": 54097 + }, + { + "epoch": 0.05, + "learning_rate": 4.964106978830777e-05, + "loss": 1.837, + "step": 54098 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641056525885224e-05, + "loss": 1.5109, + "step": 54099 + }, + { + "epoch": 0.05, + "learning_rate": 4.964104326321943e-05, + "loss": 0.9184, + "step": 54100 + }, + { + "epoch": 0.05, + "learning_rate": 4.964103000031039e-05, + "loss": 0.803, + "step": 54101 + }, + { + "epoch": 0.05, + "learning_rate": 4.96410167371581e-05, + "loss": 1.0526, + "step": 54102 + }, + { + "epoch": 0.05, + "learning_rate": 4.964100347376257e-05, + "loss": 0.7178, + "step": 54103 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640990210123796e-05, + "loss": 1.4574, + "step": 54104 + }, + { + "epoch": 0.05, + "learning_rate": 4.964097694624177e-05, + "loss": 1.1918, + "step": 54105 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640963682116507e-05, + "loss": 0.9974, + "step": 54106 + }, + { + "epoch": 0.05, + "learning_rate": 4.964095041774799e-05, + "loss": 1.1121, + "step": 54107 + }, + { + "epoch": 0.05, + "learning_rate": 4.964093715313623e-05, + "loss": 1.1487, + "step": 54108 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640923888281226e-05, + "loss": 1.1209, + "step": 54109 + }, + { + "epoch": 0.05, + "learning_rate": 4.964091062318298e-05, + "loss": 0.8842, + "step": 54110 + }, + { + "epoch": 0.05, + "learning_rate": 4.964089735784149e-05, + "loss": 0.8445, + "step": 54111 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640884092256746e-05, + "loss": 0.8247, + "step": 54112 + }, + { + "epoch": 0.05, + "learning_rate": 4.964087082642877e-05, + "loss": 1.2269, + "step": 54113 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640857560357536e-05, + "loss": 1.2081, + "step": 54114 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640844294043064e-05, + "loss": 0.8117, + "step": 54115 + }, + { + "epoch": 0.05, + "learning_rate": 4.964083102748535e-05, + "loss": 1.2904, + "step": 54116 + }, + { + "epoch": 0.05, + "learning_rate": 4.96408177606844e-05, + "loss": 1.0741, + "step": 54117 + }, + { + "epoch": 0.05, + "learning_rate": 4.964080449364019e-05, + "loss": 1.27, + "step": 54118 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640791226352746e-05, + "loss": 1.1667, + "step": 54119 + }, + { + "epoch": 0.05, + "learning_rate": 4.964077795882206e-05, + "loss": 1.3654, + "step": 54120 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640764691048126e-05, + "loss": 0.7836, + "step": 54121 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640751423030954e-05, + "loss": 1.2215, + "step": 54122 + }, + { + "epoch": 0.05, + "learning_rate": 4.964073815477053e-05, + "loss": 1.0378, + "step": 54123 + }, + { + "epoch": 0.05, + "learning_rate": 4.964072488626686e-05, + "loss": 1.0442, + "step": 54124 + }, + { + "epoch": 0.05, + "learning_rate": 4.964071161751996e-05, + "loss": 1.0791, + "step": 54125 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640698348529814e-05, + "loss": 0.9001, + "step": 54126 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640685079296425e-05, + "loss": 1.1012, + "step": 54127 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640671809819796e-05, + "loss": 1.0498, + "step": 54128 + }, + { + "epoch": 0.05, + "learning_rate": 4.964065854009992e-05, + "loss": 1.2597, + "step": 54129 + }, + { + "epoch": 0.05, + "learning_rate": 4.96406452701368e-05, + "loss": 0.9644, + "step": 54130 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640631999930436e-05, + "loss": 1.1485, + "step": 54131 + }, + { + "epoch": 0.05, + "learning_rate": 4.964061872948084e-05, + "loss": 1.533, + "step": 54132 + }, + { + "epoch": 0.05, + "learning_rate": 4.964060545878799e-05, + "loss": 0.99, + "step": 54133 + }, + { + "epoch": 0.05, + "learning_rate": 4.964059218785191e-05, + "loss": 1.2001, + "step": 54134 + }, + { + "epoch": 0.05, + "learning_rate": 4.964057891667259e-05, + "loss": 1.2635, + "step": 54135 + }, + { + "epoch": 0.05, + "learning_rate": 4.964056564525001e-05, + "loss": 1.0756, + "step": 54136 + }, + { + "epoch": 0.05, + "learning_rate": 4.96405523735842e-05, + "loss": 1.1481, + "step": 54137 + }, + { + "epoch": 0.05, + "learning_rate": 4.964053910167515e-05, + "loss": 1.4355, + "step": 54138 + }, + { + "epoch": 0.05, + "learning_rate": 4.964052582952287e-05, + "loss": 1.0234, + "step": 54139 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640512557127335e-05, + "loss": 1.0088, + "step": 54140 + }, + { + "epoch": 0.05, + "learning_rate": 4.964049928448856e-05, + "loss": 1.0578, + "step": 54141 + }, + { + "epoch": 0.05, + "learning_rate": 4.964048601160655e-05, + "loss": 1.3106, + "step": 54142 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640472738481294e-05, + "loss": 1.3392, + "step": 54143 + }, + { + "epoch": 0.05, + "learning_rate": 4.96404594651128e-05, + "loss": 1.1499, + "step": 54144 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640446191501064e-05, + "loss": 1.2484, + "step": 54145 + }, + { + "epoch": 0.05, + "learning_rate": 4.964043291764609e-05, + "loss": 1.2777, + "step": 54146 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640419643547876e-05, + "loss": 1.1746, + "step": 54147 + }, + { + "epoch": 0.05, + "learning_rate": 4.964040636920642e-05, + "loss": 1.2682, + "step": 54148 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640393094621726e-05, + "loss": 1.3312, + "step": 54149 + }, + { + "epoch": 0.05, + "learning_rate": 4.964037981979379e-05, + "loss": 1.4228, + "step": 54150 + }, + { + "epoch": 0.05, + "learning_rate": 4.964036654472262e-05, + "loss": 1.283, + "step": 54151 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640353269408205e-05, + "loss": 1.2821, + "step": 54152 + }, + { + "epoch": 0.05, + "learning_rate": 4.964033999385056e-05, + "loss": 1.2375, + "step": 54153 + }, + { + "epoch": 0.05, + "learning_rate": 4.964032671804966e-05, + "loss": 1.4562, + "step": 54154 + }, + { + "epoch": 0.05, + "learning_rate": 4.964031344200554e-05, + "loss": 1.277, + "step": 54155 + }, + { + "epoch": 0.05, + "learning_rate": 4.964030016571817e-05, + "loss": 1.2732, + "step": 54156 + }, + { + "epoch": 0.05, + "learning_rate": 4.964028688918756e-05, + "loss": 1.0745, + "step": 54157 + }, + { + "epoch": 0.05, + "learning_rate": 4.964027361241371e-05, + "loss": 1.0933, + "step": 54158 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640260335396624e-05, + "loss": 1.2683, + "step": 54159 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640247058136304e-05, + "loss": 1.3443, + "step": 54160 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640233780632743e-05, + "loss": 1.024, + "step": 54161 + }, + { + "epoch": 0.05, + "learning_rate": 4.964022050288595e-05, + "loss": 0.9574, + "step": 54162 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640207224895906e-05, + "loss": 0.6691, + "step": 54163 + }, + { + "epoch": 0.05, + "learning_rate": 4.964019394666264e-05, + "loss": 1.3888, + "step": 54164 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640180668186126e-05, + "loss": 1.0596, + "step": 54165 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640167389466375e-05, + "loss": 0.9791, + "step": 54166 + }, + { + "epoch": 0.05, + "learning_rate": 4.964015411050339e-05, + "loss": 1.0355, + "step": 54167 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640140831297164e-05, + "loss": 0.9699, + "step": 54168 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640127551847704e-05, + "loss": 1.0856, + "step": 54169 + }, + { + "epoch": 0.05, + "learning_rate": 4.964011427215501e-05, + "loss": 2.0564, + "step": 54170 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640100992219076e-05, + "loss": 2.7855, + "step": 54171 + }, + { + "epoch": 0.05, + "learning_rate": 4.96400877120399e-05, + "loss": 1.7189, + "step": 54172 + }, + { + "epoch": 0.05, + "learning_rate": 4.964007443161749e-05, + "loss": 1.2389, + "step": 54173 + }, + { + "epoch": 0.05, + "learning_rate": 4.964006115095185e-05, + "loss": 0.9938, + "step": 54174 + }, + { + "epoch": 0.05, + "learning_rate": 4.964004787004297e-05, + "loss": 1.327, + "step": 54175 + }, + { + "epoch": 0.05, + "learning_rate": 4.9640034588890846e-05, + "loss": 1.4053, + "step": 54176 + }, + { + "epoch": 0.05, + "learning_rate": 4.96400213074955e-05, + "loss": 1.3339, + "step": 54177 + }, + { + "epoch": 0.05, + "learning_rate": 4.964000802585691e-05, + "loss": 1.6081, + "step": 54178 + }, + { + "epoch": 0.05, + "learning_rate": 4.963999474397508e-05, + "loss": 1.2585, + "step": 54179 + }, + { + "epoch": 0.05, + "learning_rate": 4.963998146185003e-05, + "loss": 1.1952, + "step": 54180 + }, + { + "epoch": 0.05, + "learning_rate": 4.963996817948173e-05, + "loss": 1.235, + "step": 54181 + }, + { + "epoch": 0.05, + "learning_rate": 4.96399548968702e-05, + "loss": 1.2333, + "step": 54182 + }, + { + "epoch": 0.05, + "learning_rate": 4.963994161401543e-05, + "loss": 1.4996, + "step": 54183 + }, + { + "epoch": 0.05, + "learning_rate": 4.963992833091743e-05, + "loss": 1.3982, + "step": 54184 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639915047576194e-05, + "loss": 1.2881, + "step": 54185 + }, + { + "epoch": 0.05, + "learning_rate": 4.963990176399172e-05, + "loss": 1.2613, + "step": 54186 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639888480164024e-05, + "loss": 1.0875, + "step": 54187 + }, + { + "epoch": 0.05, + "learning_rate": 4.963987519609308e-05, + "loss": 1.0179, + "step": 54188 + }, + { + "epoch": 0.05, + "learning_rate": 4.96398619117789e-05, + "loss": 1.0072, + "step": 54189 + }, + { + "epoch": 0.05, + "learning_rate": 4.963984862722149e-05, + "loss": 1.1834, + "step": 54190 + }, + { + "epoch": 0.05, + "learning_rate": 4.963983534242085e-05, + "loss": 1.3899, + "step": 54191 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639822057376974e-05, + "loss": 1.2769, + "step": 54192 + }, + { + "epoch": 0.05, + "learning_rate": 4.963980877208986e-05, + "loss": 0.8939, + "step": 54193 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639795486559515e-05, + "loss": 0.9731, + "step": 54194 + }, + { + "epoch": 0.05, + "learning_rate": 4.963978220078593e-05, + "loss": 1.0734, + "step": 54195 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639768914769126e-05, + "loss": 1.1048, + "step": 54196 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639755628509074e-05, + "loss": 1.1974, + "step": 54197 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639742342005795e-05, + "loss": 1.1919, + "step": 54198 + }, + { + "epoch": 0.05, + "learning_rate": 4.963972905525929e-05, + "loss": 1.6035, + "step": 54199 + }, + { + "epoch": 0.05, + "learning_rate": 4.963971576826954e-05, + "loss": 0.8792, + "step": 54200 + }, + { + "epoch": 0.05, + "learning_rate": 4.963970248103656e-05, + "loss": 1.4016, + "step": 54201 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639689193560346e-05, + "loss": 1.3484, + "step": 54202 + }, + { + "epoch": 0.05, + "learning_rate": 4.96396759058409e-05, + "loss": 1.5533, + "step": 54203 + }, + { + "epoch": 0.05, + "learning_rate": 4.963966261787823e-05, + "loss": 1.4211, + "step": 54204 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639649329672324e-05, + "loss": 0.9963, + "step": 54205 + }, + { + "epoch": 0.05, + "learning_rate": 4.963963604122318e-05, + "loss": 1.4193, + "step": 54206 + }, + { + "epoch": 0.05, + "learning_rate": 4.963962275253081e-05, + "loss": 1.1808, + "step": 54207 + }, + { + "epoch": 0.05, + "learning_rate": 4.96396094635952e-05, + "loss": 1.2963, + "step": 54208 + }, + { + "epoch": 0.05, + "learning_rate": 4.963959617441636e-05, + "loss": 1.3271, + "step": 54209 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639582884994296e-05, + "loss": 1.0698, + "step": 54210 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639569595328996e-05, + "loss": 1.3498, + "step": 54211 + }, + { + "epoch": 0.05, + "learning_rate": 4.963955630542047e-05, + "loss": 1.6977, + "step": 54212 + }, + { + "epoch": 0.05, + "learning_rate": 4.96395430152687e-05, + "loss": 1.2324, + "step": 54213 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639529724873704e-05, + "loss": 0.978, + "step": 54214 + }, + { + "epoch": 0.05, + "learning_rate": 4.963951643423548e-05, + "loss": 1.2695, + "step": 54215 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639503143354024e-05, + "loss": 1.0723, + "step": 54216 + }, + { + "epoch": 0.05, + "learning_rate": 4.963948985222934e-05, + "loss": 0.7779, + "step": 54217 + }, + { + "epoch": 0.05, + "learning_rate": 4.963947656086142e-05, + "loss": 1.0439, + "step": 54218 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639463269250276e-05, + "loss": 1.2308, + "step": 54219 + }, + { + "epoch": 0.05, + "learning_rate": 4.96394499773959e-05, + "loss": 1.6489, + "step": 54220 + }, + { + "epoch": 0.05, + "learning_rate": 4.963943668529829e-05, + "loss": 1.7933, + "step": 54221 + }, + { + "epoch": 0.05, + "learning_rate": 4.963942339295745e-05, + "loss": 1.8165, + "step": 54222 + }, + { + "epoch": 0.05, + "learning_rate": 4.963941010037338e-05, + "loss": 1.7205, + "step": 54223 + }, + { + "epoch": 0.05, + "learning_rate": 4.963939680754608e-05, + "loss": 1.5144, + "step": 54224 + }, + { + "epoch": 0.05, + "learning_rate": 4.963938351447555e-05, + "loss": 1.3167, + "step": 54225 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639370221161796e-05, + "loss": 1.3155, + "step": 54226 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639356927604806e-05, + "loss": 1.6586, + "step": 54227 + }, + { + "epoch": 0.05, + "learning_rate": 4.963934363380459e-05, + "loss": 1.3782, + "step": 54228 + }, + { + "epoch": 0.05, + "learning_rate": 4.963933033976115e-05, + "loss": 1.5057, + "step": 54229 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639317045474465e-05, + "loss": 1.3073, + "step": 54230 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639303750944567e-05, + "loss": 1.4427, + "step": 54231 + }, + { + "epoch": 0.05, + "learning_rate": 4.963929045617143e-05, + "loss": 1.446, + "step": 54232 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639277161155074e-05, + "loss": 1.0795, + "step": 54233 + }, + { + "epoch": 0.05, + "learning_rate": 4.963926386589548e-05, + "loss": 1.0663, + "step": 54234 + }, + { + "epoch": 0.05, + "learning_rate": 4.963925057039266e-05, + "loss": 1.3753, + "step": 54235 + }, + { + "epoch": 0.05, + "learning_rate": 4.963923727464661e-05, + "loss": 1.2026, + "step": 54236 + }, + { + "epoch": 0.05, + "learning_rate": 4.963922397865734e-05, + "loss": 1.2611, + "step": 54237 + }, + { + "epoch": 0.05, + "learning_rate": 4.963921068242484e-05, + "loss": 1.2413, + "step": 54238 + }, + { + "epoch": 0.05, + "learning_rate": 4.96391973859491e-05, + "loss": 1.2304, + "step": 54239 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639184089230144e-05, + "loss": 1.1767, + "step": 54240 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639170792267966e-05, + "loss": 1.1084, + "step": 54241 + }, + { + "epoch": 0.05, + "learning_rate": 4.963915749506255e-05, + "loss": 1.1078, + "step": 54242 + }, + { + "epoch": 0.05, + "learning_rate": 4.963914419761391e-05, + "loss": 0.9548, + "step": 54243 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639130899922036e-05, + "loss": 1.2094, + "step": 54244 + }, + { + "epoch": 0.05, + "learning_rate": 4.963911760198694e-05, + "loss": 0.6417, + "step": 54245 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639104303808615e-05, + "loss": 0.9386, + "step": 54246 + }, + { + "epoch": 0.05, + "learning_rate": 4.963909100538707e-05, + "loss": 1.295, + "step": 54247 + }, + { + "epoch": 0.05, + "learning_rate": 4.963907770672229e-05, + "loss": 1.3853, + "step": 54248 + }, + { + "epoch": 0.05, + "learning_rate": 4.963906440781429e-05, + "loss": 1.1159, + "step": 54249 + }, + { + "epoch": 0.05, + "learning_rate": 4.963905110866306e-05, + "loss": 1.253, + "step": 54250 + }, + { + "epoch": 0.05, + "learning_rate": 4.9639037809268605e-05, + "loss": 0.9892, + "step": 54251 + }, + { + "epoch": 0.05, + "learning_rate": 4.963902450963092e-05, + "loss": 1.722, + "step": 54252 + }, + { + "epoch": 0.05, + "learning_rate": 4.963901120975002e-05, + "loss": 1.2816, + "step": 54253 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638997909625886e-05, + "loss": 1.0474, + "step": 54254 + }, + { + "epoch": 0.05, + "learning_rate": 4.963898460925852e-05, + "loss": 1.2999, + "step": 54255 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638971308647934e-05, + "loss": 0.7926, + "step": 54256 + }, + { + "epoch": 0.05, + "learning_rate": 4.963895800779412e-05, + "loss": 0.9779, + "step": 54257 + }, + { + "epoch": 0.05, + "learning_rate": 4.963894470669709e-05, + "loss": 1.2794, + "step": 54258 + }, + { + "epoch": 0.05, + "learning_rate": 4.963893140535683e-05, + "loss": 1.2367, + "step": 54259 + }, + { + "epoch": 0.05, + "learning_rate": 4.963891810377333e-05, + "loss": 0.3723, + "step": 54260 + }, + { + "epoch": 0.05, + "learning_rate": 4.963890480194663e-05, + "loss": 1.0484, + "step": 54261 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638891499876696e-05, + "loss": 0.8737, + "step": 54262 + }, + { + "epoch": 0.05, + "learning_rate": 4.963887819756353e-05, + "loss": 0.8369, + "step": 54263 + }, + { + "epoch": 0.05, + "learning_rate": 4.963886489500714e-05, + "loss": 1.0927, + "step": 54264 + }, + { + "epoch": 0.05, + "learning_rate": 4.963885159220753e-05, + "loss": 1.357, + "step": 54265 + }, + { + "epoch": 0.05, + "learning_rate": 4.96388382891647e-05, + "loss": 0.8601, + "step": 54266 + }, + { + "epoch": 0.05, + "learning_rate": 4.963882498587864e-05, + "loss": 1.1255, + "step": 54267 + }, + { + "epoch": 0.05, + "learning_rate": 4.963881168234936e-05, + "loss": 1.1677, + "step": 54268 + }, + { + "epoch": 0.05, + "learning_rate": 4.963879837857685e-05, + "loss": 1.2653, + "step": 54269 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638785074561114e-05, + "loss": 1.1587, + "step": 54270 + }, + { + "epoch": 0.05, + "learning_rate": 4.963877177030216e-05, + "loss": 1.047, + "step": 54271 + }, + { + "epoch": 0.05, + "learning_rate": 4.963875846579998e-05, + "loss": 1.1641, + "step": 54272 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638745161054586e-05, + "loss": 1.296, + "step": 54273 + }, + { + "epoch": 0.05, + "learning_rate": 4.963873185606596e-05, + "loss": 1.1147, + "step": 54274 + }, + { + "epoch": 0.05, + "learning_rate": 4.963871855083411e-05, + "loss": 1.0323, + "step": 54275 + }, + { + "epoch": 0.05, + "learning_rate": 4.963870524535904e-05, + "loss": 1.3664, + "step": 54276 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638691939640745e-05, + "loss": 0.7657, + "step": 54277 + }, + { + "epoch": 0.05, + "learning_rate": 4.963867863367923e-05, + "loss": 0.823, + "step": 54278 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638665327474485e-05, + "loss": 0.9039, + "step": 54279 + }, + { + "epoch": 0.05, + "learning_rate": 4.963865202102652e-05, + "loss": 1.362, + "step": 54280 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638638714335336e-05, + "loss": 1.2463, + "step": 54281 + }, + { + "epoch": 0.05, + "learning_rate": 4.963862540740094e-05, + "loss": 1.2467, + "step": 54282 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638612100223305e-05, + "loss": 0.9227, + "step": 54283 + }, + { + "epoch": 0.05, + "learning_rate": 4.963859879280245e-05, + "loss": 1.5832, + "step": 54284 + }, + { + "epoch": 0.05, + "learning_rate": 4.963858548513838e-05, + "loss": 1.4311, + "step": 54285 + }, + { + "epoch": 0.05, + "learning_rate": 4.963857217723109e-05, + "loss": 0.7302, + "step": 54286 + }, + { + "epoch": 0.05, + "learning_rate": 4.963855886908057e-05, + "loss": 0.4076, + "step": 54287 + }, + { + "epoch": 0.05, + "learning_rate": 4.963854556068683e-05, + "loss": 0.3922, + "step": 54288 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638532252049876e-05, + "loss": 0.5158, + "step": 54289 + }, + { + "epoch": 0.05, + "learning_rate": 4.96385189431697e-05, + "loss": 0.4914, + "step": 54290 + }, + { + "epoch": 0.05, + "learning_rate": 4.963850563404629e-05, + "loss": 0.3283, + "step": 54291 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638492324679676e-05, + "loss": 0.2418, + "step": 54292 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638479015069826e-05, + "loss": 0.3109, + "step": 54293 + }, + { + "epoch": 0.05, + "learning_rate": 4.963846570521676e-05, + "loss": 0.3221, + "step": 54294 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638452395120485e-05, + "loss": 0.5523, + "step": 54295 + }, + { + "epoch": 0.05, + "learning_rate": 4.963843908478097e-05, + "loss": 1.2886, + "step": 54296 + }, + { + "epoch": 0.05, + "learning_rate": 4.963842577419825e-05, + "loss": 1.367, + "step": 54297 + }, + { + "epoch": 0.05, + "learning_rate": 4.963841246337231e-05, + "loss": 0.7704, + "step": 54298 + }, + { + "epoch": 0.05, + "learning_rate": 4.963839915230314e-05, + "loss": 1.2632, + "step": 54299 + }, + { + "epoch": 0.05, + "learning_rate": 4.963838584099075e-05, + "loss": 1.1509, + "step": 54300 + }, + { + "epoch": 0.05, + "learning_rate": 4.963837252943515e-05, + "loss": 0.825, + "step": 54301 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638359217636324e-05, + "loss": 1.3007, + "step": 54302 + }, + { + "epoch": 0.05, + "learning_rate": 4.963834590559428e-05, + "loss": 0.9345, + "step": 54303 + }, + { + "epoch": 0.05, + "learning_rate": 4.963833259330902e-05, + "loss": 0.9701, + "step": 54304 + }, + { + "epoch": 0.05, + "learning_rate": 4.963831928078054e-05, + "loss": 0.8983, + "step": 54305 + }, + { + "epoch": 0.05, + "learning_rate": 4.963830596800884e-05, + "loss": 1.4358, + "step": 54306 + }, + { + "epoch": 0.05, + "learning_rate": 4.963829265499391e-05, + "loss": 0.9022, + "step": 54307 + }, + { + "epoch": 0.05, + "learning_rate": 4.963827934173577e-05, + "loss": 0.9832, + "step": 54308 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638266028234416e-05, + "loss": 1.1354, + "step": 54309 + }, + { + "epoch": 0.05, + "learning_rate": 4.963825271448984e-05, + "loss": 1.1825, + "step": 54310 + }, + { + "epoch": 0.05, + "learning_rate": 4.963823940050205e-05, + "loss": 1.4718, + "step": 54311 + }, + { + "epoch": 0.05, + "learning_rate": 4.963822608627103e-05, + "loss": 1.1283, + "step": 54312 + }, + { + "epoch": 0.05, + "learning_rate": 4.96382127717968e-05, + "loss": 1.1462, + "step": 54313 + }, + { + "epoch": 0.05, + "learning_rate": 4.963819945707935e-05, + "loss": 1.4296, + "step": 54314 + }, + { + "epoch": 0.05, + "learning_rate": 4.963818614211868e-05, + "loss": 1.015, + "step": 54315 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638172826914795e-05, + "loss": 1.2381, + "step": 54316 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638159511467696e-05, + "loss": 1.061, + "step": 54317 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638146195777375e-05, + "loss": 1.1064, + "step": 54318 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638132879843835e-05, + "loss": 0.5213, + "step": 54319 + }, + { + "epoch": 0.05, + "learning_rate": 4.963811956366708e-05, + "loss": 0.8533, + "step": 54320 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638106247247106e-05, + "loss": 1.826, + "step": 54321 + }, + { + "epoch": 0.05, + "learning_rate": 4.963809293058392e-05, + "loss": 1.0495, + "step": 54322 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638079613677516e-05, + "loss": 0.4508, + "step": 54323 + }, + { + "epoch": 0.05, + "learning_rate": 4.963806629652789e-05, + "loss": 0.5153, + "step": 54324 + }, + { + "epoch": 0.05, + "learning_rate": 4.963805297913505e-05, + "loss": 0.4937, + "step": 54325 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638039661498994e-05, + "loss": 0.3748, + "step": 54326 + }, + { + "epoch": 0.05, + "learning_rate": 4.963802634361972e-05, + "loss": 0.8692, + "step": 54327 + }, + { + "epoch": 0.05, + "learning_rate": 4.9638013025497234e-05, + "loss": 1.3576, + "step": 54328 + }, + { + "epoch": 0.05, + "learning_rate": 4.963799970713153e-05, + "loss": 1.0085, + "step": 54329 + }, + { + "epoch": 0.05, + "learning_rate": 4.9637986388522605e-05, + "loss": 1.3587, + "step": 54330 + }, + { + "epoch": 0.05, + "learning_rate": 4.963797306967047e-05, + "loss": 1.1544, + "step": 54331 + }, + { + "epoch": 0.05, + "learning_rate": 4.963795975057512e-05, + "loss": 1.3092, + "step": 54332 + }, + { + "epoch": 0.05, + "learning_rate": 4.963794643123655e-05, + "loss": 0.6714, + "step": 54333 + }, + { + "epoch": 0.05, + "learning_rate": 4.963793311165476e-05, + "loss": 0.2636, + "step": 54334 + }, + { + "epoch": 0.05, + "learning_rate": 4.963791979182977e-05, + "loss": 0.2708, + "step": 54335 + }, + { + "epoch": 0.05, + "learning_rate": 4.963790647176155e-05, + "loss": 0.2739, + "step": 54336 + }, + { + "epoch": 0.05, + "learning_rate": 4.963789315145012e-05, + "loss": 0.1915, + "step": 54337 + }, + { + "epoch": 0.05, + "learning_rate": 4.963787983089548e-05, + "loss": 0.3925, + "step": 54338 + }, + { + "epoch": 0.05, + "learning_rate": 4.963786651009762e-05, + "loss": 0.8357, + "step": 54339 + }, + { + "epoch": 0.05, + "learning_rate": 4.963785318905655e-05, + "loss": 1.4369, + "step": 54340 + }, + { + "epoch": 0.05, + "learning_rate": 4.9637839867772265e-05, + "loss": 0.9639, + "step": 54341 + }, + { + "epoch": 0.05, + "learning_rate": 4.963782654624476e-05, + "loss": 1.0491, + "step": 54342 + }, + { + "epoch": 0.05, + "learning_rate": 4.963781322447405e-05, + "loss": 0.7748, + "step": 54343 + }, + { + "epoch": 0.05, + "learning_rate": 4.963779990246011e-05, + "loss": 1.0329, + "step": 54344 + }, + { + "epoch": 0.05, + "learning_rate": 4.963778658020297e-05, + "loss": 1.4971, + "step": 54345 + }, + { + "epoch": 0.05, + "learning_rate": 4.963777325770261e-05, + "loss": 0.952, + "step": 54346 + }, + { + "epoch": 0.05, + "learning_rate": 4.9637759934959035e-05, + "loss": 1.3532, + "step": 54347 + }, + { + "epoch": 0.05, + "learning_rate": 4.963774661197226e-05, + "loss": 1.2088, + "step": 54348 + }, + { + "epoch": 0.05, + "learning_rate": 4.963773328874226e-05, + "loss": 0.8871, + "step": 54349 + }, + { + "epoch": 0.05, + "learning_rate": 4.963771996526905e-05, + "loss": 0.9345, + "step": 54350 + }, + { + "epoch": 0.05, + "learning_rate": 4.963770664155262e-05, + "loss": 1.0902, + "step": 54351 + }, + { + "epoch": 0.05, + "learning_rate": 4.963769331759298e-05, + "loss": 1.1069, + "step": 54352 + }, + { + "epoch": 0.05, + "learning_rate": 4.963767999339013e-05, + "loss": 1.1407, + "step": 54353 + }, + { + "epoch": 0.05, + "learning_rate": 4.963766666894407e-05, + "loss": 1.1265, + "step": 54354 + }, + { + "epoch": 0.05, + "learning_rate": 4.963765334425479e-05, + "loss": 1.0469, + "step": 54355 + }, + { + "epoch": 0.05, + "learning_rate": 4.96376400193223e-05, + "loss": 1.0938, + "step": 54356 + }, + { + "epoch": 0.05, + "learning_rate": 4.96376266941466e-05, + "loss": 1.2538, + "step": 54357 + }, + { + "epoch": 0.05, + "learning_rate": 4.963761336872769e-05, + "loss": 1.2313, + "step": 54358 + }, + { + "epoch": 0.05, + "learning_rate": 4.9637600043065554e-05, + "loss": 1.3587, + "step": 54359 + }, + { + "epoch": 0.05, + "learning_rate": 4.963758671716022e-05, + "loss": 1.1262, + "step": 54360 + }, + { + "epoch": 0.05, + "learning_rate": 4.963757339101167e-05, + "loss": 1.0776, + "step": 54361 + }, + { + "epoch": 0.05, + "learning_rate": 4.963756006461991e-05, + "loss": 1.0759, + "step": 54362 + }, + { + "epoch": 0.05, + "learning_rate": 4.9637546737984936e-05, + "loss": 1.1766, + "step": 54363 + }, + { + "epoch": 0.05, + "learning_rate": 4.9637533411106754e-05, + "loss": 1.3846, + "step": 54364 + }, + { + "epoch": 0.05, + "learning_rate": 4.963752008398536e-05, + "loss": 1.0446, + "step": 54365 + }, + { + "epoch": 0.05, + "learning_rate": 4.963750675662074e-05, + "loss": 1.3644, + "step": 54366 + }, + { + "epoch": 0.05, + "learning_rate": 4.9637493429012926e-05, + "loss": 1.1137, + "step": 54367 + }, + { + "epoch": 0.05, + "learning_rate": 4.96374801011619e-05, + "loss": 1.3482, + "step": 54368 + }, + { + "epoch": 0.05, + "learning_rate": 4.963746677306766e-05, + "loss": 1.0884, + "step": 54369 + }, + { + "epoch": 0.05, + "learning_rate": 4.963745344473021e-05, + "loss": 0.8703, + "step": 54370 + }, + { + "epoch": 0.05, + "learning_rate": 4.9637440116149545e-05, + "loss": 0.7904, + "step": 54371 + }, + { + "epoch": 0.05, + "learning_rate": 4.9637426787325674e-05, + "loss": 0.7872, + "step": 54372 + }, + { + "epoch": 0.05, + "learning_rate": 4.9637413458258596e-05, + "loss": 1.1209, + "step": 54373 + }, + { + "epoch": 0.05, + "learning_rate": 4.96374001289483e-05, + "loss": 1.0124, + "step": 54374 + }, + { + "epoch": 0.05, + "learning_rate": 4.96373867993948e-05, + "loss": 1.1981, + "step": 54375 + }, + { + "epoch": 0.05, + "learning_rate": 4.963737346959809e-05, + "loss": 1.0125, + "step": 54376 + }, + { + "epoch": 0.05, + "learning_rate": 4.963736013955816e-05, + "loss": 0.6444, + "step": 54377 + }, + { + "epoch": 0.05, + "learning_rate": 4.963734680927503e-05, + "loss": 0.905, + "step": 54378 + }, + { + "epoch": 0.05, + "learning_rate": 4.963733347874869e-05, + "loss": 0.5086, + "step": 54379 + }, + { + "epoch": 0.05, + "learning_rate": 4.963732014797914e-05, + "loss": 1.8386, + "step": 54380 + }, + { + "epoch": 0.05, + "learning_rate": 4.963730681696638e-05, + "loss": 1.506, + "step": 54381 + }, + { + "epoch": 0.05, + "learning_rate": 4.963729348571041e-05, + "loss": 1.1771, + "step": 54382 + }, + { + "epoch": 0.05, + "learning_rate": 4.963728015421124e-05, + "loss": 1.1098, + "step": 54383 + }, + { + "epoch": 0.05, + "learning_rate": 4.963726682246885e-05, + "loss": 1.2093, + "step": 54384 + }, + { + "epoch": 0.05, + "learning_rate": 4.963725349048325e-05, + "loss": 1.1159, + "step": 54385 + }, + { + "epoch": 0.05, + "learning_rate": 4.963724015825445e-05, + "loss": 1.1846, + "step": 54386 + }, + { + "epoch": 0.05, + "learning_rate": 4.963722682578243e-05, + "loss": 1.2476, + "step": 54387 + }, + { + "epoch": 0.05, + "learning_rate": 4.963721349306721e-05, + "loss": 1.2608, + "step": 54388 + }, + { + "epoch": 0.05, + "learning_rate": 4.963720016010879e-05, + "loss": 0.8847, + "step": 54389 + }, + { + "epoch": 0.05, + "learning_rate": 4.963718682690715e-05, + "loss": 0.8976, + "step": 54390 + }, + { + "epoch": 0.05, + "learning_rate": 4.9637173493462306e-05, + "loss": 0.6746, + "step": 54391 + }, + { + "epoch": 0.05, + "learning_rate": 4.9637160159774256e-05, + "loss": 1.2624, + "step": 54392 + }, + { + "epoch": 0.05, + "learning_rate": 4.963714682584299e-05, + "loss": 1.0062, + "step": 54393 + }, + { + "epoch": 0.05, + "learning_rate": 4.963713349166852e-05, + "loss": 0.8541, + "step": 54394 + }, + { + "epoch": 0.05, + "learning_rate": 4.963712015725085e-05, + "loss": 0.9602, + "step": 54395 + }, + { + "epoch": 0.05, + "learning_rate": 4.9637106822589964e-05, + "loss": 0.6805, + "step": 54396 + }, + { + "epoch": 0.05, + "learning_rate": 4.9637093487685874e-05, + "loss": 0.2923, + "step": 54397 + }, + { + "epoch": 0.05, + "learning_rate": 4.963708015253858e-05, + "loss": 0.2915, + "step": 54398 + }, + { + "epoch": 0.05, + "learning_rate": 4.963706681714807e-05, + "loss": 0.4189, + "step": 54399 + }, + { + "epoch": 0.05, + "learning_rate": 4.963705348151436e-05, + "loss": 0.9449, + "step": 54400 + }, + { + "epoch": 0.05, + "learning_rate": 4.963704014563745e-05, + "loss": 1.0408, + "step": 54401 + }, + { + "epoch": 0.05, + "learning_rate": 4.963702680951732e-05, + "loss": 1.2513, + "step": 54402 + }, + { + "epoch": 0.05, + "learning_rate": 4.9637013473153995e-05, + "loss": 1.1792, + "step": 54403 + }, + { + "epoch": 0.05, + "learning_rate": 4.963700013654746e-05, + "loss": 1.1583, + "step": 54404 + }, + { + "epoch": 0.05, + "learning_rate": 4.963698679969772e-05, + "loss": 1.3136, + "step": 54405 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636973462604766e-05, + "loss": 1.4212, + "step": 54406 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636960125268614e-05, + "loss": 1.3134, + "step": 54407 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636946787689255e-05, + "loss": 1.274, + "step": 54408 + }, + { + "epoch": 0.05, + "learning_rate": 4.963693344986668e-05, + "loss": 1.1698, + "step": 54409 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636920111800916e-05, + "loss": 1.0814, + "step": 54410 + }, + { + "epoch": 0.05, + "learning_rate": 4.963690677349194e-05, + "loss": 1.0357, + "step": 54411 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636893434939756e-05, + "loss": 1.146, + "step": 54412 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636880096144376e-05, + "loss": 0.8801, + "step": 54413 + }, + { + "epoch": 0.05, + "learning_rate": 4.963686675710578e-05, + "loss": 0.9859, + "step": 54414 + }, + { + "epoch": 0.05, + "learning_rate": 4.963685341782399e-05, + "loss": 1.3257, + "step": 54415 + }, + { + "epoch": 0.05, + "learning_rate": 4.963684007829898e-05, + "loss": 1.2091, + "step": 54416 + }, + { + "epoch": 0.05, + "learning_rate": 4.963682673853078e-05, + "loss": 0.9792, + "step": 54417 + }, + { + "epoch": 0.05, + "learning_rate": 4.963681339851938e-05, + "loss": 1.3335, + "step": 54418 + }, + { + "epoch": 0.05, + "learning_rate": 4.963680005826476e-05, + "loss": 1.2134, + "step": 54419 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636786717766945e-05, + "loss": 1.0808, + "step": 54420 + }, + { + "epoch": 0.05, + "learning_rate": 4.963677337702592e-05, + "loss": 1.1688, + "step": 54421 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636760036041695e-05, + "loss": 1.2513, + "step": 54422 + }, + { + "epoch": 0.05, + "learning_rate": 4.963674669481426e-05, + "loss": 1.0959, + "step": 54423 + }, + { + "epoch": 0.05, + "learning_rate": 4.963673335334363e-05, + "loss": 1.0746, + "step": 54424 + }, + { + "epoch": 0.05, + "learning_rate": 4.96367200116298e-05, + "loss": 1.3038, + "step": 54425 + }, + { + "epoch": 0.05, + "learning_rate": 4.963670666967276e-05, + "loss": 0.7539, + "step": 54426 + }, + { + "epoch": 0.05, + "learning_rate": 4.963669332747252e-05, + "loss": 0.7633, + "step": 54427 + }, + { + "epoch": 0.05, + "learning_rate": 4.963667998502907e-05, + "loss": 0.7228, + "step": 54428 + }, + { + "epoch": 0.05, + "learning_rate": 4.963666664234242e-05, + "loss": 0.5062, + "step": 54429 + }, + { + "epoch": 0.05, + "learning_rate": 4.963665329941257e-05, + "loss": 0.2385, + "step": 54430 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636639956239514e-05, + "loss": 0.2227, + "step": 54431 + }, + { + "epoch": 0.05, + "learning_rate": 4.963662661282326e-05, + "loss": 0.4226, + "step": 54432 + }, + { + "epoch": 0.05, + "learning_rate": 4.96366132691638e-05, + "loss": 0.9469, + "step": 54433 + }, + { + "epoch": 0.05, + "learning_rate": 4.963659992526114e-05, + "loss": 1.2225, + "step": 54434 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636586581115275e-05, + "loss": 1.2408, + "step": 54435 + }, + { + "epoch": 0.05, + "learning_rate": 4.963657323672621e-05, + "loss": 1.0544, + "step": 54436 + }, + { + "epoch": 0.05, + "learning_rate": 4.963655989209394e-05, + "loss": 0.9299, + "step": 54437 + }, + { + "epoch": 0.05, + "learning_rate": 4.963654654721848e-05, + "loss": 1.063, + "step": 54438 + }, + { + "epoch": 0.05, + "learning_rate": 4.96365332020998e-05, + "loss": 1.076, + "step": 54439 + }, + { + "epoch": 0.05, + "learning_rate": 4.963651985673793e-05, + "loss": 1.2428, + "step": 54440 + }, + { + "epoch": 0.05, + "learning_rate": 4.963650651113286e-05, + "loss": 1.4062, + "step": 54441 + }, + { + "epoch": 0.05, + "learning_rate": 4.963649316528458e-05, + "loss": 1.0335, + "step": 54442 + }, + { + "epoch": 0.05, + "learning_rate": 4.96364798191931e-05, + "loss": 1.6265, + "step": 54443 + }, + { + "epoch": 0.05, + "learning_rate": 4.963646647285843e-05, + "loss": 1.398, + "step": 54444 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636453126280554e-05, + "loss": 1.0991, + "step": 54445 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636439779459476e-05, + "loss": 1.2186, + "step": 54446 + }, + { + "epoch": 0.05, + "learning_rate": 4.96364264323952e-05, + "loss": 1.2667, + "step": 54447 + }, + { + "epoch": 0.05, + "learning_rate": 4.963641308508772e-05, + "loss": 1.0739, + "step": 54448 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636399737537034e-05, + "loss": 0.8082, + "step": 54449 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636386389743156e-05, + "loss": 1.1869, + "step": 54450 + }, + { + "epoch": 0.05, + "learning_rate": 4.963637304170608e-05, + "loss": 1.4696, + "step": 54451 + }, + { + "epoch": 0.05, + "learning_rate": 4.963635969342579e-05, + "loss": 1.8612, + "step": 54452 + }, + { + "epoch": 0.05, + "learning_rate": 4.963634634490232e-05, + "loss": 0.9381, + "step": 54453 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636332996135634e-05, + "loss": 1.2057, + "step": 54454 + }, + { + "epoch": 0.05, + "learning_rate": 4.963631964712576e-05, + "loss": 1.2705, + "step": 54455 + }, + { + "epoch": 0.05, + "learning_rate": 4.963630629787268e-05, + "loss": 0.938, + "step": 54456 + }, + { + "epoch": 0.05, + "learning_rate": 4.96362929483764e-05, + "loss": 1.4952, + "step": 54457 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636279598636924e-05, + "loss": 1.1039, + "step": 54458 + }, + { + "epoch": 0.05, + "learning_rate": 4.963626624865425e-05, + "loss": 1.134, + "step": 54459 + }, + { + "epoch": 0.05, + "learning_rate": 4.963625289842838e-05, + "loss": 1.3429, + "step": 54460 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636239547959306e-05, + "loss": 1.2597, + "step": 54461 + }, + { + "epoch": 0.05, + "learning_rate": 4.963622619724704e-05, + "loss": 1.2196, + "step": 54462 + }, + { + "epoch": 0.05, + "learning_rate": 4.963621284629156e-05, + "loss": 0.9487, + "step": 54463 + }, + { + "epoch": 0.05, + "learning_rate": 4.96361994950929e-05, + "loss": 0.8673, + "step": 54464 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636186143651034e-05, + "loss": 0.4639, + "step": 54465 + }, + { + "epoch": 0.05, + "learning_rate": 4.963617279196597e-05, + "loss": 0.226, + "step": 54466 + }, + { + "epoch": 0.05, + "learning_rate": 4.963615944003771e-05, + "loss": 0.1956, + "step": 54467 + }, + { + "epoch": 0.05, + "learning_rate": 4.963614608786625e-05, + "loss": 0.2329, + "step": 54468 + }, + { + "epoch": 0.05, + "learning_rate": 4.963613273545159e-05, + "loss": 0.1195, + "step": 54469 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636119382793734e-05, + "loss": 0.2743, + "step": 54470 + }, + { + "epoch": 0.05, + "learning_rate": 4.963610602989268e-05, + "loss": 0.8718, + "step": 54471 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636092676748434e-05, + "loss": 1.1649, + "step": 54472 + }, + { + "epoch": 0.05, + "learning_rate": 4.963607932336099e-05, + "loss": 1.3303, + "step": 54473 + }, + { + "epoch": 0.05, + "learning_rate": 4.963606596973034e-05, + "loss": 1.185, + "step": 54474 + }, + { + "epoch": 0.05, + "learning_rate": 4.96360526158565e-05, + "loss": 1.1356, + "step": 54475 + }, + { + "epoch": 0.05, + "learning_rate": 4.9636039261739466e-05, + "loss": 1.1952, + "step": 54476 + }, + { + "epoch": 0.05, + "learning_rate": 4.963602590737923e-05, + "loss": 1.2092, + "step": 54477 + }, + { + "epoch": 0.05, + "learning_rate": 4.96360125527758e-05, + "loss": 1.4023, + "step": 54478 + }, + { + "epoch": 0.05, + "learning_rate": 4.963599919792917e-05, + "loss": 1.0865, + "step": 54479 + }, + { + "epoch": 0.05, + "learning_rate": 4.963598584283935e-05, + "loss": 1.1451, + "step": 54480 + }, + { + "epoch": 0.05, + "learning_rate": 4.963597248750633e-05, + "loss": 1.0286, + "step": 54481 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635959131930116e-05, + "loss": 1.0678, + "step": 54482 + }, + { + "epoch": 0.05, + "learning_rate": 4.96359457761107e-05, + "loss": 1.1544, + "step": 54483 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635932420048095e-05, + "loss": 1.1995, + "step": 54484 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635919063742294e-05, + "loss": 1.0885, + "step": 54485 + }, + { + "epoch": 0.05, + "learning_rate": 4.96359057071933e-05, + "loss": 0.7633, + "step": 54486 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635892350401106e-05, + "loss": 1.1484, + "step": 54487 + }, + { + "epoch": 0.05, + "learning_rate": 4.963587899336571e-05, + "loss": 0.9533, + "step": 54488 + }, + { + "epoch": 0.05, + "learning_rate": 4.963586563608713e-05, + "loss": 1.1009, + "step": 54489 + }, + { + "epoch": 0.05, + "learning_rate": 4.963585227856535e-05, + "loss": 0.9635, + "step": 54490 + }, + { + "epoch": 0.05, + "learning_rate": 4.963583892080038e-05, + "loss": 0.7484, + "step": 54491 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635825562792216e-05, + "loss": 1.3787, + "step": 54492 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635812204540855e-05, + "loss": 1.235, + "step": 54493 + }, + { + "epoch": 0.05, + "learning_rate": 4.96357988460463e-05, + "loss": 1.4254, + "step": 54494 + }, + { + "epoch": 0.05, + "learning_rate": 4.963578548730855e-05, + "loss": 0.3611, + "step": 54495 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635772128327606e-05, + "loss": 1.3352, + "step": 54496 + }, + { + "epoch": 0.05, + "learning_rate": 4.963575876910347e-05, + "loss": 1.2897, + "step": 54497 + }, + { + "epoch": 0.05, + "learning_rate": 4.963574540963614e-05, + "loss": 0.9702, + "step": 54498 + }, + { + "epoch": 0.05, + "learning_rate": 4.963573204992561e-05, + "loss": 1.0017, + "step": 54499 + }, + { + "epoch": 0.05, + "learning_rate": 4.963571868997189e-05, + "loss": 1.1473, + "step": 54500 + }, + { + "epoch": 0.05, + "eval_loss": 1.0877758264541626, + "eval_runtime": 127.4671, + "eval_samples_per_second": 10.866, + "eval_steps_per_second": 5.437, + "step": 54500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635705329774975e-05, + "loss": 1.1023, + "step": 54501 + }, + { + "epoch": 0.05, + "learning_rate": 4.963569196933487e-05, + "loss": 3.9714, + "step": 54502 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635678608651573e-05, + "loss": 3.4567, + "step": 54503 + }, + { + "epoch": 0.05, + "learning_rate": 4.963566524772508e-05, + "loss": 1.5288, + "step": 54504 + }, + { + "epoch": 0.05, + "learning_rate": 4.963565188655539e-05, + "loss": 1.7376, + "step": 54505 + }, + { + "epoch": 0.05, + "learning_rate": 4.963563852514251e-05, + "loss": 1.6179, + "step": 54506 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635625163486444e-05, + "loss": 1.7473, + "step": 54507 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635611801587176e-05, + "loss": 1.3452, + "step": 54508 + }, + { + "epoch": 0.05, + "learning_rate": 4.963559843944472e-05, + "loss": 1.3278, + "step": 54509 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635585077059075e-05, + "loss": 0.9957, + "step": 54510 + }, + { + "epoch": 0.05, + "learning_rate": 4.963557171443024e-05, + "loss": 0.672, + "step": 54511 + }, + { + "epoch": 0.05, + "learning_rate": 4.96355583515582e-05, + "loss": 1.391, + "step": 54512 + }, + { + "epoch": 0.05, + "learning_rate": 4.963554498844299e-05, + "loss": 0.8921, + "step": 54513 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635531625084566e-05, + "loss": 1.1579, + "step": 54514 + }, + { + "epoch": 0.05, + "learning_rate": 4.963551826148296e-05, + "loss": 0.5768, + "step": 54515 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635504897638165e-05, + "loss": 1.3384, + "step": 54516 + }, + { + "epoch": 0.05, + "learning_rate": 4.963549153355017e-05, + "loss": 0.7912, + "step": 54517 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635478169218984e-05, + "loss": 1.0738, + "step": 54518 + }, + { + "epoch": 0.05, + "learning_rate": 4.963546480464462e-05, + "loss": 1.2083, + "step": 54519 + }, + { + "epoch": 0.05, + "learning_rate": 4.963545143982705e-05, + "loss": 1.38, + "step": 54520 + }, + { + "epoch": 0.05, + "learning_rate": 4.96354380747663e-05, + "loss": 1.3453, + "step": 54521 + }, + { + "epoch": 0.05, + "learning_rate": 4.963542470946235e-05, + "loss": 0.8786, + "step": 54522 + }, + { + "epoch": 0.05, + "learning_rate": 4.963541134391522e-05, + "loss": 1.5817, + "step": 54523 + }, + { + "epoch": 0.05, + "learning_rate": 4.96353979781249e-05, + "loss": 1.0085, + "step": 54524 + }, + { + "epoch": 0.05, + "learning_rate": 4.963538461209138e-05, + "loss": 0.9142, + "step": 54525 + }, + { + "epoch": 0.05, + "learning_rate": 4.963537124581468e-05, + "loss": 1.1288, + "step": 54526 + }, + { + "epoch": 0.05, + "learning_rate": 4.963535787929478e-05, + "loss": 0.9751, + "step": 54527 + }, + { + "epoch": 0.05, + "learning_rate": 4.963534451253169e-05, + "loss": 1.1158, + "step": 54528 + }, + { + "epoch": 0.05, + "learning_rate": 4.963533114552542e-05, + "loss": 0.947, + "step": 54529 + }, + { + "epoch": 0.05, + "learning_rate": 4.963531777827595e-05, + "loss": 0.9469, + "step": 54530 + }, + { + "epoch": 0.05, + "learning_rate": 4.96353044107833e-05, + "loss": 0.7221, + "step": 54531 + }, + { + "epoch": 0.05, + "learning_rate": 4.963529104304745e-05, + "loss": 1.5067, + "step": 54532 + }, + { + "epoch": 0.05, + "learning_rate": 4.963527767506842e-05, + "loss": 1.0289, + "step": 54533 + }, + { + "epoch": 0.05, + "learning_rate": 4.963526430684619e-05, + "loss": 1.525, + "step": 54534 + }, + { + "epoch": 0.05, + "learning_rate": 4.963525093838079e-05, + "loss": 1.3168, + "step": 54535 + }, + { + "epoch": 0.05, + "learning_rate": 4.963523756967219e-05, + "loss": 1.3727, + "step": 54536 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635224200720396e-05, + "loss": 1.1844, + "step": 54537 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635210831525423e-05, + "loss": 1.174, + "step": 54538 + }, + { + "epoch": 0.05, + "learning_rate": 4.963519746208726e-05, + "loss": 1.0846, + "step": 54539 + }, + { + "epoch": 0.05, + "learning_rate": 4.96351840924059e-05, + "loss": 1.3522, + "step": 54540 + }, + { + "epoch": 0.05, + "learning_rate": 4.963517072248136e-05, + "loss": 1.2577, + "step": 54541 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635157352313635e-05, + "loss": 0.9864, + "step": 54542 + }, + { + "epoch": 0.05, + "learning_rate": 4.963514398190271e-05, + "loss": 1.0981, + "step": 54543 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635130611248605e-05, + "loss": 0.4675, + "step": 54544 + }, + { + "epoch": 0.05, + "learning_rate": 4.963511724035131e-05, + "loss": 0.8172, + "step": 54545 + }, + { + "epoch": 0.05, + "learning_rate": 4.963510386921083e-05, + "loss": 1.5603, + "step": 54546 + }, + { + "epoch": 0.05, + "learning_rate": 4.963509049782716e-05, + "loss": 1.1093, + "step": 54547 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635077126200305e-05, + "loss": 0.8846, + "step": 54548 + }, + { + "epoch": 0.05, + "learning_rate": 4.963506375433027e-05, + "loss": 1.213, + "step": 54549 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635050382217036e-05, + "loss": 0.9809, + "step": 54550 + }, + { + "epoch": 0.05, + "learning_rate": 4.963503700986062e-05, + "loss": 1.0497, + "step": 54551 + }, + { + "epoch": 0.05, + "learning_rate": 4.9635023637261014e-05, + "loss": 1.1659, + "step": 54552 + }, + { + "epoch": 0.05, + "learning_rate": 4.963501026441822e-05, + "loss": 1.1774, + "step": 54553 + }, + { + "epoch": 0.05, + "learning_rate": 4.963499689133225e-05, + "loss": 1.0725, + "step": 54554 + }, + { + "epoch": 0.05, + "learning_rate": 4.963498351800309e-05, + "loss": 0.5543, + "step": 54555 + }, + { + "epoch": 0.05, + "learning_rate": 4.963497014443074e-05, + "loss": 0.2856, + "step": 54556 + }, + { + "epoch": 0.05, + "learning_rate": 4.96349567706152e-05, + "loss": 0.4036, + "step": 54557 + }, + { + "epoch": 0.05, + "learning_rate": 4.963494339655649e-05, + "loss": 0.2232, + "step": 54558 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634930022254574e-05, + "loss": 0.4672, + "step": 54559 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634916647709485e-05, + "loss": 0.9341, + "step": 54560 + }, + { + "epoch": 0.05, + "learning_rate": 4.963490327292121e-05, + "loss": 1.194, + "step": 54561 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634889897889745e-05, + "loss": 1.141, + "step": 54562 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634876522615096e-05, + "loss": 1.0158, + "step": 54563 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634863147097266e-05, + "loss": 1.1341, + "step": 54564 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634849771336244e-05, + "loss": 0.7411, + "step": 54565 + }, + { + "epoch": 0.05, + "learning_rate": 4.963483639533204e-05, + "loss": 1.1614, + "step": 54566 + }, + { + "epoch": 0.05, + "learning_rate": 4.963482301908465e-05, + "loss": 1.4024, + "step": 54567 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634809642594084e-05, + "loss": 1.0162, + "step": 54568 + }, + { + "epoch": 0.05, + "learning_rate": 4.963479626586033e-05, + "loss": 1.1692, + "step": 54569 + }, + { + "epoch": 0.05, + "learning_rate": 4.963478288888339e-05, + "loss": 1.3195, + "step": 54570 + }, + { + "epoch": 0.05, + "learning_rate": 4.963476951166326e-05, + "loss": 1.4265, + "step": 54571 + }, + { + "epoch": 0.05, + "learning_rate": 4.963475613419995e-05, + "loss": 1.0592, + "step": 54572 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634742756493456e-05, + "loss": 1.4897, + "step": 54573 + }, + { + "epoch": 0.05, + "learning_rate": 4.963472937854378e-05, + "loss": 1.2809, + "step": 54574 + }, + { + "epoch": 0.05, + "learning_rate": 4.963471600035092e-05, + "loss": 1.9277, + "step": 54575 + }, + { + "epoch": 0.05, + "learning_rate": 4.963470262191487e-05, + "loss": 1.5122, + "step": 54576 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634689243235646e-05, + "loss": 1.1034, + "step": 54577 + }, + { + "epoch": 0.05, + "learning_rate": 4.963467586431324e-05, + "loss": 1.1624, + "step": 54578 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634662485147645e-05, + "loss": 1.1204, + "step": 54579 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634649105738865e-05, + "loss": 1.0857, + "step": 54580 + }, + { + "epoch": 0.05, + "learning_rate": 4.963463572608691e-05, + "loss": 1.4168, + "step": 54581 + }, + { + "epoch": 0.05, + "learning_rate": 4.963462234619176e-05, + "loss": 1.4013, + "step": 54582 + }, + { + "epoch": 0.05, + "learning_rate": 4.963460896605344e-05, + "loss": 1.3266, + "step": 54583 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634595585671926e-05, + "loss": 1.1389, + "step": 54584 + }, + { + "epoch": 0.05, + "learning_rate": 4.963458220504724e-05, + "loss": 1.4211, + "step": 54585 + }, + { + "epoch": 0.05, + "learning_rate": 4.963456882417937e-05, + "loss": 1.2569, + "step": 54586 + }, + { + "epoch": 0.05, + "learning_rate": 4.963455544306831e-05, + "loss": 1.3012, + "step": 54587 + }, + { + "epoch": 0.05, + "learning_rate": 4.963454206171407e-05, + "loss": 0.9284, + "step": 54588 + }, + { + "epoch": 0.05, + "learning_rate": 4.963452868011665e-05, + "loss": 1.018, + "step": 54589 + }, + { + "epoch": 0.05, + "learning_rate": 4.963451529827606e-05, + "loss": 0.7958, + "step": 54590 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634501916192276e-05, + "loss": 0.7897, + "step": 54591 + }, + { + "epoch": 0.05, + "learning_rate": 4.963448853386531e-05, + "loss": 1.304, + "step": 54592 + }, + { + "epoch": 0.05, + "learning_rate": 4.963447515129517e-05, + "loss": 1.3126, + "step": 54593 + }, + { + "epoch": 0.05, + "learning_rate": 4.963446176848184e-05, + "loss": 1.3497, + "step": 54594 + }, + { + "epoch": 0.05, + "learning_rate": 4.963444838542534e-05, + "loss": 1.2054, + "step": 54595 + }, + { + "epoch": 0.05, + "learning_rate": 4.963443500212564e-05, + "loss": 1.3033, + "step": 54596 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634421618582786e-05, + "loss": 1.1227, + "step": 54597 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634408234796736e-05, + "loss": 1.1529, + "step": 54598 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634394850767506e-05, + "loss": 1.2945, + "step": 54599 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634381466495096e-05, + "loss": 0.7352, + "step": 54600 + }, + { + "epoch": 0.05, + "learning_rate": 4.963436808197951e-05, + "loss": 0.1593, + "step": 54601 + }, + { + "epoch": 0.05, + "learning_rate": 4.963435469722073e-05, + "loss": 0.1243, + "step": 54602 + }, + { + "epoch": 0.05, + "learning_rate": 4.963434131221878e-05, + "loss": 0.5335, + "step": 54603 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634327926973654e-05, + "loss": 1.1028, + "step": 54604 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634314541485346e-05, + "loss": 0.8548, + "step": 54605 + }, + { + "epoch": 0.05, + "learning_rate": 4.963430115575386e-05, + "loss": 1.1623, + "step": 54606 + }, + { + "epoch": 0.05, + "learning_rate": 4.963428776977918e-05, + "loss": 1.0044, + "step": 54607 + }, + { + "epoch": 0.05, + "learning_rate": 4.963427438356134e-05, + "loss": 1.0527, + "step": 54608 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634260997100315e-05, + "loss": 1.0616, + "step": 54609 + }, + { + "epoch": 0.05, + "learning_rate": 4.963424761039611e-05, + "loss": 1.0802, + "step": 54610 + }, + { + "epoch": 0.05, + "learning_rate": 4.963423422344872e-05, + "loss": 1.2109, + "step": 54611 + }, + { + "epoch": 0.05, + "learning_rate": 4.963422083625816e-05, + "loss": 1.0149, + "step": 54612 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634207448824414e-05, + "loss": 1.1147, + "step": 54613 + }, + { + "epoch": 0.05, + "learning_rate": 4.96341940611475e-05, + "loss": 1.0224, + "step": 54614 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634180673227396e-05, + "loss": 0.6497, + "step": 54615 + }, + { + "epoch": 0.05, + "learning_rate": 4.963416728506412e-05, + "loss": 1.0393, + "step": 54616 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634153896657665e-05, + "loss": 1.2049, + "step": 54617 + }, + { + "epoch": 0.05, + "learning_rate": 4.963414050800803e-05, + "loss": 1.3135, + "step": 54618 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634127119115215e-05, + "loss": 1.0278, + "step": 54619 + }, + { + "epoch": 0.05, + "learning_rate": 4.963411372997922e-05, + "loss": 0.8116, + "step": 54620 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634100340600066e-05, + "loss": 1.401, + "step": 54621 + }, + { + "epoch": 0.05, + "learning_rate": 4.963408695097772e-05, + "loss": 1.3632, + "step": 54622 + }, + { + "epoch": 0.05, + "learning_rate": 4.963407356111219e-05, + "loss": 0.9611, + "step": 54623 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634060171003494e-05, + "loss": 1.0421, + "step": 54624 + }, + { + "epoch": 0.05, + "learning_rate": 4.9634046780651624e-05, + "loss": 0.9694, + "step": 54625 + }, + { + "epoch": 0.05, + "learning_rate": 4.963403339005657e-05, + "loss": 1.0203, + "step": 54626 + }, + { + "epoch": 0.05, + "learning_rate": 4.963401999921834e-05, + "loss": 0.9207, + "step": 54627 + }, + { + "epoch": 0.05, + "learning_rate": 4.963400660813693e-05, + "loss": 1.1448, + "step": 54628 + }, + { + "epoch": 0.05, + "learning_rate": 4.963399321681235e-05, + "loss": 1.674, + "step": 54629 + }, + { + "epoch": 0.05, + "learning_rate": 4.963397982524459e-05, + "loss": 1.9559, + "step": 54630 + }, + { + "epoch": 0.05, + "learning_rate": 4.963396643343366e-05, + "loss": 1.7042, + "step": 54631 + }, + { + "epoch": 0.05, + "learning_rate": 4.963395304137954e-05, + "loss": 1.5831, + "step": 54632 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633939649082255e-05, + "loss": 1.7178, + "step": 54633 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633926256541794e-05, + "loss": 1.4194, + "step": 54634 + }, + { + "epoch": 0.05, + "learning_rate": 4.963391286375816e-05, + "loss": 1.5408, + "step": 54635 + }, + { + "epoch": 0.05, + "learning_rate": 4.963389947073134e-05, + "loss": 1.5914, + "step": 54636 + }, + { + "epoch": 0.05, + "learning_rate": 4.963388607746135e-05, + "loss": 1.1989, + "step": 54637 + }, + { + "epoch": 0.05, + "learning_rate": 4.963387268394819e-05, + "loss": 0.8599, + "step": 54638 + }, + { + "epoch": 0.05, + "learning_rate": 4.963385929019184e-05, + "loss": 0.6637, + "step": 54639 + }, + { + "epoch": 0.05, + "learning_rate": 4.963384589619233e-05, + "loss": 0.4666, + "step": 54640 + }, + { + "epoch": 0.05, + "learning_rate": 4.963383250194964e-05, + "loss": 0.4244, + "step": 54641 + }, + { + "epoch": 0.05, + "learning_rate": 4.963381910746377e-05, + "loss": 0.4022, + "step": 54642 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633805712734734e-05, + "loss": 0.6401, + "step": 54643 + }, + { + "epoch": 0.05, + "learning_rate": 4.963379231776252e-05, + "loss": 1.0704, + "step": 54644 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633778922547126e-05, + "loss": 0.8909, + "step": 54645 + }, + { + "epoch": 0.05, + "learning_rate": 4.963376552708856e-05, + "loss": 1.4616, + "step": 54646 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633752131386826e-05, + "loss": 1.1644, + "step": 54647 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633738735441916e-05, + "loss": 1.0705, + "step": 54648 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633725339253826e-05, + "loss": 1.7951, + "step": 54649 + }, + { + "epoch": 0.05, + "learning_rate": 4.963371194282257e-05, + "loss": 1.4216, + "step": 54650 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633698546148135e-05, + "loss": 1.48, + "step": 54651 + }, + { + "epoch": 0.05, + "learning_rate": 4.963368514923053e-05, + "loss": 1.2305, + "step": 54652 + }, + { + "epoch": 0.05, + "learning_rate": 4.963367175206975e-05, + "loss": 1.4349, + "step": 54653 + }, + { + "epoch": 0.05, + "learning_rate": 4.96336583546658e-05, + "loss": 1.0538, + "step": 54654 + }, + { + "epoch": 0.05, + "learning_rate": 4.963364495701867e-05, + "loss": 1.5602, + "step": 54655 + }, + { + "epoch": 0.05, + "learning_rate": 4.963363155912837e-05, + "loss": 2.1056, + "step": 54656 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633618160994896e-05, + "loss": 1.8249, + "step": 54657 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633604762618257e-05, + "loss": 1.5171, + "step": 54658 + }, + { + "epoch": 0.05, + "learning_rate": 4.963359136399844e-05, + "loss": 1.5626, + "step": 54659 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633577965135445e-05, + "loss": 1.4202, + "step": 54660 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633564566029286e-05, + "loss": 1.3538, + "step": 54661 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633551166679954e-05, + "loss": 1.2331, + "step": 54662 + }, + { + "epoch": 0.05, + "learning_rate": 4.963353776708744e-05, + "loss": 1.2264, + "step": 54663 + }, + { + "epoch": 0.05, + "learning_rate": 4.963352436725177e-05, + "loss": 1.0084, + "step": 54664 + }, + { + "epoch": 0.05, + "learning_rate": 4.963351096717292e-05, + "loss": 1.1912, + "step": 54665 + }, + { + "epoch": 0.05, + "learning_rate": 4.96334975668509e-05, + "loss": 0.9504, + "step": 54666 + }, + { + "epoch": 0.05, + "learning_rate": 4.96334841662857e-05, + "loss": 0.9596, + "step": 54667 + }, + { + "epoch": 0.05, + "learning_rate": 4.963347076547734e-05, + "loss": 1.081, + "step": 54668 + }, + { + "epoch": 0.05, + "learning_rate": 4.963345736442581e-05, + "loss": 1.0925, + "step": 54669 + }, + { + "epoch": 0.05, + "learning_rate": 4.96334439631311e-05, + "loss": 0.9956, + "step": 54670 + }, + { + "epoch": 0.05, + "learning_rate": 4.963343056159322e-05, + "loss": 1.1449, + "step": 54671 + }, + { + "epoch": 0.05, + "learning_rate": 4.963341715981217e-05, + "loss": 1.2081, + "step": 54672 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633403757787956e-05, + "loss": 1.1375, + "step": 54673 + }, + { + "epoch": 0.05, + "learning_rate": 4.963339035552056e-05, + "loss": 1.2655, + "step": 54674 + }, + { + "epoch": 0.05, + "learning_rate": 4.963337695301e-05, + "loss": 0.9937, + "step": 54675 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633363550256276e-05, + "loss": 0.3164, + "step": 54676 + }, + { + "epoch": 0.05, + "learning_rate": 4.963335014725937e-05, + "loss": 1.1083, + "step": 54677 + }, + { + "epoch": 0.05, + "learning_rate": 4.96333367440193e-05, + "loss": 0.8914, + "step": 54678 + }, + { + "epoch": 0.05, + "learning_rate": 4.963332334053606e-05, + "loss": 1.0609, + "step": 54679 + }, + { + "epoch": 0.05, + "learning_rate": 4.963330993680965e-05, + "loss": 1.0909, + "step": 54680 + }, + { + "epoch": 0.05, + "learning_rate": 4.963329653284007e-05, + "loss": 1.8053, + "step": 54681 + }, + { + "epoch": 0.05, + "learning_rate": 4.963328312862732e-05, + "loss": 1.1366, + "step": 54682 + }, + { + "epoch": 0.05, + "learning_rate": 4.96332697241714e-05, + "loss": 1.0294, + "step": 54683 + }, + { + "epoch": 0.05, + "learning_rate": 4.963325631947231e-05, + "loss": 1.2406, + "step": 54684 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633242914530054e-05, + "loss": 1.2128, + "step": 54685 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633229509344625e-05, + "loss": 1.3558, + "step": 54686 + }, + { + "epoch": 0.05, + "learning_rate": 4.963321610391603e-05, + "loss": 1.4147, + "step": 54687 + }, + { + "epoch": 0.05, + "learning_rate": 4.963320269824426e-05, + "loss": 1.4923, + "step": 54688 + }, + { + "epoch": 0.05, + "learning_rate": 4.963318929232933e-05, + "loss": 0.955, + "step": 54689 + }, + { + "epoch": 0.05, + "learning_rate": 4.963317588617123e-05, + "loss": 1.0118, + "step": 54690 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633162479769965e-05, + "loss": 1.0768, + "step": 54691 + }, + { + "epoch": 0.05, + "learning_rate": 4.963314907312553e-05, + "loss": 1.067, + "step": 54692 + }, + { + "epoch": 0.05, + "learning_rate": 4.963313566623792e-05, + "loss": 1.0594, + "step": 54693 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633122259107146e-05, + "loss": 1.4382, + "step": 54694 + }, + { + "epoch": 0.05, + "learning_rate": 4.963310885173321e-05, + "loss": 1.1543, + "step": 54695 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633095444116095e-05, + "loss": 1.0308, + "step": 54696 + }, + { + "epoch": 0.05, + "learning_rate": 4.963308203625582e-05, + "loss": 1.3465, + "step": 54697 + }, + { + "epoch": 0.05, + "learning_rate": 4.963306862815238e-05, + "loss": 1.2881, + "step": 54698 + }, + { + "epoch": 0.05, + "learning_rate": 4.963305521980576e-05, + "loss": 1.001, + "step": 54699 + }, + { + "epoch": 0.05, + "learning_rate": 4.9633041811215986e-05, + "loss": 1.1635, + "step": 54700 + }, + { + "epoch": 0.05, + "learning_rate": 4.963302840238304e-05, + "loss": 1.2915, + "step": 54701 + }, + { + "epoch": 0.05, + "learning_rate": 4.963301499330693e-05, + "loss": 1.1649, + "step": 54702 + }, + { + "epoch": 0.05, + "learning_rate": 4.963300158398765e-05, + "loss": 1.3661, + "step": 54703 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632988174425196e-05, + "loss": 1.1115, + "step": 54704 + }, + { + "epoch": 0.05, + "learning_rate": 4.963297476461959e-05, + "loss": 1.0874, + "step": 54705 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632961354570815e-05, + "loss": 0.7364, + "step": 54706 + }, + { + "epoch": 0.05, + "learning_rate": 4.963294794427887e-05, + "loss": 0.8686, + "step": 54707 + }, + { + "epoch": 0.05, + "learning_rate": 4.963293453374376e-05, + "loss": 1.052, + "step": 54708 + }, + { + "epoch": 0.05, + "learning_rate": 4.963292112296548e-05, + "loss": 1.0036, + "step": 54709 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632907711944036e-05, + "loss": 1.2322, + "step": 54710 + }, + { + "epoch": 0.05, + "learning_rate": 4.963289430067943e-05, + "loss": 1.2705, + "step": 54711 + }, + { + "epoch": 0.05, + "learning_rate": 4.963288088917165e-05, + "loss": 0.8985, + "step": 54712 + }, + { + "epoch": 0.05, + "learning_rate": 4.963286747742072e-05, + "loss": 1.2007, + "step": 54713 + }, + { + "epoch": 0.05, + "learning_rate": 4.963285406542661e-05, + "loss": 0.8581, + "step": 54714 + }, + { + "epoch": 0.05, + "learning_rate": 4.963284065318934e-05, + "loss": 1.0904, + "step": 54715 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632827240708906e-05, + "loss": 1.3849, + "step": 54716 + }, + { + "epoch": 0.05, + "learning_rate": 4.963281382798531e-05, + "loss": 0.5381, + "step": 54717 + }, + { + "epoch": 0.05, + "learning_rate": 4.963280041501854e-05, + "loss": 1.2024, + "step": 54718 + }, + { + "epoch": 0.05, + "learning_rate": 4.963278700180861e-05, + "loss": 1.1006, + "step": 54719 + }, + { + "epoch": 0.05, + "learning_rate": 4.963277358835552e-05, + "loss": 1.2324, + "step": 54720 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632760174659265e-05, + "loss": 1.0397, + "step": 54721 + }, + { + "epoch": 0.05, + "learning_rate": 4.963274676071984e-05, + "loss": 0.924, + "step": 54722 + }, + { + "epoch": 0.05, + "learning_rate": 4.963273334653725e-05, + "loss": 1.1859, + "step": 54723 + }, + { + "epoch": 0.05, + "learning_rate": 4.963271993211151e-05, + "loss": 0.8389, + "step": 54724 + }, + { + "epoch": 0.05, + "learning_rate": 4.963270651744259e-05, + "loss": 1.2208, + "step": 54725 + }, + { + "epoch": 0.05, + "learning_rate": 4.963269310253052e-05, + "loss": 1.1036, + "step": 54726 + }, + { + "epoch": 0.05, + "learning_rate": 4.963267968737527e-05, + "loss": 1.2852, + "step": 54727 + }, + { + "epoch": 0.05, + "learning_rate": 4.963266627197687e-05, + "loss": 1.1046, + "step": 54728 + }, + { + "epoch": 0.05, + "learning_rate": 4.96326528563353e-05, + "loss": 1.1848, + "step": 54729 + }, + { + "epoch": 0.05, + "learning_rate": 4.963263944045057e-05, + "loss": 1.2275, + "step": 54730 + }, + { + "epoch": 0.05, + "learning_rate": 4.963262602432267e-05, + "loss": 1.4178, + "step": 54731 + }, + { + "epoch": 0.05, + "learning_rate": 4.963261260795161e-05, + "loss": 1.3817, + "step": 54732 + }, + { + "epoch": 0.05, + "learning_rate": 4.963259919133739e-05, + "loss": 1.042, + "step": 54733 + }, + { + "epoch": 0.05, + "learning_rate": 4.963258577448001e-05, + "loss": 1.0896, + "step": 54734 + }, + { + "epoch": 0.05, + "learning_rate": 4.963257235737947e-05, + "loss": 1.4306, + "step": 54735 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632558940035764e-05, + "loss": 1.208, + "step": 54736 + }, + { + "epoch": 0.05, + "learning_rate": 4.963254552244889e-05, + "loss": 1.2509, + "step": 54737 + }, + { + "epoch": 0.05, + "learning_rate": 4.963253210461886e-05, + "loss": 1.3215, + "step": 54738 + }, + { + "epoch": 0.05, + "learning_rate": 4.963251868654567e-05, + "loss": 1.0326, + "step": 54739 + }, + { + "epoch": 0.05, + "learning_rate": 4.963250526822931e-05, + "loss": 1.7616, + "step": 54740 + }, + { + "epoch": 0.05, + "learning_rate": 4.963249184966979e-05, + "loss": 1.9323, + "step": 54741 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632478430867116e-05, + "loss": 1.4842, + "step": 54742 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632465011821274e-05, + "loss": 1.356, + "step": 54743 + }, + { + "epoch": 0.05, + "learning_rate": 4.963245159253227e-05, + "loss": 1.0098, + "step": 54744 + }, + { + "epoch": 0.05, + "learning_rate": 4.963243817300011e-05, + "loss": 0.8314, + "step": 54745 + }, + { + "epoch": 0.05, + "learning_rate": 4.963242475322478e-05, + "loss": 1.1947, + "step": 54746 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632411333206295e-05, + "loss": 1.3558, + "step": 54747 + }, + { + "epoch": 0.05, + "learning_rate": 4.963239791294465e-05, + "loss": 0.6943, + "step": 54748 + }, + { + "epoch": 0.05, + "learning_rate": 4.963238449243985e-05, + "loss": 1.0589, + "step": 54749 + }, + { + "epoch": 0.05, + "learning_rate": 4.963237107169188e-05, + "loss": 1.4898, + "step": 54750 + }, + { + "epoch": 0.05, + "learning_rate": 4.963235765070076e-05, + "loss": 1.1185, + "step": 54751 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632344229466466e-05, + "loss": 1.1065, + "step": 54752 + }, + { + "epoch": 0.05, + "learning_rate": 4.963233080798902e-05, + "loss": 0.9587, + "step": 54753 + }, + { + "epoch": 0.05, + "learning_rate": 4.963231738626841e-05, + "loss": 0.8799, + "step": 54754 + }, + { + "epoch": 0.05, + "learning_rate": 4.963230396430464e-05, + "loss": 1.313, + "step": 54755 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632290542097713e-05, + "loss": 0.8768, + "step": 54756 + }, + { + "epoch": 0.05, + "learning_rate": 4.963227711964763e-05, + "loss": 1.3048, + "step": 54757 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632263696954384e-05, + "loss": 1.3289, + "step": 54758 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632250274017976e-05, + "loss": 0.9517, + "step": 54759 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632236850838416e-05, + "loss": 0.9378, + "step": 54760 + }, + { + "epoch": 0.05, + "learning_rate": 4.963222342741569e-05, + "loss": 1.077, + "step": 54761 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632210003749804e-05, + "loss": 1.3689, + "step": 54762 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632196579840766e-05, + "loss": 1.0918, + "step": 54763 + }, + { + "epoch": 0.05, + "learning_rate": 4.963218315568857e-05, + "loss": 0.3889, + "step": 54764 + }, + { + "epoch": 0.05, + "learning_rate": 4.963216973129321e-05, + "loss": 1.3142, + "step": 54765 + }, + { + "epoch": 0.05, + "learning_rate": 4.963215630665469e-05, + "loss": 1.4621, + "step": 54766 + }, + { + "epoch": 0.05, + "learning_rate": 4.963214288177301e-05, + "loss": 1.309, + "step": 54767 + }, + { + "epoch": 0.05, + "learning_rate": 4.963212945664818e-05, + "loss": 0.7406, + "step": 54768 + }, + { + "epoch": 0.05, + "learning_rate": 4.963211603128019e-05, + "loss": 0.9111, + "step": 54769 + }, + { + "epoch": 0.05, + "learning_rate": 4.963210260566904e-05, + "loss": 1.1893, + "step": 54770 + }, + { + "epoch": 0.05, + "learning_rate": 4.963208917981473e-05, + "loss": 1.1365, + "step": 54771 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632075753717266e-05, + "loss": 1.1213, + "step": 54772 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632062327376647e-05, + "loss": 1.3827, + "step": 54773 + }, + { + "epoch": 0.05, + "learning_rate": 4.9632048900792875e-05, + "loss": 1.4273, + "step": 54774 + }, + { + "epoch": 0.05, + "learning_rate": 4.963203547396593e-05, + "loss": 1.1274, + "step": 54775 + }, + { + "epoch": 0.05, + "learning_rate": 4.963202204689584e-05, + "loss": 1.4454, + "step": 54776 + }, + { + "epoch": 0.05, + "learning_rate": 4.963200861958259e-05, + "loss": 1.3081, + "step": 54777 + }, + { + "epoch": 0.05, + "learning_rate": 4.963199519202618e-05, + "loss": 0.8804, + "step": 54778 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631981764226617e-05, + "loss": 1.2561, + "step": 54779 + }, + { + "epoch": 0.05, + "learning_rate": 4.96319683361839e-05, + "loss": 1.2138, + "step": 54780 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631954907898013e-05, + "loss": 1.1405, + "step": 54781 + }, + { + "epoch": 0.05, + "learning_rate": 4.963194147936898e-05, + "loss": 1.3121, + "step": 54782 + }, + { + "epoch": 0.05, + "learning_rate": 4.963192805059679e-05, + "loss": 0.8503, + "step": 54783 + }, + { + "epoch": 0.05, + "learning_rate": 4.963191462158145e-05, + "loss": 0.8233, + "step": 54784 + }, + { + "epoch": 0.05, + "learning_rate": 4.963190119232295e-05, + "loss": 1.2509, + "step": 54785 + }, + { + "epoch": 0.05, + "learning_rate": 4.96318877628213e-05, + "loss": 1.255, + "step": 54786 + }, + { + "epoch": 0.05, + "learning_rate": 4.963187433307648e-05, + "loss": 1.2836, + "step": 54787 + }, + { + "epoch": 0.05, + "learning_rate": 4.963186090308851e-05, + "loss": 0.9213, + "step": 54788 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631847472857386e-05, + "loss": 1.1468, + "step": 54789 + }, + { + "epoch": 0.05, + "learning_rate": 4.963183404238311e-05, + "loss": 1.2082, + "step": 54790 + }, + { + "epoch": 0.05, + "learning_rate": 4.963182061166567e-05, + "loss": 1.1225, + "step": 54791 + }, + { + "epoch": 0.05, + "learning_rate": 4.963180718070508e-05, + "loss": 1.637, + "step": 54792 + }, + { + "epoch": 0.05, + "learning_rate": 4.963179374950134e-05, + "loss": 1.2573, + "step": 54793 + }, + { + "epoch": 0.05, + "learning_rate": 4.963178031805444e-05, + "loss": 1.2878, + "step": 54794 + }, + { + "epoch": 0.05, + "learning_rate": 4.963176688636439e-05, + "loss": 1.2359, + "step": 54795 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631753454431183e-05, + "loss": 1.5704, + "step": 54796 + }, + { + "epoch": 0.05, + "learning_rate": 4.963174002225482e-05, + "loss": 1.1546, + "step": 54797 + }, + { + "epoch": 0.05, + "learning_rate": 4.96317265898353e-05, + "loss": 1.1822, + "step": 54798 + }, + { + "epoch": 0.05, + "learning_rate": 4.963171315717263e-05, + "loss": 1.0891, + "step": 54799 + }, + { + "epoch": 0.05, + "learning_rate": 4.963169972426681e-05, + "loss": 1.3866, + "step": 54800 + }, + { + "epoch": 0.05, + "learning_rate": 4.963168629111783e-05, + "loss": 1.0083, + "step": 54801 + }, + { + "epoch": 0.05, + "learning_rate": 4.96316728577257e-05, + "loss": 1.0575, + "step": 54802 + }, + { + "epoch": 0.05, + "learning_rate": 4.963165942409041e-05, + "loss": 0.8108, + "step": 54803 + }, + { + "epoch": 0.05, + "learning_rate": 4.963164599021198e-05, + "loss": 1.3099, + "step": 54804 + }, + { + "epoch": 0.05, + "learning_rate": 4.963163255609039e-05, + "loss": 1.0778, + "step": 54805 + }, + { + "epoch": 0.05, + "learning_rate": 4.963161912172564e-05, + "loss": 1.4872, + "step": 54806 + }, + { + "epoch": 0.05, + "learning_rate": 4.963160568711774e-05, + "loss": 1.349, + "step": 54807 + }, + { + "epoch": 0.05, + "learning_rate": 4.963159225226669e-05, + "loss": 1.1602, + "step": 54808 + }, + { + "epoch": 0.05, + "learning_rate": 4.963157881717249e-05, + "loss": 1.0752, + "step": 54809 + }, + { + "epoch": 0.05, + "learning_rate": 4.963156538183514e-05, + "loss": 1.3357, + "step": 54810 + }, + { + "epoch": 0.05, + "learning_rate": 4.963155194625463e-05, + "loss": 0.8512, + "step": 54811 + }, + { + "epoch": 0.05, + "learning_rate": 4.963153851043097e-05, + "loss": 1.1783, + "step": 54812 + }, + { + "epoch": 0.05, + "learning_rate": 4.963152507436416e-05, + "loss": 0.8616, + "step": 54813 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631511638054196e-05, + "loss": 1.1189, + "step": 54814 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631498201501076e-05, + "loss": 1.2787, + "step": 54815 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631484764704804e-05, + "loss": 0.9061, + "step": 54816 + }, + { + "epoch": 0.05, + "learning_rate": 4.963147132766539e-05, + "loss": 1.076, + "step": 54817 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631457890382816e-05, + "loss": 1.1168, + "step": 54818 + }, + { + "epoch": 0.05, + "learning_rate": 4.963144445285709e-05, + "loss": 1.2135, + "step": 54819 + }, + { + "epoch": 0.05, + "learning_rate": 4.963143101508822e-05, + "loss": 0.9782, + "step": 54820 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631417577076195e-05, + "loss": 1.2777, + "step": 54821 + }, + { + "epoch": 0.05, + "learning_rate": 4.963140413882102e-05, + "loss": 0.9872, + "step": 54822 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631390700322694e-05, + "loss": 1.0666, + "step": 54823 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631377261581214e-05, + "loss": 0.7767, + "step": 54824 + }, + { + "epoch": 0.05, + "learning_rate": 4.963136382259659e-05, + "loss": 1.4897, + "step": 54825 + }, + { + "epoch": 0.05, + "learning_rate": 4.963135038336881e-05, + "loss": 1.6811, + "step": 54826 + }, + { + "epoch": 0.05, + "learning_rate": 4.963133694389788e-05, + "loss": 1.1442, + "step": 54827 + }, + { + "epoch": 0.05, + "learning_rate": 4.96313235041838e-05, + "loss": 0.9892, + "step": 54828 + }, + { + "epoch": 0.05, + "learning_rate": 4.963131006422657e-05, + "loss": 0.9884, + "step": 54829 + }, + { + "epoch": 0.05, + "learning_rate": 4.963129662402619e-05, + "loss": 1.3282, + "step": 54830 + }, + { + "epoch": 0.05, + "learning_rate": 4.963128318358267e-05, + "loss": 1.0668, + "step": 54831 + }, + { + "epoch": 0.05, + "learning_rate": 4.963126974289599e-05, + "loss": 1.2896, + "step": 54832 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631256301966155e-05, + "loss": 1.1628, + "step": 54833 + }, + { + "epoch": 0.05, + "learning_rate": 4.963124286079318e-05, + "loss": 1.8185, + "step": 54834 + }, + { + "epoch": 0.05, + "learning_rate": 4.963122941937706e-05, + "loss": 1.7734, + "step": 54835 + }, + { + "epoch": 0.05, + "learning_rate": 4.963121597771778e-05, + "loss": 1.7669, + "step": 54836 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631202535815355e-05, + "loss": 1.7684, + "step": 54837 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631189093669786e-05, + "loss": 1.7132, + "step": 54838 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631175651281057e-05, + "loss": 1.7216, + "step": 54839 + }, + { + "epoch": 0.05, + "learning_rate": 4.963116220864918e-05, + "loss": 1.6213, + "step": 54840 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631148765774174e-05, + "loss": 1.1135, + "step": 54841 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631135322656e-05, + "loss": 1.4909, + "step": 54842 + }, + { + "epoch": 0.05, + "learning_rate": 4.963112187929468e-05, + "loss": 1.3456, + "step": 54843 + }, + { + "epoch": 0.05, + "learning_rate": 4.963110843569022e-05, + "loss": 1.2133, + "step": 54844 + }, + { + "epoch": 0.05, + "learning_rate": 4.963109499184261e-05, + "loss": 1.4961, + "step": 54845 + }, + { + "epoch": 0.05, + "learning_rate": 4.963108154775185e-05, + "loss": 1.2919, + "step": 54846 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631068103417945e-05, + "loss": 0.9288, + "step": 54847 + }, + { + "epoch": 0.05, + "learning_rate": 4.963105465884089e-05, + "loss": 1.5823, + "step": 54848 + }, + { + "epoch": 0.05, + "learning_rate": 4.963104121402069e-05, + "loss": 1.2581, + "step": 54849 + }, + { + "epoch": 0.05, + "learning_rate": 4.963102776895734e-05, + "loss": 1.3983, + "step": 54850 + }, + { + "epoch": 0.05, + "learning_rate": 4.963101432365084e-05, + "loss": 0.817, + "step": 54851 + }, + { + "epoch": 0.05, + "learning_rate": 4.9631000878101194e-05, + "loss": 0.5936, + "step": 54852 + }, + { + "epoch": 0.05, + "learning_rate": 4.963098743230841e-05, + "loss": 1.225, + "step": 54853 + }, + { + "epoch": 0.05, + "learning_rate": 4.963097398627247e-05, + "loss": 1.0401, + "step": 54854 + }, + { + "epoch": 0.05, + "learning_rate": 4.963096053999339e-05, + "loss": 1.5191, + "step": 54855 + }, + { + "epoch": 0.05, + "learning_rate": 4.963094709347116e-05, + "loss": 1.0603, + "step": 54856 + }, + { + "epoch": 0.05, + "learning_rate": 4.963093364670578e-05, + "loss": 0.9153, + "step": 54857 + }, + { + "epoch": 0.05, + "learning_rate": 4.963092019969726e-05, + "loss": 0.8918, + "step": 54858 + }, + { + "epoch": 0.05, + "learning_rate": 4.963090675244559e-05, + "loss": 0.9646, + "step": 54859 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630893304950774e-05, + "loss": 0.8052, + "step": 54860 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630879857212815e-05, + "loss": 1.0406, + "step": 54861 + }, + { + "epoch": 0.05, + "learning_rate": 4.963086640923171e-05, + "loss": 0.9896, + "step": 54862 + }, + { + "epoch": 0.05, + "learning_rate": 4.963085296100746e-05, + "loss": 1.1436, + "step": 54863 + }, + { + "epoch": 0.05, + "learning_rate": 4.963083951254006e-05, + "loss": 1.2814, + "step": 54864 + }, + { + "epoch": 0.05, + "learning_rate": 4.963082606382951e-05, + "loss": 0.9315, + "step": 54865 + }, + { + "epoch": 0.05, + "learning_rate": 4.963081261487583e-05, + "loss": 1.188, + "step": 54866 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630799165679e-05, + "loss": 1.4139, + "step": 54867 + }, + { + "epoch": 0.05, + "learning_rate": 4.963078571623902e-05, + "loss": 1.3755, + "step": 54868 + }, + { + "epoch": 0.05, + "learning_rate": 4.96307722665559e-05, + "loss": 1.2433, + "step": 54869 + }, + { + "epoch": 0.05, + "learning_rate": 4.963075881662963e-05, + "loss": 1.224, + "step": 54870 + }, + { + "epoch": 0.05, + "learning_rate": 4.963074536646022e-05, + "loss": 1.3511, + "step": 54871 + }, + { + "epoch": 0.05, + "learning_rate": 4.963073191604767e-05, + "loss": 0.6109, + "step": 54872 + }, + { + "epoch": 0.05, + "learning_rate": 4.963071846539197e-05, + "loss": 1.0037, + "step": 54873 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630705014493126e-05, + "loss": 0.9629, + "step": 54874 + }, + { + "epoch": 0.05, + "learning_rate": 4.963069156335113e-05, + "loss": 0.9512, + "step": 54875 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630678111966e-05, + "loss": 1.4814, + "step": 54876 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630664660337725e-05, + "loss": 1.0632, + "step": 54877 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630651208466306e-05, + "loss": 1.2553, + "step": 54878 + }, + { + "epoch": 0.05, + "learning_rate": 4.963063775635175e-05, + "loss": 1.1759, + "step": 54879 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630624303994044e-05, + "loss": 1.2557, + "step": 54880 + }, + { + "epoch": 0.05, + "learning_rate": 4.963061085139319e-05, + "loss": 0.8034, + "step": 54881 + }, + { + "epoch": 0.05, + "learning_rate": 4.96305973985492e-05, + "loss": 1.0278, + "step": 54882 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630583945462064e-05, + "loss": 1.1309, + "step": 54883 + }, + { + "epoch": 0.05, + "learning_rate": 4.963057049213179e-05, + "loss": 1.0546, + "step": 54884 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630557038558364e-05, + "loss": 1.2601, + "step": 54885 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630543584741805e-05, + "loss": 1.069, + "step": 54886 + }, + { + "epoch": 0.05, + "learning_rate": 4.963053013068211e-05, + "loss": 1.4739, + "step": 54887 + }, + { + "epoch": 0.05, + "learning_rate": 4.963051667637926e-05, + "loss": 0.9931, + "step": 54888 + }, + { + "epoch": 0.05, + "learning_rate": 4.963050322183327e-05, + "loss": 1.1899, + "step": 54889 + }, + { + "epoch": 0.05, + "learning_rate": 4.963048976704414e-05, + "loss": 1.322, + "step": 54890 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630476312011863e-05, + "loss": 1.1432, + "step": 54891 + }, + { + "epoch": 0.05, + "learning_rate": 4.963046285673645e-05, + "loss": 1.1034, + "step": 54892 + }, + { + "epoch": 0.05, + "learning_rate": 4.96304494012179e-05, + "loss": 0.4197, + "step": 54893 + }, + { + "epoch": 0.05, + "learning_rate": 4.96304359454562e-05, + "loss": 0.3784, + "step": 54894 + }, + { + "epoch": 0.05, + "learning_rate": 4.963042248945135e-05, + "loss": 0.9776, + "step": 54895 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630409033203376e-05, + "loss": 1.1977, + "step": 54896 + }, + { + "epoch": 0.05, + "learning_rate": 4.963039557671226e-05, + "loss": 0.9252, + "step": 54897 + }, + { + "epoch": 0.05, + "learning_rate": 4.963038211997799e-05, + "loss": 0.8458, + "step": 54898 + }, + { + "epoch": 0.05, + "learning_rate": 4.963036866300059e-05, + "loss": 0.7553, + "step": 54899 + }, + { + "epoch": 0.05, + "learning_rate": 4.963035520578005e-05, + "loss": 0.7999, + "step": 54900 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630341748316363e-05, + "loss": 0.9601, + "step": 54901 + }, + { + "epoch": 0.05, + "learning_rate": 4.963032829060954e-05, + "loss": 1.0361, + "step": 54902 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630314832659574e-05, + "loss": 1.5558, + "step": 54903 + }, + { + "epoch": 0.05, + "learning_rate": 4.963030137446647e-05, + "loss": 1.1147, + "step": 54904 + }, + { + "epoch": 0.05, + "learning_rate": 4.963028791603023e-05, + "loss": 1.2208, + "step": 54905 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630274457350846e-05, + "loss": 0.9455, + "step": 54906 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630260998428325e-05, + "loss": 0.8504, + "step": 54907 + }, + { + "epoch": 0.05, + "learning_rate": 4.963024753926266e-05, + "loss": 1.1936, + "step": 54908 + }, + { + "epoch": 0.05, + "learning_rate": 4.963023407985385e-05, + "loss": 0.5659, + "step": 54909 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630220620201914e-05, + "loss": 0.8343, + "step": 54910 + }, + { + "epoch": 0.05, + "learning_rate": 4.963020716030683e-05, + "loss": 0.9691, + "step": 54911 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630193700168614e-05, + "loss": 0.8832, + "step": 54912 + }, + { + "epoch": 0.05, + "learning_rate": 4.963018023978726e-05, + "loss": 1.0355, + "step": 54913 + }, + { + "epoch": 0.05, + "learning_rate": 4.963016677916276e-05, + "loss": 1.2375, + "step": 54914 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630153318295116e-05, + "loss": 0.8989, + "step": 54915 + }, + { + "epoch": 0.05, + "learning_rate": 4.9630139857184343e-05, + "loss": 1.1177, + "step": 54916 + }, + { + "epoch": 0.05, + "learning_rate": 4.963012639583043e-05, + "loss": 1.5479, + "step": 54917 + }, + { + "epoch": 0.05, + "learning_rate": 4.963011293423338e-05, + "loss": 1.2447, + "step": 54918 + }, + { + "epoch": 0.05, + "learning_rate": 4.963009947239319e-05, + "loss": 1.0739, + "step": 54919 + }, + { + "epoch": 0.05, + "learning_rate": 4.963008601030986e-05, + "loss": 1.1828, + "step": 54920 + }, + { + "epoch": 0.05, + "learning_rate": 4.96300725479834e-05, + "loss": 1.2342, + "step": 54921 + }, + { + "epoch": 0.05, + "learning_rate": 4.963005908541379e-05, + "loss": 1.1931, + "step": 54922 + }, + { + "epoch": 0.05, + "learning_rate": 4.963004562260105e-05, + "loss": 0.9644, + "step": 54923 + }, + { + "epoch": 0.05, + "learning_rate": 4.963003215954517e-05, + "loss": 1.1622, + "step": 54924 + }, + { + "epoch": 0.05, + "learning_rate": 4.963001869624616e-05, + "loss": 0.5294, + "step": 54925 + }, + { + "epoch": 0.05, + "learning_rate": 4.963000523270401e-05, + "loss": 0.1391, + "step": 54926 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629991768918715e-05, + "loss": 0.0889, + "step": 54927 + }, + { + "epoch": 0.05, + "learning_rate": 4.962997830489029e-05, + "loss": 0.0857, + "step": 54928 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629964840618725e-05, + "loss": 0.0991, + "step": 54929 + }, + { + "epoch": 0.05, + "learning_rate": 4.962995137610403e-05, + "loss": 0.096, + "step": 54930 + }, + { + "epoch": 0.05, + "learning_rate": 4.962993791134619e-05, + "loss": 0.0838, + "step": 54931 + }, + { + "epoch": 0.05, + "learning_rate": 4.962992444634522e-05, + "loss": 0.0689, + "step": 54932 + }, + { + "epoch": 0.05, + "learning_rate": 4.962991098110111e-05, + "loss": 0.0731, + "step": 54933 + }, + { + "epoch": 0.05, + "learning_rate": 4.962989751561387e-05, + "loss": 0.06, + "step": 54934 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629884049883485e-05, + "loss": 0.0533, + "step": 54935 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629870583909964e-05, + "loss": 0.1349, + "step": 54936 + }, + { + "epoch": 0.05, + "learning_rate": 4.962985711769331e-05, + "loss": 0.2452, + "step": 54937 + }, + { + "epoch": 0.05, + "learning_rate": 4.962984365123353e-05, + "loss": 0.1617, + "step": 54938 + }, + { + "epoch": 0.05, + "learning_rate": 4.96298301845306e-05, + "loss": 0.1143, + "step": 54939 + }, + { + "epoch": 0.05, + "learning_rate": 4.962981671758454e-05, + "loss": 0.0531, + "step": 54940 + }, + { + "epoch": 0.05, + "learning_rate": 4.962980325039535e-05, + "loss": 0.0353, + "step": 54941 + }, + { + "epoch": 0.05, + "learning_rate": 4.962978978296302e-05, + "loss": 0.0641, + "step": 54942 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629776315287555e-05, + "loss": 0.0416, + "step": 54943 + }, + { + "epoch": 0.05, + "learning_rate": 4.962976284736896e-05, + "loss": 0.0479, + "step": 54944 + }, + { + "epoch": 0.05, + "learning_rate": 4.962974937920722e-05, + "loss": 0.9854, + "step": 54945 + }, + { + "epoch": 0.05, + "learning_rate": 4.962973591080236e-05, + "loss": 1.0945, + "step": 54946 + }, + { + "epoch": 0.05, + "learning_rate": 4.962972244215436e-05, + "loss": 1.0258, + "step": 54947 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629708973263224e-05, + "loss": 1.1578, + "step": 54948 + }, + { + "epoch": 0.05, + "learning_rate": 4.962969550412895e-05, + "loss": 1.5301, + "step": 54949 + }, + { + "epoch": 0.05, + "learning_rate": 4.962968203475155e-05, + "loss": 1.0276, + "step": 54950 + }, + { + "epoch": 0.05, + "learning_rate": 4.962966856513101e-05, + "loss": 1.6222, + "step": 54951 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629655095267334e-05, + "loss": 1.0555, + "step": 54952 + }, + { + "epoch": 0.05, + "learning_rate": 4.962964162516053e-05, + "loss": 1.1516, + "step": 54953 + }, + { + "epoch": 0.05, + "learning_rate": 4.962962815481059e-05, + "loss": 1.2658, + "step": 54954 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629614684217516e-05, + "loss": 1.3094, + "step": 54955 + }, + { + "epoch": 0.05, + "learning_rate": 4.962960121338131e-05, + "loss": 1.2123, + "step": 54956 + }, + { + "epoch": 0.05, + "learning_rate": 4.962958774230197e-05, + "loss": 1.0154, + "step": 54957 + }, + { + "epoch": 0.05, + "learning_rate": 4.96295742709795e-05, + "loss": 1.2227, + "step": 54958 + }, + { + "epoch": 0.05, + "learning_rate": 4.96295607994139e-05, + "loss": 1.0158, + "step": 54959 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629547327605156e-05, + "loss": 1.6547, + "step": 54960 + }, + { + "epoch": 0.05, + "learning_rate": 4.962953385555329e-05, + "loss": 0.9459, + "step": 54961 + }, + { + "epoch": 0.05, + "learning_rate": 4.962952038325829e-05, + "loss": 1.2667, + "step": 54962 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629506910720155e-05, + "loss": 1.1804, + "step": 54963 + }, + { + "epoch": 0.05, + "learning_rate": 4.962949343793889e-05, + "loss": 1.391, + "step": 54964 + }, + { + "epoch": 0.05, + "learning_rate": 4.962947996491449e-05, + "loss": 1.4556, + "step": 54965 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629466491646956e-05, + "loss": 1.198, + "step": 54966 + }, + { + "epoch": 0.05, + "learning_rate": 4.96294530181363e-05, + "loss": 1.1983, + "step": 54967 + }, + { + "epoch": 0.05, + "learning_rate": 4.962943954438251e-05, + "loss": 1.2942, + "step": 54968 + }, + { + "epoch": 0.05, + "learning_rate": 4.962942607038558e-05, + "loss": 1.1763, + "step": 54969 + }, + { + "epoch": 0.05, + "learning_rate": 4.962941259614552e-05, + "loss": 1.1828, + "step": 54970 + }, + { + "epoch": 0.05, + "learning_rate": 4.962939912166234e-05, + "loss": 1.2043, + "step": 54971 + }, + { + "epoch": 0.05, + "learning_rate": 4.962938564693602e-05, + "loss": 0.8797, + "step": 54972 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629372171966567e-05, + "loss": 0.9645, + "step": 54973 + }, + { + "epoch": 0.05, + "learning_rate": 4.962935869675398e-05, + "loss": 0.8332, + "step": 54974 + }, + { + "epoch": 0.05, + "learning_rate": 4.962934522129827e-05, + "loss": 0.5845, + "step": 54975 + }, + { + "epoch": 0.05, + "learning_rate": 4.962933174559943e-05, + "loss": 0.369, + "step": 54976 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629318269657464e-05, + "loss": 0.9471, + "step": 54977 + }, + { + "epoch": 0.05, + "learning_rate": 4.962930479347236e-05, + "loss": 1.3995, + "step": 54978 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629291317044125e-05, + "loss": 1.1406, + "step": 54979 + }, + { + "epoch": 0.05, + "learning_rate": 4.962927784037277e-05, + "loss": 1.2293, + "step": 54980 + }, + { + "epoch": 0.05, + "learning_rate": 4.962926436345827e-05, + "loss": 1.5125, + "step": 54981 + }, + { + "epoch": 0.05, + "learning_rate": 4.962925088630065e-05, + "loss": 0.5801, + "step": 54982 + }, + { + "epoch": 0.05, + "learning_rate": 4.96292374088999e-05, + "loss": 1.3338, + "step": 54983 + }, + { + "epoch": 0.05, + "learning_rate": 4.962922393125602e-05, + "loss": 1.1519, + "step": 54984 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629210453369015e-05, + "loss": 1.032, + "step": 54985 + }, + { + "epoch": 0.05, + "learning_rate": 4.962919697523887e-05, + "loss": 1.1785, + "step": 54986 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629183496865606e-05, + "loss": 1.1965, + "step": 54987 + }, + { + "epoch": 0.05, + "learning_rate": 4.96291700182492e-05, + "loss": 0.9652, + "step": 54988 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629156539389675e-05, + "loss": 1.1084, + "step": 54989 + }, + { + "epoch": 0.05, + "learning_rate": 4.962914306028703e-05, + "loss": 0.987, + "step": 54990 + }, + { + "epoch": 0.05, + "learning_rate": 4.962912958094124e-05, + "loss": 1.0694, + "step": 54991 + }, + { + "epoch": 0.05, + "learning_rate": 4.962911610135233e-05, + "loss": 0.8405, + "step": 54992 + }, + { + "epoch": 0.05, + "learning_rate": 4.962910262152029e-05, + "loss": 0.8536, + "step": 54993 + }, + { + "epoch": 0.05, + "learning_rate": 4.962908914144512e-05, + "loss": 0.7437, + "step": 54994 + }, + { + "epoch": 0.05, + "learning_rate": 4.962907566112683e-05, + "loss": 1.0608, + "step": 54995 + }, + { + "epoch": 0.05, + "learning_rate": 4.96290621805654e-05, + "loss": 1.3794, + "step": 54996 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629048699760847e-05, + "loss": 1.0758, + "step": 54997 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629035218713175e-05, + "loss": 0.7106, + "step": 54998 + }, + { + "epoch": 0.05, + "learning_rate": 4.9629021737422364e-05, + "loss": 1.1511, + "step": 54999 + }, + { + "epoch": 0.06, + "learning_rate": 4.9629008255888434e-05, + "loss": 1.4025, + "step": 55000 + }, + { + "epoch": 0.06, + "eval_loss": 1.07865571975708, + "eval_runtime": 125.5474, + "eval_samples_per_second": 11.032, + "eval_steps_per_second": 5.52, + "step": 55000 } ], "max_steps": 1000000, "num_train_epochs": 9223372036854775807, - "total_flos": 7.5185365450752e+17, + "total_flos": 2.00494307868672e+18, "trial_name": null, "trial_params": null }