{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.981977220475146, "eval_steps": 1000, "global_step": 37500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005285272587933722, "grad_norm": 677.4678344726562, "learning_rate": 1.0570824524312897e-08, "loss": 30.8761, "step": 10 }, { "epoch": 0.0010570545175867445, "grad_norm": 893.767578125, "learning_rate": 3.171247357293869e-08, "loss": 31.1956, "step": 20 }, { "epoch": 0.0015855817763801168, "grad_norm": 461.0469665527344, "learning_rate": 5.8139534883720935e-08, "loss": 31.1699, "step": 30 }, { "epoch": 0.002114109035173489, "grad_norm": 1452.7264404296875, "learning_rate": 8.456659619450318e-08, "loss": 30.451, "step": 40 }, { "epoch": 0.002642636293966861, "grad_norm": 560.3501586914062, "learning_rate": 1.1099365750528543e-07, "loss": 31.3123, "step": 50 }, { "epoch": 0.0031711635527602337, "grad_norm": 1279.9322509765625, "learning_rate": 1.3742071881606765e-07, "loss": 30.8764, "step": 60 }, { "epoch": 0.003699690811553606, "grad_norm": 473.1219177246094, "learning_rate": 1.6384778012684993e-07, "loss": 30.2013, "step": 70 }, { "epoch": 0.004228218070346978, "grad_norm": 736.8867797851562, "learning_rate": 1.9027484143763215e-07, "loss": 30.483, "step": 80 }, { "epoch": 0.00475674532914035, "grad_norm": 468.40643310546875, "learning_rate": 2.1670190274841438e-07, "loss": 30.0637, "step": 90 }, { "epoch": 0.005285272587933722, "grad_norm": 456.3676452636719, "learning_rate": 2.431289640591966e-07, "loss": 29.1119, "step": 100 }, { "epoch": 0.005813799846727095, "grad_norm": 565.6411743164062, "learning_rate": 2.695560253699789e-07, "loss": 28.1476, "step": 110 }, { "epoch": 0.006342327105520467, "grad_norm": 877.3154296875, "learning_rate": 2.959830866807611e-07, "loss": 27.6713, "step": 120 }, { "epoch": 0.0068708543643138395, "grad_norm": 407.738037109375, "learning_rate": 3.224101479915433e-07, "loss": 26.9898, "step": 130 }, { "epoch": 0.007399381623107212, "grad_norm": 425.1963195800781, "learning_rate": 3.488372093023256e-07, "loss": 26.0919, "step": 140 }, { "epoch": 0.007927908881900585, "grad_norm": 438.06292724609375, "learning_rate": 3.752642706131079e-07, "loss": 25.3644, "step": 150 }, { "epoch": 0.008456436140693956, "grad_norm": 818.50048828125, "learning_rate": 3.990486257928119e-07, "loss": 24.8812, "step": 160 }, { "epoch": 0.008984963399487329, "grad_norm": 396.7209777832031, "learning_rate": 4.2547568710359414e-07, "loss": 24.4341, "step": 170 }, { "epoch": 0.0095134906582807, "grad_norm": 294.7137451171875, "learning_rate": 4.5190274841437633e-07, "loss": 23.9351, "step": 180 }, { "epoch": 0.010042017917074073, "grad_norm": 501.5253601074219, "learning_rate": 4.783298097251586e-07, "loss": 23.4529, "step": 190 }, { "epoch": 0.010570545175867445, "grad_norm": 246.00775146484375, "learning_rate": 5.047568710359408e-07, "loss": 22.4701, "step": 200 }, { "epoch": 0.011099072434660818, "grad_norm": 402.6690979003906, "learning_rate": 5.285412262156449e-07, "loss": 22.3146, "step": 210 }, { "epoch": 0.01162759969345419, "grad_norm": 271.7874450683594, "learning_rate": 5.549682875264271e-07, "loss": 21.4161, "step": 220 }, { "epoch": 0.012156126952247562, "grad_norm": 295.98077392578125, "learning_rate": 5.813953488372094e-07, "loss": 20.5646, "step": 230 }, { "epoch": 0.012684654211040935, "grad_norm": 425.95257568359375, "learning_rate": 6.078224101479916e-07, "loss": 20.5831, "step": 240 }, { "epoch": 0.013213181469834306, "grad_norm": 231.6118621826172, "learning_rate": 6.342494714587738e-07, "loss": 20.0313, "step": 250 }, { "epoch": 0.013741708728627679, "grad_norm": 296.9624328613281, "learning_rate": 6.606765327695561e-07, "loss": 18.9579, "step": 260 }, { "epoch": 0.014270235987421052, "grad_norm": 297.7179260253906, "learning_rate": 6.871035940803383e-07, "loss": 18.9185, "step": 270 }, { "epoch": 0.014798763246214423, "grad_norm": 747.8505249023438, "learning_rate": 7.135306553911206e-07, "loss": 18.4524, "step": 280 }, { "epoch": 0.015327290505007796, "grad_norm": 330.7308654785156, "learning_rate": 7.399577167019029e-07, "loss": 18.7075, "step": 290 }, { "epoch": 0.01585581776380117, "grad_norm": 269.0719909667969, "learning_rate": 7.66384778012685e-07, "loss": 18.3567, "step": 300 }, { "epoch": 0.01638434502259454, "grad_norm": 211.5826873779297, "learning_rate": 7.928118393234673e-07, "loss": 18.2696, "step": 310 }, { "epoch": 0.016912872281387912, "grad_norm": 674.4915161132812, "learning_rate": 8.192389006342495e-07, "loss": 18.0022, "step": 320 }, { "epoch": 0.017441399540181285, "grad_norm": 209.72621154785156, "learning_rate": 8.456659619450318e-07, "loss": 17.8636, "step": 330 }, { "epoch": 0.017969926798974658, "grad_norm": 262.18719482421875, "learning_rate": 8.720930232558141e-07, "loss": 16.9672, "step": 340 }, { "epoch": 0.01849845405776803, "grad_norm": 235.2413787841797, "learning_rate": 8.985200845665963e-07, "loss": 16.8501, "step": 350 }, { "epoch": 0.0190269813165614, "grad_norm": 540.785400390625, "learning_rate": 9.249471458773786e-07, "loss": 16.8077, "step": 360 }, { "epoch": 0.019555508575354773, "grad_norm": 175.78623962402344, "learning_rate": 9.513742071881607e-07, "loss": 17.0842, "step": 370 }, { "epoch": 0.020084035834148146, "grad_norm": 243.97581481933594, "learning_rate": 9.77801268498943e-07, "loss": 16.9962, "step": 380 }, { "epoch": 0.02061256309294152, "grad_norm": 253.9279022216797, "learning_rate": 1.0042283298097252e-06, "loss": 16.3446, "step": 390 }, { "epoch": 0.02114109035173489, "grad_norm": 206.56703186035156, "learning_rate": 1.0306553911205076e-06, "loss": 16.4884, "step": 400 }, { "epoch": 0.021669617610528262, "grad_norm": 368.267578125, "learning_rate": 1.0570824524312898e-06, "loss": 15.9466, "step": 410 }, { "epoch": 0.022198144869321635, "grad_norm": 169.0613250732422, "learning_rate": 1.083509513742072e-06, "loss": 15.9074, "step": 420 }, { "epoch": 0.022726672128115008, "grad_norm": 165.09361267089844, "learning_rate": 1.1099365750528542e-06, "loss": 16.0784, "step": 430 }, { "epoch": 0.02325519938690838, "grad_norm": 148.66358947753906, "learning_rate": 1.1363636363636364e-06, "loss": 15.3321, "step": 440 }, { "epoch": 0.02378372664570175, "grad_norm": 171.5795135498047, "learning_rate": 1.1627906976744188e-06, "loss": 15.2305, "step": 450 }, { "epoch": 0.024312253904495124, "grad_norm": 230.301513671875, "learning_rate": 1.189217758985201e-06, "loss": 14.3984, "step": 460 }, { "epoch": 0.024840781163288497, "grad_norm": 750.974853515625, "learning_rate": 1.2156448202959832e-06, "loss": 12.9312, "step": 470 }, { "epoch": 0.02536930842208187, "grad_norm": 391.048828125, "learning_rate": 1.2420718816067654e-06, "loss": 11.9995, "step": 480 }, { "epoch": 0.025897835680875243, "grad_norm": 413.6315612792969, "learning_rate": 1.2684989429175476e-06, "loss": 10.5113, "step": 490 }, { "epoch": 0.026426362939668612, "grad_norm": 648.3087768554688, "learning_rate": 1.2949260042283298e-06, "loss": 10.3073, "step": 500 }, { "epoch": 0.026954890198461985, "grad_norm": 300.6764831542969, "learning_rate": 1.3213530655391122e-06, "loss": 9.8948, "step": 510 }, { "epoch": 0.027483417457255358, "grad_norm": 273.8295593261719, "learning_rate": 1.3477801268498944e-06, "loss": 9.5825, "step": 520 }, { "epoch": 0.02801194471604873, "grad_norm": 265.43939208984375, "learning_rate": 1.3742071881606766e-06, "loss": 9.7098, "step": 530 }, { "epoch": 0.028540471974842104, "grad_norm": 400.6189880371094, "learning_rate": 1.400634249471459e-06, "loss": 8.9668, "step": 540 }, { "epoch": 0.029068999233635474, "grad_norm": 428.2016906738281, "learning_rate": 1.4270613107822412e-06, "loss": 9.0851, "step": 550 }, { "epoch": 0.029597526492428847, "grad_norm": 585.582763671875, "learning_rate": 1.4534883720930234e-06, "loss": 8.5216, "step": 560 }, { "epoch": 0.03012605375122222, "grad_norm": 365.0558166503906, "learning_rate": 1.4799154334038058e-06, "loss": 8.4923, "step": 570 }, { "epoch": 0.030654581010015593, "grad_norm": 423.7528991699219, "learning_rate": 1.506342494714588e-06, "loss": 8.3583, "step": 580 }, { "epoch": 0.031183108268808962, "grad_norm": 377.3611145019531, "learning_rate": 1.53276955602537e-06, "loss": 8.0537, "step": 590 }, { "epoch": 0.03171163552760234, "grad_norm": 386.19049072265625, "learning_rate": 1.5591966173361522e-06, "loss": 8.9753, "step": 600 }, { "epoch": 0.03224016278639571, "grad_norm": 241.32601928710938, "learning_rate": 1.5856236786469346e-06, "loss": 7.9553, "step": 610 }, { "epoch": 0.03276869004518908, "grad_norm": 461.6118469238281, "learning_rate": 1.6120507399577168e-06, "loss": 7.8576, "step": 620 }, { "epoch": 0.033297217303982454, "grad_norm": 295.9068908691406, "learning_rate": 1.638477801268499e-06, "loss": 8.0445, "step": 630 }, { "epoch": 0.033825744562775824, "grad_norm": 290.42938232421875, "learning_rate": 1.6649048625792814e-06, "loss": 8.0918, "step": 640 }, { "epoch": 0.0343542718215692, "grad_norm": 202.0693359375, "learning_rate": 1.6913319238900636e-06, "loss": 7.3087, "step": 650 }, { "epoch": 0.03488279908036257, "grad_norm": 263.758056640625, "learning_rate": 1.7177589852008458e-06, "loss": 7.471, "step": 660 }, { "epoch": 0.03541132633915594, "grad_norm": 515.0106811523438, "learning_rate": 1.7441860465116282e-06, "loss": 7.5734, "step": 670 }, { "epoch": 0.035939853597949316, "grad_norm": 213.4090118408203, "learning_rate": 1.7706131078224104e-06, "loss": 7.2879, "step": 680 }, { "epoch": 0.036468380856742685, "grad_norm": 223.5814971923828, "learning_rate": 1.7970401691331926e-06, "loss": 6.819, "step": 690 }, { "epoch": 0.03699690811553606, "grad_norm": 297.4150085449219, "learning_rate": 1.823467230443975e-06, "loss": 6.876, "step": 700 }, { "epoch": 0.03752543537432943, "grad_norm": 345.94439697265625, "learning_rate": 1.8498942917547572e-06, "loss": 7.0304, "step": 710 }, { "epoch": 0.0380539626331228, "grad_norm": 364.8387145996094, "learning_rate": 1.8763213530655392e-06, "loss": 7.076, "step": 720 }, { "epoch": 0.03858248989191618, "grad_norm": 174.8155517578125, "learning_rate": 1.9027484143763214e-06, "loss": 6.8862, "step": 730 }, { "epoch": 0.03911101715070955, "grad_norm": 242.55648803710938, "learning_rate": 1.9291754756871036e-06, "loss": 7.0227, "step": 740 }, { "epoch": 0.03963954440950292, "grad_norm": 188.05819702148438, "learning_rate": 1.955602536997886e-06, "loss": 6.6415, "step": 750 }, { "epoch": 0.04016807166829629, "grad_norm": 184.78993225097656, "learning_rate": 1.9820295983086684e-06, "loss": 6.6127, "step": 760 }, { "epoch": 0.04069659892708966, "grad_norm": 171.94725036621094, "learning_rate": 2.0084566596194504e-06, "loss": 6.3599, "step": 770 }, { "epoch": 0.04122512618588304, "grad_norm": 238.50254821777344, "learning_rate": 2.0348837209302328e-06, "loss": 6.5083, "step": 780 }, { "epoch": 0.04175365344467641, "grad_norm": 197.2220001220703, "learning_rate": 2.061310782241015e-06, "loss": 6.9035, "step": 790 }, { "epoch": 0.04228218070346978, "grad_norm": 181.18223571777344, "learning_rate": 2.087737843551797e-06, "loss": 5.7746, "step": 800 }, { "epoch": 0.042810707962263155, "grad_norm": 295.3748779296875, "learning_rate": 2.1141649048625796e-06, "loss": 5.9542, "step": 810 }, { "epoch": 0.043339235221056524, "grad_norm": 216.48043823242188, "learning_rate": 2.140591966173362e-06, "loss": 5.9673, "step": 820 }, { "epoch": 0.0438677624798499, "grad_norm": 290.2593688964844, "learning_rate": 2.167019027484144e-06, "loss": 5.884, "step": 830 }, { "epoch": 0.04439628973864327, "grad_norm": 217.63153076171875, "learning_rate": 2.1934460887949264e-06, "loss": 5.9201, "step": 840 }, { "epoch": 0.04492481699743664, "grad_norm": 238.81053161621094, "learning_rate": 2.2198731501057084e-06, "loss": 6.1857, "step": 850 }, { "epoch": 0.045453344256230016, "grad_norm": 219.9297637939453, "learning_rate": 2.2463002114164908e-06, "loss": 6.1701, "step": 860 }, { "epoch": 0.045981871515023386, "grad_norm": 442.77294921875, "learning_rate": 2.2727272727272728e-06, "loss": 6.0735, "step": 870 }, { "epoch": 0.04651039877381676, "grad_norm": 192.03538513183594, "learning_rate": 2.299154334038055e-06, "loss": 5.5714, "step": 880 }, { "epoch": 0.04703892603261013, "grad_norm": 167.0824432373047, "learning_rate": 2.3255813953488376e-06, "loss": 5.2074, "step": 890 }, { "epoch": 0.0475674532914035, "grad_norm": 191.635986328125, "learning_rate": 2.3520084566596196e-06, "loss": 5.4976, "step": 900 }, { "epoch": 0.04809598055019688, "grad_norm": 223.22190856933594, "learning_rate": 2.378435517970402e-06, "loss": 5.6967, "step": 910 }, { "epoch": 0.04862450780899025, "grad_norm": 166.43954467773438, "learning_rate": 2.404862579281184e-06, "loss": 5.5235, "step": 920 }, { "epoch": 0.049153035067783624, "grad_norm": 170.29635620117188, "learning_rate": 2.4312896405919664e-06, "loss": 5.7587, "step": 930 }, { "epoch": 0.04968156232657699, "grad_norm": 183.119384765625, "learning_rate": 2.4577167019027488e-06, "loss": 5.5175, "step": 940 }, { "epoch": 0.05021008958537036, "grad_norm": 152.91331481933594, "learning_rate": 2.4841437632135308e-06, "loss": 6.021, "step": 950 }, { "epoch": 0.05073861684416374, "grad_norm": 160.9178924560547, "learning_rate": 2.510570824524313e-06, "loss": 5.3513, "step": 960 }, { "epoch": 0.05126714410295711, "grad_norm": 181.09780883789062, "learning_rate": 2.536997885835095e-06, "loss": 5.7045, "step": 970 }, { "epoch": 0.051795671361750485, "grad_norm": 193.8708953857422, "learning_rate": 2.5634249471458776e-06, "loss": 5.1968, "step": 980 }, { "epoch": 0.052324198620543855, "grad_norm": 177.3693084716797, "learning_rate": 2.5898520084566595e-06, "loss": 5.4299, "step": 990 }, { "epoch": 0.052852725879337224, "grad_norm": 153.60520935058594, "learning_rate": 2.6162790697674424e-06, "loss": 5.5637, "step": 1000 }, { "epoch": 0.052852725879337224, "eval_loss": 4.37191915512085, "eval_runtime": 37.3418, "eval_samples_per_second": 327.568, "eval_steps_per_second": 10.257, "eval_sts-dev_pearson_cosine": 0.7907550996216652, "eval_sts-dev_pearson_dot": 0.7787264096749501, "eval_sts-dev_pearson_euclidean": 0.7974871997180729, "eval_sts-dev_pearson_manhattan": 0.7988757373058686, "eval_sts-dev_pearson_max": 0.7988757373058686, "eval_sts-dev_spearman_cosine": 0.7937427871968019, "eval_sts-dev_spearman_dot": 0.7748548098747573, "eval_sts-dev_spearman_euclidean": 0.7913054353560678, "eval_sts-dev_spearman_manhattan": 0.7925317657655016, "eval_sts-dev_spearman_max": 0.7937427871968019, "step": 1000 }, { "epoch": 0.0533812531381306, "grad_norm": 199.5458526611328, "learning_rate": 2.6427061310782244e-06, "loss": 5.6765, "step": 1010 }, { "epoch": 0.05390978039692397, "grad_norm": 178.0149383544922, "learning_rate": 2.6691331923890068e-06, "loss": 4.9681, "step": 1020 }, { "epoch": 0.05443830765571735, "grad_norm": 185.1396026611328, "learning_rate": 2.6955602536997888e-06, "loss": 5.0406, "step": 1030 }, { "epoch": 0.054966834914510716, "grad_norm": 161.58509826660156, "learning_rate": 2.7219873150105707e-06, "loss": 5.5189, "step": 1040 }, { "epoch": 0.055495362173304086, "grad_norm": 177.19891357421875, "learning_rate": 2.748414376321353e-06, "loss": 5.0272, "step": 1050 }, { "epoch": 0.05602388943209746, "grad_norm": 153.0261993408203, "learning_rate": 2.774841437632135e-06, "loss": 5.1105, "step": 1060 }, { "epoch": 0.05655241669089083, "grad_norm": 149.83326721191406, "learning_rate": 2.801268498942918e-06, "loss": 5.1416, "step": 1070 }, { "epoch": 0.05708094394968421, "grad_norm": 148.05845642089844, "learning_rate": 2.8276955602537e-06, "loss": 4.7098, "step": 1080 }, { "epoch": 0.05760947120847758, "grad_norm": 131.86166381835938, "learning_rate": 2.8541226215644824e-06, "loss": 4.4913, "step": 1090 }, { "epoch": 0.05813799846727095, "grad_norm": 191.01324462890625, "learning_rate": 2.8805496828752644e-06, "loss": 4.9698, "step": 1100 }, { "epoch": 0.058666525726064324, "grad_norm": 195.1591339111328, "learning_rate": 2.9069767441860468e-06, "loss": 4.9357, "step": 1110 }, { "epoch": 0.05919505298485769, "grad_norm": 156.8426971435547, "learning_rate": 2.9334038054968287e-06, "loss": 4.5437, "step": 1120 }, { "epoch": 0.05972358024365106, "grad_norm": 189.21104431152344, "learning_rate": 2.9598308668076116e-06, "loss": 4.9405, "step": 1130 }, { "epoch": 0.06025210750244444, "grad_norm": 177.53219604492188, "learning_rate": 2.9862579281183936e-06, "loss": 4.7391, "step": 1140 }, { "epoch": 0.06078063476123781, "grad_norm": 173.1730194091797, "learning_rate": 3.012684989429176e-06, "loss": 4.8463, "step": 1150 }, { "epoch": 0.061309162020031185, "grad_norm": 154.30496215820312, "learning_rate": 3.039112050739958e-06, "loss": 5.0251, "step": 1160 }, { "epoch": 0.061837689278824555, "grad_norm": 148.533203125, "learning_rate": 3.06553911205074e-06, "loss": 4.9732, "step": 1170 }, { "epoch": 0.062366216537617924, "grad_norm": 182.97225952148438, "learning_rate": 3.0919661733615224e-06, "loss": 5.005, "step": 1180 }, { "epoch": 0.0628947437964113, "grad_norm": 126.25519561767578, "learning_rate": 3.1183932346723043e-06, "loss": 4.6122, "step": 1190 }, { "epoch": 0.06342327105520468, "grad_norm": 165.12728881835938, "learning_rate": 3.144820295983087e-06, "loss": 4.8709, "step": 1200 }, { "epoch": 0.06395179831399804, "grad_norm": 143.62265014648438, "learning_rate": 3.171247357293869e-06, "loss": 4.6791, "step": 1210 }, { "epoch": 0.06448032557279142, "grad_norm": 200.333251953125, "learning_rate": 3.1976744186046516e-06, "loss": 4.8574, "step": 1220 }, { "epoch": 0.06500885283158479, "grad_norm": 149.36807250976562, "learning_rate": 3.2241014799154335e-06, "loss": 4.2062, "step": 1230 }, { "epoch": 0.06553738009037816, "grad_norm": 127.79821014404297, "learning_rate": 3.250528541226216e-06, "loss": 4.5025, "step": 1240 }, { "epoch": 0.06606590734917153, "grad_norm": 174.46910095214844, "learning_rate": 3.276955602536998e-06, "loss": 4.7314, "step": 1250 }, { "epoch": 0.06659443460796491, "grad_norm": 178.1285400390625, "learning_rate": 3.3033826638477808e-06, "loss": 4.2632, "step": 1260 }, { "epoch": 0.06712296186675827, "grad_norm": 250.86181640625, "learning_rate": 3.3298097251585628e-06, "loss": 4.6604, "step": 1270 }, { "epoch": 0.06765148912555165, "grad_norm": 166.58538818359375, "learning_rate": 3.356236786469345e-06, "loss": 5.2568, "step": 1280 }, { "epoch": 0.06818001638434502, "grad_norm": 126.69271087646484, "learning_rate": 3.382663847780127e-06, "loss": 4.1808, "step": 1290 }, { "epoch": 0.0687085436431384, "grad_norm": 309.02728271484375, "learning_rate": 3.409090909090909e-06, "loss": 4.5191, "step": 1300 }, { "epoch": 0.06923707090193176, "grad_norm": 1807.111083984375, "learning_rate": 3.4355179704016915e-06, "loss": 4.6328, "step": 1310 }, { "epoch": 0.06976559816072514, "grad_norm": 183.28480529785156, "learning_rate": 3.4619450317124735e-06, "loss": 4.5478, "step": 1320 }, { "epoch": 0.07029412541951852, "grad_norm": 132.682861328125, "learning_rate": 3.4883720930232564e-06, "loss": 4.1463, "step": 1330 }, { "epoch": 0.07082265267831188, "grad_norm": 147.00880432128906, "learning_rate": 3.5147991543340384e-06, "loss": 4.2767, "step": 1340 }, { "epoch": 0.07135117993710526, "grad_norm": 142.31639099121094, "learning_rate": 3.5412262156448208e-06, "loss": 4.3664, "step": 1350 }, { "epoch": 0.07187970719589863, "grad_norm": 156.88763427734375, "learning_rate": 3.5676532769556027e-06, "loss": 4.2879, "step": 1360 }, { "epoch": 0.072408234454692, "grad_norm": 136.17166137695312, "learning_rate": 3.594080338266385e-06, "loss": 4.7577, "step": 1370 }, { "epoch": 0.07293676171348537, "grad_norm": 155.89089965820312, "learning_rate": 3.620507399577167e-06, "loss": 3.979, "step": 1380 }, { "epoch": 0.07346528897227875, "grad_norm": 521.5955810546875, "learning_rate": 3.64693446088795e-06, "loss": 5.3513, "step": 1390 }, { "epoch": 0.07399381623107212, "grad_norm": 152.02322387695312, "learning_rate": 3.673361522198732e-06, "loss": 4.5532, "step": 1400 }, { "epoch": 0.07452234348986549, "grad_norm": 402.0461120605469, "learning_rate": 3.6997885835095144e-06, "loss": 4.6752, "step": 1410 }, { "epoch": 0.07505087074865886, "grad_norm": 161.24819946289062, "learning_rate": 3.7262156448202964e-06, "loss": 4.1074, "step": 1420 }, { "epoch": 0.07557939800745224, "grad_norm": 145.4809112548828, "learning_rate": 3.7526427061310783e-06, "loss": 4.4917, "step": 1430 }, { "epoch": 0.0761079252662456, "grad_norm": 131.27987670898438, "learning_rate": 3.7790697674418607e-06, "loss": 4.2143, "step": 1440 }, { "epoch": 0.07663645252503898, "grad_norm": 148.03436279296875, "learning_rate": 3.8054968287526427e-06, "loss": 4.1935, "step": 1450 }, { "epoch": 0.07716497978383235, "grad_norm": 193.45318603515625, "learning_rate": 3.8319238900634256e-06, "loss": 4.1725, "step": 1460 }, { "epoch": 0.07769350704262572, "grad_norm": 192.2095489501953, "learning_rate": 3.858350951374207e-06, "loss": 4.2205, "step": 1470 }, { "epoch": 0.0782220343014191, "grad_norm": 219.00430297851562, "learning_rate": 3.8847780126849895e-06, "loss": 4.0035, "step": 1480 }, { "epoch": 0.07875056156021247, "grad_norm": 162.20899963378906, "learning_rate": 3.911205073995772e-06, "loss": 4.5897, "step": 1490 }, { "epoch": 0.07927908881900585, "grad_norm": 160.90748596191406, "learning_rate": 3.937632135306554e-06, "loss": 4.3531, "step": 1500 }, { "epoch": 0.07980761607779921, "grad_norm": 131.437255859375, "learning_rate": 3.964059196617337e-06, "loss": 4.2076, "step": 1510 }, { "epoch": 0.08033614333659259, "grad_norm": 146.01869201660156, "learning_rate": 3.990486257928119e-06, "loss": 4.4968, "step": 1520 }, { "epoch": 0.08086467059538596, "grad_norm": 145.6388702392578, "learning_rate": 4.016913319238901e-06, "loss": 3.8403, "step": 1530 }, { "epoch": 0.08139319785417932, "grad_norm": 148.95050048828125, "learning_rate": 4.043340380549683e-06, "loss": 4.273, "step": 1540 }, { "epoch": 0.0819217251129727, "grad_norm": 418.997802734375, "learning_rate": 4.0697674418604655e-06, "loss": 4.4002, "step": 1550 }, { "epoch": 0.08245025237176608, "grad_norm": 135.8203125, "learning_rate": 4.096194503171247e-06, "loss": 4.1589, "step": 1560 }, { "epoch": 0.08297877963055944, "grad_norm": 169.71337890625, "learning_rate": 4.12262156448203e-06, "loss": 4.032, "step": 1570 }, { "epoch": 0.08350730688935282, "grad_norm": 128.50816345214844, "learning_rate": 4.149048625792812e-06, "loss": 4.0008, "step": 1580 }, { "epoch": 0.0840358341481462, "grad_norm": 117.56497192382812, "learning_rate": 4.175475687103594e-06, "loss": 4.5793, "step": 1590 }, { "epoch": 0.08456436140693956, "grad_norm": 139.8838653564453, "learning_rate": 4.201902748414377e-06, "loss": 3.8702, "step": 1600 }, { "epoch": 0.08509288866573293, "grad_norm": 119.64447021484375, "learning_rate": 4.228329809725159e-06, "loss": 4.034, "step": 1610 }, { "epoch": 0.08562141592452631, "grad_norm": 184.14707946777344, "learning_rate": 4.254756871035941e-06, "loss": 4.1288, "step": 1620 }, { "epoch": 0.08614994318331969, "grad_norm": 99.81002807617188, "learning_rate": 4.281183932346724e-06, "loss": 4.1309, "step": 1630 }, { "epoch": 0.08667847044211305, "grad_norm": 166.3089141845703, "learning_rate": 4.3076109936575055e-06, "loss": 3.8525, "step": 1640 }, { "epoch": 0.08720699770090642, "grad_norm": 142.16470336914062, "learning_rate": 4.334038054968288e-06, "loss": 3.8609, "step": 1650 }, { "epoch": 0.0877355249596998, "grad_norm": 247.35086059570312, "learning_rate": 4.36046511627907e-06, "loss": 3.9533, "step": 1660 }, { "epoch": 0.08826405221849316, "grad_norm": 189.7833709716797, "learning_rate": 4.386892177589853e-06, "loss": 4.0777, "step": 1670 }, { "epoch": 0.08879257947728654, "grad_norm": 148.39865112304688, "learning_rate": 4.413319238900634e-06, "loss": 4.0823, "step": 1680 }, { "epoch": 0.08932110673607992, "grad_norm": 140.76446533203125, "learning_rate": 4.439746300211417e-06, "loss": 3.7431, "step": 1690 }, { "epoch": 0.08984963399487328, "grad_norm": 173.0960693359375, "learning_rate": 4.466173361522199e-06, "loss": 3.9882, "step": 1700 }, { "epoch": 0.09037816125366666, "grad_norm": 136.67481994628906, "learning_rate": 4.4926004228329815e-06, "loss": 4.5606, "step": 1710 }, { "epoch": 0.09090668851246003, "grad_norm": 123.93934631347656, "learning_rate": 4.519027484143764e-06, "loss": 4.1139, "step": 1720 }, { "epoch": 0.09143521577125341, "grad_norm": 127.24778747558594, "learning_rate": 4.5454545454545455e-06, "loss": 3.9865, "step": 1730 }, { "epoch": 0.09196374303004677, "grad_norm": 138.16421508789062, "learning_rate": 4.571881606765328e-06, "loss": 3.986, "step": 1740 }, { "epoch": 0.09249227028884015, "grad_norm": 124.25382232666016, "learning_rate": 4.59830866807611e-06, "loss": 3.8682, "step": 1750 }, { "epoch": 0.09302079754763352, "grad_norm": 118.60977172851562, "learning_rate": 4.624735729386893e-06, "loss": 3.9832, "step": 1760 }, { "epoch": 0.09354932480642689, "grad_norm": 144.16261291503906, "learning_rate": 4.651162790697675e-06, "loss": 4.0979, "step": 1770 }, { "epoch": 0.09407785206522026, "grad_norm": 106.15113067626953, "learning_rate": 4.6775898520084576e-06, "loss": 3.9455, "step": 1780 }, { "epoch": 0.09460637932401364, "grad_norm": 147.02496337890625, "learning_rate": 4.704016913319239e-06, "loss": 3.4984, "step": 1790 }, { "epoch": 0.095134906582807, "grad_norm": 98.03705596923828, "learning_rate": 4.7304439746300215e-06, "loss": 3.9438, "step": 1800 }, { "epoch": 0.09566343384160038, "grad_norm": 106.04286193847656, "learning_rate": 4.756871035940804e-06, "loss": 4.2493, "step": 1810 }, { "epoch": 0.09619196110039376, "grad_norm": 117.45946502685547, "learning_rate": 4.7832980972515855e-06, "loss": 4.2974, "step": 1820 }, { "epoch": 0.09672048835918713, "grad_norm": 147.55563354492188, "learning_rate": 4.809725158562368e-06, "loss": 3.9228, "step": 1830 }, { "epoch": 0.0972490156179805, "grad_norm": 131.4012451171875, "learning_rate": 4.83615221987315e-06, "loss": 3.392, "step": 1840 }, { "epoch": 0.09777754287677387, "grad_norm": 108.57630920410156, "learning_rate": 4.862579281183933e-06, "loss": 3.5158, "step": 1850 }, { "epoch": 0.09830607013556725, "grad_norm": 189.4512481689453, "learning_rate": 4.889006342494715e-06, "loss": 3.5725, "step": 1860 }, { "epoch": 0.09883459739436061, "grad_norm": 155.00392150878906, "learning_rate": 4.9154334038054975e-06, "loss": 3.9951, "step": 1870 }, { "epoch": 0.09936312465315399, "grad_norm": 115.60594940185547, "learning_rate": 4.941860465116279e-06, "loss": 3.5366, "step": 1880 }, { "epoch": 0.09989165191194736, "grad_norm": 179.31591796875, "learning_rate": 4.9682875264270615e-06, "loss": 3.7325, "step": 1890 }, { "epoch": 0.10042017917074073, "grad_norm": 130.13360595703125, "learning_rate": 4.994714587737844e-06, "loss": 3.9996, "step": 1900 }, { "epoch": 0.1009487064295341, "grad_norm": 116.45214080810547, "learning_rate": 5e-06, "loss": 3.5501, "step": 1910 }, { "epoch": 0.10147723368832748, "grad_norm": 118.97759246826172, "learning_rate": 5e-06, "loss": 3.6127, "step": 1920 }, { "epoch": 0.10200576094712084, "grad_norm": 102.94424438476562, "learning_rate": 5e-06, "loss": 3.5908, "step": 1930 }, { "epoch": 0.10253428820591422, "grad_norm": 122.46263122558594, "learning_rate": 5e-06, "loss": 3.7061, "step": 1940 }, { "epoch": 0.1030628154647076, "grad_norm": 124.36211395263672, "learning_rate": 5e-06, "loss": 3.6276, "step": 1950 }, { "epoch": 0.10359134272350097, "grad_norm": 133.5564727783203, "learning_rate": 5e-06, "loss": 4.1169, "step": 1960 }, { "epoch": 0.10411986998229433, "grad_norm": 113.18955993652344, "learning_rate": 5e-06, "loss": 3.8013, "step": 1970 }, { "epoch": 0.10464839724108771, "grad_norm": 138.8753204345703, "learning_rate": 5e-06, "loss": 3.7246, "step": 1980 }, { "epoch": 0.10517692449988109, "grad_norm": 116.16462707519531, "learning_rate": 5e-06, "loss": 4.193, "step": 1990 }, { "epoch": 0.10570545175867445, "grad_norm": 108.90361022949219, "learning_rate": 5e-06, "loss": 4.295, "step": 2000 }, { "epoch": 0.10570545175867445, "eval_loss": 3.0892982482910156, "eval_runtime": 42.8378, "eval_samples_per_second": 285.542, "eval_steps_per_second": 8.941, "eval_sts-dev_pearson_cosine": 0.8229705980343613, "eval_sts-dev_pearson_dot": 0.8127596449860095, "eval_sts-dev_pearson_euclidean": 0.834291153441455, "eval_sts-dev_pearson_manhattan": 0.8347120860335925, "eval_sts-dev_pearson_max": 0.8347120860335925, "eval_sts-dev_spearman_cosine": 0.8322087038494308, "eval_sts-dev_spearman_dot": 0.8148026205548127, "eval_sts-dev_spearman_euclidean": 0.8308379382854225, "eval_sts-dev_spearman_manhattan": 0.8312181471301788, "eval_sts-dev_spearman_max": 0.8322087038494308, "step": 2000 }, { "epoch": 0.10623397901746783, "grad_norm": 104.32476806640625, "learning_rate": 5e-06, "loss": 4.0312, "step": 2010 }, { "epoch": 0.1067625062762612, "grad_norm": 88.6232681274414, "learning_rate": 5e-06, "loss": 3.8994, "step": 2020 }, { "epoch": 0.10729103353505456, "grad_norm": 162.1048126220703, "learning_rate": 5e-06, "loss": 3.6237, "step": 2030 }, { "epoch": 0.10781956079384794, "grad_norm": 124.26639556884766, "learning_rate": 5e-06, "loss": 3.4146, "step": 2040 }, { "epoch": 0.10834808805264132, "grad_norm": 133.0275421142578, "learning_rate": 5e-06, "loss": 3.8146, "step": 2050 }, { "epoch": 0.1088766153114347, "grad_norm": 174.81475830078125, "learning_rate": 5e-06, "loss": 3.6511, "step": 2060 }, { "epoch": 0.10940514257022806, "grad_norm": 452.0158386230469, "learning_rate": 5e-06, "loss": 3.6276, "step": 2070 }, { "epoch": 0.10993366982902143, "grad_norm": 134.3980712890625, "learning_rate": 5e-06, "loss": 3.3791, "step": 2080 }, { "epoch": 0.11046219708781481, "grad_norm": 748.50439453125, "learning_rate": 5e-06, "loss": 4.2062, "step": 2090 }, { "epoch": 0.11099072434660817, "grad_norm": 121.74251556396484, "learning_rate": 5e-06, "loss": 3.9781, "step": 2100 }, { "epoch": 0.11151925160540155, "grad_norm": 262.08673095703125, "learning_rate": 5e-06, "loss": 3.3256, "step": 2110 }, { "epoch": 0.11204777886419492, "grad_norm": 196.1018524169922, "learning_rate": 5e-06, "loss": 3.526, "step": 2120 }, { "epoch": 0.11257630612298829, "grad_norm": 152.33229064941406, "learning_rate": 5e-06, "loss": 3.8106, "step": 2130 }, { "epoch": 0.11310483338178166, "grad_norm": 274.8190002441406, "learning_rate": 5e-06, "loss": 3.2524, "step": 2140 }, { "epoch": 0.11363336064057504, "grad_norm": 234.61090087890625, "learning_rate": 5e-06, "loss": 3.5077, "step": 2150 }, { "epoch": 0.11416188789936842, "grad_norm": 139.89031982421875, "learning_rate": 5e-06, "loss": 3.7765, "step": 2160 }, { "epoch": 0.11469041515816178, "grad_norm": 95.74369812011719, "learning_rate": 5e-06, "loss": 3.7529, "step": 2170 }, { "epoch": 0.11521894241695516, "grad_norm": 104.67826080322266, "learning_rate": 5e-06, "loss": 3.4596, "step": 2180 }, { "epoch": 0.11574746967574853, "grad_norm": 185.16998291015625, "learning_rate": 5e-06, "loss": 3.1843, "step": 2190 }, { "epoch": 0.1162759969345419, "grad_norm": 159.19680786132812, "learning_rate": 5e-06, "loss": 3.3945, "step": 2200 }, { "epoch": 0.11680452419333527, "grad_norm": 225.2294464111328, "learning_rate": 5e-06, "loss": 3.4831, "step": 2210 }, { "epoch": 0.11733305145212865, "grad_norm": 97.39671325683594, "learning_rate": 5e-06, "loss": 3.6178, "step": 2220 }, { "epoch": 0.11786157871092201, "grad_norm": 129.05470275878906, "learning_rate": 5e-06, "loss": 3.9299, "step": 2230 }, { "epoch": 0.11839010596971539, "grad_norm": 99.11614990234375, "learning_rate": 5e-06, "loss": 3.1327, "step": 2240 }, { "epoch": 0.11891863322850876, "grad_norm": 120.35791778564453, "learning_rate": 5e-06, "loss": 3.454, "step": 2250 }, { "epoch": 0.11944716048730213, "grad_norm": 145.4455108642578, "learning_rate": 5e-06, "loss": 3.2395, "step": 2260 }, { "epoch": 0.1199756877460955, "grad_norm": 118.87342071533203, "learning_rate": 5e-06, "loss": 3.2547, "step": 2270 }, { "epoch": 0.12050421500488888, "grad_norm": 111.57682037353516, "learning_rate": 5e-06, "loss": 4.3915, "step": 2280 }, { "epoch": 0.12103274226368226, "grad_norm": 94.49699401855469, "learning_rate": 5e-06, "loss": 3.7403, "step": 2290 }, { "epoch": 0.12156126952247562, "grad_norm": 410.17352294921875, "learning_rate": 5e-06, "loss": 3.2871, "step": 2300 }, { "epoch": 0.122089796781269, "grad_norm": 169.58985900878906, "learning_rate": 5e-06, "loss": 3.5985, "step": 2310 }, { "epoch": 0.12261832404006237, "grad_norm": 126.57696533203125, "learning_rate": 5e-06, "loss": 3.4529, "step": 2320 }, { "epoch": 0.12314685129885573, "grad_norm": 113.62972259521484, "learning_rate": 5e-06, "loss": 3.8486, "step": 2330 }, { "epoch": 0.12367537855764911, "grad_norm": 173.26112365722656, "learning_rate": 5e-06, "loss": 3.082, "step": 2340 }, { "epoch": 0.12420390581644249, "grad_norm": 107.14911651611328, "learning_rate": 5e-06, "loss": 3.3215, "step": 2350 }, { "epoch": 0.12473243307523585, "grad_norm": 117.90474700927734, "learning_rate": 5e-06, "loss": 3.4311, "step": 2360 }, { "epoch": 0.12526096033402923, "grad_norm": 100.70008087158203, "learning_rate": 5e-06, "loss": 3.1914, "step": 2370 }, { "epoch": 0.1257894875928226, "grad_norm": 125.84510040283203, "learning_rate": 5e-06, "loss": 3.3602, "step": 2380 }, { "epoch": 0.12631801485161598, "grad_norm": 104.65560913085938, "learning_rate": 5e-06, "loss": 3.5217, "step": 2390 }, { "epoch": 0.12684654211040935, "grad_norm": 225.65335083007812, "learning_rate": 5e-06, "loss": 3.2764, "step": 2400 }, { "epoch": 0.1273750693692027, "grad_norm": 156.574462890625, "learning_rate": 5e-06, "loss": 3.2842, "step": 2410 }, { "epoch": 0.12790359662799608, "grad_norm": 112.25650024414062, "learning_rate": 5e-06, "loss": 3.5077, "step": 2420 }, { "epoch": 0.12843212388678946, "grad_norm": 569.938232421875, "learning_rate": 5e-06, "loss": 3.5205, "step": 2430 }, { "epoch": 0.12896065114558283, "grad_norm": 110.05473327636719, "learning_rate": 5e-06, "loss": 4.0949, "step": 2440 }, { "epoch": 0.1294891784043762, "grad_norm": 103.79340362548828, "learning_rate": 5e-06, "loss": 3.4999, "step": 2450 }, { "epoch": 0.13001770566316959, "grad_norm": 133.9407958984375, "learning_rate": 5e-06, "loss": 4.2648, "step": 2460 }, { "epoch": 0.13054623292196296, "grad_norm": 123.05575561523438, "learning_rate": 5e-06, "loss": 3.8938, "step": 2470 }, { "epoch": 0.1310747601807563, "grad_norm": 118.81642150878906, "learning_rate": 5e-06, "loss": 3.4282, "step": 2480 }, { "epoch": 0.1316032874395497, "grad_norm": 200.04893493652344, "learning_rate": 5e-06, "loss": 3.07, "step": 2490 }, { "epoch": 0.13213181469834306, "grad_norm": 129.6864013671875, "learning_rate": 5e-06, "loss": 3.2797, "step": 2500 }, { "epoch": 0.13266034195713644, "grad_norm": 108.05059051513672, "learning_rate": 5e-06, "loss": 3.3628, "step": 2510 }, { "epoch": 0.13318886921592982, "grad_norm": 122.91266632080078, "learning_rate": 5e-06, "loss": 3.3319, "step": 2520 }, { "epoch": 0.1337173964747232, "grad_norm": 379.0265808105469, "learning_rate": 5e-06, "loss": 3.2682, "step": 2530 }, { "epoch": 0.13424592373351654, "grad_norm": 149.47569274902344, "learning_rate": 5e-06, "loss": 3.3463, "step": 2540 }, { "epoch": 0.13477445099230992, "grad_norm": 85.81983947753906, "learning_rate": 5e-06, "loss": 3.4556, "step": 2550 }, { "epoch": 0.1353029782511033, "grad_norm": 113.7278823852539, "learning_rate": 5e-06, "loss": 3.5043, "step": 2560 }, { "epoch": 0.13583150550989667, "grad_norm": 98.1441421508789, "learning_rate": 5e-06, "loss": 3.4814, "step": 2570 }, { "epoch": 0.13636003276869005, "grad_norm": 93.17894744873047, "learning_rate": 5e-06, "loss": 3.2423, "step": 2580 }, { "epoch": 0.13688856002748342, "grad_norm": 183.36965942382812, "learning_rate": 5e-06, "loss": 3.2511, "step": 2590 }, { "epoch": 0.1374170872862768, "grad_norm": 114.61785888671875, "learning_rate": 5e-06, "loss": 2.8796, "step": 2600 }, { "epoch": 0.13794561454507015, "grad_norm": 147.88072204589844, "learning_rate": 5e-06, "loss": 3.424, "step": 2610 }, { "epoch": 0.13847414180386353, "grad_norm": 178.03887939453125, "learning_rate": 5e-06, "loss": 3.369, "step": 2620 }, { "epoch": 0.1390026690626569, "grad_norm": 175.87086486816406, "learning_rate": 5e-06, "loss": 2.7698, "step": 2630 }, { "epoch": 0.13953119632145028, "grad_norm": 158.91262817382812, "learning_rate": 5e-06, "loss": 2.9491, "step": 2640 }, { "epoch": 0.14005972358024366, "grad_norm": 146.5417022705078, "learning_rate": 5e-06, "loss": 3.4251, "step": 2650 }, { "epoch": 0.14058825083903703, "grad_norm": 152.3367156982422, "learning_rate": 5e-06, "loss": 2.9474, "step": 2660 }, { "epoch": 0.1411167780978304, "grad_norm": 157.4917755126953, "learning_rate": 5e-06, "loss": 2.9178, "step": 2670 }, { "epoch": 0.14164530535662376, "grad_norm": 108.15939331054688, "learning_rate": 5e-06, "loss": 3.5167, "step": 2680 }, { "epoch": 0.14217383261541713, "grad_norm": 133.39602661132812, "learning_rate": 5e-06, "loss": 2.8399, "step": 2690 }, { "epoch": 0.1427023598742105, "grad_norm": 107.16201782226562, "learning_rate": 5e-06, "loss": 3.5662, "step": 2700 }, { "epoch": 0.1432308871330039, "grad_norm": 109.21529388427734, "learning_rate": 5e-06, "loss": 2.728, "step": 2710 }, { "epoch": 0.14375941439179726, "grad_norm": 83.75203704833984, "learning_rate": 5e-06, "loss": 2.982, "step": 2720 }, { "epoch": 0.14428794165059064, "grad_norm": 143.2152862548828, "learning_rate": 5e-06, "loss": 3.0574, "step": 2730 }, { "epoch": 0.144816468909384, "grad_norm": 113.67931365966797, "learning_rate": 5e-06, "loss": 2.6998, "step": 2740 }, { "epoch": 0.14534499616817737, "grad_norm": 110.7346420288086, "learning_rate": 5e-06, "loss": 3.0641, "step": 2750 }, { "epoch": 0.14587352342697074, "grad_norm": 88.32946014404297, "learning_rate": 5e-06, "loss": 2.6829, "step": 2760 }, { "epoch": 0.14640205068576412, "grad_norm": 110.84928894042969, "learning_rate": 5e-06, "loss": 3.1356, "step": 2770 }, { "epoch": 0.1469305779445575, "grad_norm": 104.03882598876953, "learning_rate": 5e-06, "loss": 3.3968, "step": 2780 }, { "epoch": 0.14745910520335087, "grad_norm": 111.13092803955078, "learning_rate": 5e-06, "loss": 3.1089, "step": 2790 }, { "epoch": 0.14798763246214425, "grad_norm": 115.56938171386719, "learning_rate": 5e-06, "loss": 3.0553, "step": 2800 }, { "epoch": 0.1485161597209376, "grad_norm": 120.62376403808594, "learning_rate": 5e-06, "loss": 3.4705, "step": 2810 }, { "epoch": 0.14904468697973097, "grad_norm": 111.65912628173828, "learning_rate": 5e-06, "loss": 2.819, "step": 2820 }, { "epoch": 0.14957321423852435, "grad_norm": 107.1757583618164, "learning_rate": 5e-06, "loss": 3.1859, "step": 2830 }, { "epoch": 0.15010174149731773, "grad_norm": 75.35289001464844, "learning_rate": 5e-06, "loss": 3.074, "step": 2840 }, { "epoch": 0.1506302687561111, "grad_norm": 96.39420318603516, "learning_rate": 5e-06, "loss": 3.5011, "step": 2850 }, { "epoch": 0.15115879601490448, "grad_norm": 126.04460906982422, "learning_rate": 5e-06, "loss": 3.568, "step": 2860 }, { "epoch": 0.15168732327369783, "grad_norm": 142.1207733154297, "learning_rate": 5e-06, "loss": 2.9143, "step": 2870 }, { "epoch": 0.1522158505324912, "grad_norm": 106.59545135498047, "learning_rate": 5e-06, "loss": 2.9493, "step": 2880 }, { "epoch": 0.15274437779128458, "grad_norm": 141.34963989257812, "learning_rate": 5e-06, "loss": 3.2193, "step": 2890 }, { "epoch": 0.15327290505007796, "grad_norm": 112.30767059326172, "learning_rate": 5e-06, "loss": 3.4707, "step": 2900 }, { "epoch": 0.15380143230887133, "grad_norm": 117.78746795654297, "learning_rate": 5e-06, "loss": 2.9435, "step": 2910 }, { "epoch": 0.1543299595676647, "grad_norm": 131.08436584472656, "learning_rate": 5e-06, "loss": 3.1377, "step": 2920 }, { "epoch": 0.15485848682645809, "grad_norm": 251.7745819091797, "learning_rate": 5e-06, "loss": 3.257, "step": 2930 }, { "epoch": 0.15538701408525143, "grad_norm": 116.6044692993164, "learning_rate": 5e-06, "loss": 3.5603, "step": 2940 }, { "epoch": 0.1559155413440448, "grad_norm": 134.99510192871094, "learning_rate": 5e-06, "loss": 3.3866, "step": 2950 }, { "epoch": 0.1564440686028382, "grad_norm": 146.512939453125, "learning_rate": 5e-06, "loss": 2.9176, "step": 2960 }, { "epoch": 0.15697259586163156, "grad_norm": 127.92869567871094, "learning_rate": 5e-06, "loss": 3.3868, "step": 2970 }, { "epoch": 0.15750112312042494, "grad_norm": 123.8846664428711, "learning_rate": 5e-06, "loss": 3.5115, "step": 2980 }, { "epoch": 0.15802965037921832, "grad_norm": 315.5433044433594, "learning_rate": 5e-06, "loss": 3.1473, "step": 2990 }, { "epoch": 0.1585581776380117, "grad_norm": 92.26111602783203, "learning_rate": 5e-06, "loss": 2.9326, "step": 3000 }, { "epoch": 0.1585581776380117, "eval_loss": 2.752798318862915, "eval_runtime": 45.1096, "eval_samples_per_second": 271.162, "eval_steps_per_second": 8.49, "eval_sts-dev_pearson_cosine": 0.8357136850916924, "eval_sts-dev_pearson_dot": 0.8184811922977646, "eval_sts-dev_pearson_euclidean": 0.837959718528368, "eval_sts-dev_pearson_manhattan": 0.8395386154626387, "eval_sts-dev_pearson_max": 0.8395386154626387, "eval_sts-dev_spearman_cosine": 0.8419318002886358, "eval_sts-dev_spearman_dot": 0.8169894873183425, "eval_sts-dev_spearman_euclidean": 0.8367968833378484, "eval_sts-dev_spearman_manhattan": 0.8384608355031593, "eval_sts-dev_spearman_max": 0.8419318002886358, "step": 3000 }, { "epoch": 0.15908670489680504, "grad_norm": 111.70408630371094, "learning_rate": 5e-06, "loss": 3.0363, "step": 3010 }, { "epoch": 0.15961523215559842, "grad_norm": 125.21070098876953, "learning_rate": 5e-06, "loss": 3.2108, "step": 3020 }, { "epoch": 0.1601437594143918, "grad_norm": 119.08252716064453, "learning_rate": 5e-06, "loss": 3.4492, "step": 3030 }, { "epoch": 0.16067228667318517, "grad_norm": 196.8905029296875, "learning_rate": 5e-06, "loss": 3.6485, "step": 3040 }, { "epoch": 0.16120081393197855, "grad_norm": 107.0430908203125, "learning_rate": 5e-06, "loss": 2.893, "step": 3050 }, { "epoch": 0.16172934119077192, "grad_norm": 107.47099304199219, "learning_rate": 5e-06, "loss": 3.4406, "step": 3060 }, { "epoch": 0.16225786844956527, "grad_norm": 83.6535873413086, "learning_rate": 5e-06, "loss": 3.2535, "step": 3070 }, { "epoch": 0.16278639570835865, "grad_norm": 102.46939086914062, "learning_rate": 5e-06, "loss": 2.6682, "step": 3080 }, { "epoch": 0.16331492296715203, "grad_norm": 100.44358825683594, "learning_rate": 5e-06, "loss": 2.8742, "step": 3090 }, { "epoch": 0.1638434502259454, "grad_norm": 163.17984008789062, "learning_rate": 5e-06, "loss": 3.3197, "step": 3100 }, { "epoch": 0.16437197748473878, "grad_norm": 100.51493835449219, "learning_rate": 5e-06, "loss": 3.1714, "step": 3110 }, { "epoch": 0.16490050474353216, "grad_norm": 91.7384262084961, "learning_rate": 5e-06, "loss": 3.5535, "step": 3120 }, { "epoch": 0.16542903200232553, "grad_norm": 94.91766357421875, "learning_rate": 5e-06, "loss": 3.0781, "step": 3130 }, { "epoch": 0.16595755926111888, "grad_norm": 137.36489868164062, "learning_rate": 5e-06, "loss": 3.162, "step": 3140 }, { "epoch": 0.16648608651991226, "grad_norm": 217.2371368408203, "learning_rate": 5e-06, "loss": 3.1818, "step": 3150 }, { "epoch": 0.16701461377870563, "grad_norm": 106.30720520019531, "learning_rate": 5e-06, "loss": 3.1401, "step": 3160 }, { "epoch": 0.167543141037499, "grad_norm": 116.82994842529297, "learning_rate": 5e-06, "loss": 3.277, "step": 3170 }, { "epoch": 0.1680716682962924, "grad_norm": 73.77325439453125, "learning_rate": 5e-06, "loss": 3.0623, "step": 3180 }, { "epoch": 0.16860019555508576, "grad_norm": 115.15567779541016, "learning_rate": 5e-06, "loss": 3.2008, "step": 3190 }, { "epoch": 0.1691287228138791, "grad_norm": 96.37894439697266, "learning_rate": 5e-06, "loss": 2.9205, "step": 3200 }, { "epoch": 0.1696572500726725, "grad_norm": 94.3653793334961, "learning_rate": 5e-06, "loss": 2.7414, "step": 3210 }, { "epoch": 0.17018577733146587, "grad_norm": 115.94802856445312, "learning_rate": 5e-06, "loss": 2.9346, "step": 3220 }, { "epoch": 0.17071430459025924, "grad_norm": 135.32847595214844, "learning_rate": 5e-06, "loss": 2.9595, "step": 3230 }, { "epoch": 0.17124283184905262, "grad_norm": 114.90149688720703, "learning_rate": 5e-06, "loss": 3.2256, "step": 3240 }, { "epoch": 0.171771359107846, "grad_norm": 104.90364837646484, "learning_rate": 5e-06, "loss": 3.69, "step": 3250 }, { "epoch": 0.17229988636663937, "grad_norm": 108.89354705810547, "learning_rate": 5e-06, "loss": 3.1443, "step": 3260 }, { "epoch": 0.17282841362543272, "grad_norm": 80.30792236328125, "learning_rate": 5e-06, "loss": 2.9954, "step": 3270 }, { "epoch": 0.1733569408842261, "grad_norm": 100.1319808959961, "learning_rate": 5e-06, "loss": 2.8085, "step": 3280 }, { "epoch": 0.17388546814301947, "grad_norm": 120.2499771118164, "learning_rate": 5e-06, "loss": 2.8288, "step": 3290 }, { "epoch": 0.17441399540181285, "grad_norm": 120.04457092285156, "learning_rate": 5e-06, "loss": 3.4209, "step": 3300 }, { "epoch": 0.17494252266060623, "grad_norm": 96.71575927734375, "learning_rate": 5e-06, "loss": 3.322, "step": 3310 }, { "epoch": 0.1754710499193996, "grad_norm": 158.1149139404297, "learning_rate": 5e-06, "loss": 3.0216, "step": 3320 }, { "epoch": 0.17599957717819298, "grad_norm": 183.33299255371094, "learning_rate": 5e-06, "loss": 3.3436, "step": 3330 }, { "epoch": 0.17652810443698633, "grad_norm": 98.19087982177734, "learning_rate": 5e-06, "loss": 2.5108, "step": 3340 }, { "epoch": 0.1770566316957797, "grad_norm": 115.44419860839844, "learning_rate": 5e-06, "loss": 2.8275, "step": 3350 }, { "epoch": 0.17758515895457308, "grad_norm": 91.81729125976562, "learning_rate": 5e-06, "loss": 3.3355, "step": 3360 }, { "epoch": 0.17811368621336646, "grad_norm": 133.90577697753906, "learning_rate": 5e-06, "loss": 3.2709, "step": 3370 }, { "epoch": 0.17864221347215983, "grad_norm": 162.4363250732422, "learning_rate": 5e-06, "loss": 3.1768, "step": 3380 }, { "epoch": 0.1791707407309532, "grad_norm": 99.245361328125, "learning_rate": 5e-06, "loss": 3.1812, "step": 3390 }, { "epoch": 0.17969926798974656, "grad_norm": 97.58031463623047, "learning_rate": 5e-06, "loss": 3.1249, "step": 3400 }, { "epoch": 0.18022779524853993, "grad_norm": 138.724365234375, "learning_rate": 5e-06, "loss": 3.0331, "step": 3410 }, { "epoch": 0.1807563225073333, "grad_norm": 111.52359008789062, "learning_rate": 5e-06, "loss": 2.8404, "step": 3420 }, { "epoch": 0.1812848497661267, "grad_norm": 124.68292236328125, "learning_rate": 5e-06, "loss": 3.0225, "step": 3430 }, { "epoch": 0.18181337702492006, "grad_norm": 114.96760559082031, "learning_rate": 5e-06, "loss": 3.3184, "step": 3440 }, { "epoch": 0.18234190428371344, "grad_norm": 152.0911865234375, "learning_rate": 5e-06, "loss": 2.8057, "step": 3450 }, { "epoch": 0.18287043154250682, "grad_norm": 106.23136138916016, "learning_rate": 5e-06, "loss": 3.0416, "step": 3460 }, { "epoch": 0.18339895880130017, "grad_norm": 96.04741668701172, "learning_rate": 5e-06, "loss": 3.0723, "step": 3470 }, { "epoch": 0.18392748606009354, "grad_norm": 96.21978759765625, "learning_rate": 5e-06, "loss": 3.4026, "step": 3480 }, { "epoch": 0.18445601331888692, "grad_norm": 142.70057678222656, "learning_rate": 5e-06, "loss": 2.9979, "step": 3490 }, { "epoch": 0.1849845405776803, "grad_norm": 113.43472290039062, "learning_rate": 5e-06, "loss": 3.443, "step": 3500 }, { "epoch": 0.18551306783647367, "grad_norm": 66.78421783447266, "learning_rate": 5e-06, "loss": 2.6448, "step": 3510 }, { "epoch": 0.18604159509526705, "grad_norm": 95.42284393310547, "learning_rate": 5e-06, "loss": 3.2442, "step": 3520 }, { "epoch": 0.1865701223540604, "grad_norm": 112.01078033447266, "learning_rate": 5e-06, "loss": 2.718, "step": 3530 }, { "epoch": 0.18709864961285377, "grad_norm": 93.84679412841797, "learning_rate": 5e-06, "loss": 3.102, "step": 3540 }, { "epoch": 0.18762717687164715, "grad_norm": 87.2450180053711, "learning_rate": 5e-06, "loss": 3.0568, "step": 3550 }, { "epoch": 0.18815570413044053, "grad_norm": 88.86810302734375, "learning_rate": 5e-06, "loss": 3.1704, "step": 3560 }, { "epoch": 0.1886842313892339, "grad_norm": 86.44938659667969, "learning_rate": 5e-06, "loss": 3.1248, "step": 3570 }, { "epoch": 0.18921275864802728, "grad_norm": 94.07122802734375, "learning_rate": 5e-06, "loss": 3.4115, "step": 3580 }, { "epoch": 0.18974128590682066, "grad_norm": 246.6625213623047, "learning_rate": 5e-06, "loss": 3.3282, "step": 3590 }, { "epoch": 0.190269813165614, "grad_norm": 143.3963623046875, "learning_rate": 5e-06, "loss": 3.0174, "step": 3600 }, { "epoch": 0.19079834042440738, "grad_norm": 253.36566162109375, "learning_rate": 5e-06, "loss": 3.1675, "step": 3610 }, { "epoch": 0.19132686768320076, "grad_norm": 91.88736724853516, "learning_rate": 5e-06, "loss": 2.969, "step": 3620 }, { "epoch": 0.19185539494199413, "grad_norm": 128.03514099121094, "learning_rate": 5e-06, "loss": 3.0357, "step": 3630 }, { "epoch": 0.1923839222007875, "grad_norm": 90.0180892944336, "learning_rate": 5e-06, "loss": 2.9588, "step": 3640 }, { "epoch": 0.1929124494595809, "grad_norm": 84.69573211669922, "learning_rate": 5e-06, "loss": 3.5595, "step": 3650 }, { "epoch": 0.19344097671837426, "grad_norm": 94.70064544677734, "learning_rate": 5e-06, "loss": 3.4269, "step": 3660 }, { "epoch": 0.1939695039771676, "grad_norm": 91.56018829345703, "learning_rate": 5e-06, "loss": 2.732, "step": 3670 }, { "epoch": 0.194498031235961, "grad_norm": 104.98668670654297, "learning_rate": 5e-06, "loss": 2.8213, "step": 3680 }, { "epoch": 0.19502655849475437, "grad_norm": 101.18280029296875, "learning_rate": 5e-06, "loss": 3.0934, "step": 3690 }, { "epoch": 0.19555508575354774, "grad_norm": 73.52476501464844, "learning_rate": 5e-06, "loss": 2.8047, "step": 3700 }, { "epoch": 0.19608361301234112, "grad_norm": 78.50372314453125, "learning_rate": 5e-06, "loss": 2.5503, "step": 3710 }, { "epoch": 0.1966121402711345, "grad_norm": 78.5093002319336, "learning_rate": 5e-06, "loss": 3.0994, "step": 3720 }, { "epoch": 0.19714066752992784, "grad_norm": 81.58222961425781, "learning_rate": 5e-06, "loss": 2.6705, "step": 3730 }, { "epoch": 0.19766919478872122, "grad_norm": 151.78358459472656, "learning_rate": 5e-06, "loss": 2.6332, "step": 3740 }, { "epoch": 0.1981977220475146, "grad_norm": 147.77880859375, "learning_rate": 5e-06, "loss": 3.1632, "step": 3750 }, { "epoch": 0.19872624930630797, "grad_norm": 91.11559295654297, "learning_rate": 5e-06, "loss": 2.7999, "step": 3760 }, { "epoch": 0.19925477656510135, "grad_norm": 1408.048828125, "learning_rate": 5e-06, "loss": 2.7271, "step": 3770 }, { "epoch": 0.19978330382389473, "grad_norm": 74.82400512695312, "learning_rate": 5e-06, "loss": 2.7626, "step": 3780 }, { "epoch": 0.2003118310826881, "grad_norm": 92.69935607910156, "learning_rate": 5e-06, "loss": 3.0003, "step": 3790 }, { "epoch": 0.20084035834148145, "grad_norm": 63.5579719543457, "learning_rate": 5e-06, "loss": 2.3595, "step": 3800 }, { "epoch": 0.20136888560027483, "grad_norm": 96.93280029296875, "learning_rate": 5e-06, "loss": 3.1453, "step": 3810 }, { "epoch": 0.2018974128590682, "grad_norm": 120.19187927246094, "learning_rate": 5e-06, "loss": 3.4523, "step": 3820 }, { "epoch": 0.20242594011786158, "grad_norm": 81.38937377929688, "learning_rate": 5e-06, "loss": 2.6247, "step": 3830 }, { "epoch": 0.20295446737665496, "grad_norm": 80.31893157958984, "learning_rate": 5e-06, "loss": 3.0611, "step": 3840 }, { "epoch": 0.20348299463544833, "grad_norm": 100.67012786865234, "learning_rate": 5e-06, "loss": 3.3603, "step": 3850 }, { "epoch": 0.20401152189424168, "grad_norm": 88.81230926513672, "learning_rate": 5e-06, "loss": 3.2233, "step": 3860 }, { "epoch": 0.20454004915303506, "grad_norm": 230.0200958251953, "learning_rate": 5e-06, "loss": 2.6191, "step": 3870 }, { "epoch": 0.20506857641182843, "grad_norm": 100.8106689453125, "learning_rate": 5e-06, "loss": 2.6844, "step": 3880 }, { "epoch": 0.2055971036706218, "grad_norm": 58.555419921875, "learning_rate": 5e-06, "loss": 2.8535, "step": 3890 }, { "epoch": 0.2061256309294152, "grad_norm": 101.62776947021484, "learning_rate": 5e-06, "loss": 3.0453, "step": 3900 }, { "epoch": 0.20665415818820856, "grad_norm": 93.81981658935547, "learning_rate": 5e-06, "loss": 2.756, "step": 3910 }, { "epoch": 0.20718268544700194, "grad_norm": 94.22981262207031, "learning_rate": 5e-06, "loss": 3.1061, "step": 3920 }, { "epoch": 0.2077112127057953, "grad_norm": 96.19477844238281, "learning_rate": 5e-06, "loss": 3.0669, "step": 3930 }, { "epoch": 0.20823973996458867, "grad_norm": 77.46731567382812, "learning_rate": 5e-06, "loss": 2.7907, "step": 3940 }, { "epoch": 0.20876826722338204, "grad_norm": 114.99488830566406, "learning_rate": 5e-06, "loss": 3.543, "step": 3950 }, { "epoch": 0.20929679448217542, "grad_norm": 81.93171691894531, "learning_rate": 5e-06, "loss": 2.637, "step": 3960 }, { "epoch": 0.2098253217409688, "grad_norm": 114.71746063232422, "learning_rate": 5e-06, "loss": 2.8429, "step": 3970 }, { "epoch": 0.21035384899976217, "grad_norm": 79.0142822265625, "learning_rate": 5e-06, "loss": 3.366, "step": 3980 }, { "epoch": 0.21088237625855555, "grad_norm": 83.37735748291016, "learning_rate": 5e-06, "loss": 2.928, "step": 3990 }, { "epoch": 0.2114109035173489, "grad_norm": 92.91641998291016, "learning_rate": 5e-06, "loss": 2.924, "step": 4000 }, { "epoch": 0.2114109035173489, "eval_loss": 2.5643086433410645, "eval_runtime": 43.711, "eval_samples_per_second": 279.838, "eval_steps_per_second": 8.762, "eval_sts-dev_pearson_cosine": 0.8339356393289103, "eval_sts-dev_pearson_dot": 0.8181027003123573, "eval_sts-dev_pearson_euclidean": 0.8387131254733715, "eval_sts-dev_pearson_manhattan": 0.8392322302102793, "eval_sts-dev_pearson_max": 0.8392322302102793, "eval_sts-dev_spearman_cosine": 0.8398784892175655, "eval_sts-dev_spearman_dot": 0.8196660164207918, "eval_sts-dev_spearman_euclidean": 0.8366956028836601, "eval_sts-dev_spearman_manhattan": 0.8374044566482887, "eval_sts-dev_spearman_max": 0.8398784892175655, "step": 4000 }, { "epoch": 0.21193943077614227, "grad_norm": 73.02286529541016, "learning_rate": 5e-06, "loss": 3.1176, "step": 4010 }, { "epoch": 0.21246795803493565, "grad_norm": 81.9727554321289, "learning_rate": 5e-06, "loss": 3.0559, "step": 4020 }, { "epoch": 0.21299648529372903, "grad_norm": 98.16416931152344, "learning_rate": 5e-06, "loss": 2.6467, "step": 4030 }, { "epoch": 0.2135250125525224, "grad_norm": 127.52660369873047, "learning_rate": 5e-06, "loss": 3.5661, "step": 4040 }, { "epoch": 0.21405353981131578, "grad_norm": 91.64215087890625, "learning_rate": 5e-06, "loss": 2.6231, "step": 4050 }, { "epoch": 0.21458206707010913, "grad_norm": 77.68624114990234, "learning_rate": 5e-06, "loss": 2.9249, "step": 4060 }, { "epoch": 0.2151105943289025, "grad_norm": 188.5897216796875, "learning_rate": 5e-06, "loss": 3.1276, "step": 4070 }, { "epoch": 0.21563912158769588, "grad_norm": 83.77557373046875, "learning_rate": 5e-06, "loss": 2.7969, "step": 4080 }, { "epoch": 0.21616764884648926, "grad_norm": 88.76850891113281, "learning_rate": 5e-06, "loss": 3.3618, "step": 4090 }, { "epoch": 0.21669617610528263, "grad_norm": 121.21629333496094, "learning_rate": 5e-06, "loss": 3.1227, "step": 4100 }, { "epoch": 0.217224703364076, "grad_norm": 114.9481201171875, "learning_rate": 5e-06, "loss": 2.7564, "step": 4110 }, { "epoch": 0.2177532306228694, "grad_norm": 109.53424072265625, "learning_rate": 5e-06, "loss": 2.9048, "step": 4120 }, { "epoch": 0.21828175788166274, "grad_norm": 129.43502807617188, "learning_rate": 5e-06, "loss": 2.8664, "step": 4130 }, { "epoch": 0.2188102851404561, "grad_norm": 108.19719696044922, "learning_rate": 5e-06, "loss": 2.285, "step": 4140 }, { "epoch": 0.2193388123992495, "grad_norm": 151.9497833251953, "learning_rate": 5e-06, "loss": 2.8941, "step": 4150 }, { "epoch": 0.21986733965804287, "grad_norm": 179.0300750732422, "learning_rate": 5e-06, "loss": 3.1457, "step": 4160 }, { "epoch": 0.22039586691683624, "grad_norm": 881.2627563476562, "learning_rate": 5e-06, "loss": 2.8739, "step": 4170 }, { "epoch": 0.22092439417562962, "grad_norm": 122.3038558959961, "learning_rate": 5e-06, "loss": 2.7301, "step": 4180 }, { "epoch": 0.22145292143442297, "grad_norm": 110.11743927001953, "learning_rate": 5e-06, "loss": 3.1653, "step": 4190 }, { "epoch": 0.22198144869321634, "grad_norm": 99.87207794189453, "learning_rate": 5e-06, "loss": 2.947, "step": 4200 }, { "epoch": 0.22250997595200972, "grad_norm": 122.35835266113281, "learning_rate": 5e-06, "loss": 2.7506, "step": 4210 }, { "epoch": 0.2230385032108031, "grad_norm": 256.5321044921875, "learning_rate": 5e-06, "loss": 2.612, "step": 4220 }, { "epoch": 0.22356703046959647, "grad_norm": 94.53744506835938, "learning_rate": 5e-06, "loss": 3.0018, "step": 4230 }, { "epoch": 0.22409555772838985, "grad_norm": 139.75704956054688, "learning_rate": 5e-06, "loss": 2.8655, "step": 4240 }, { "epoch": 0.22462408498718323, "grad_norm": 84.01942443847656, "learning_rate": 5e-06, "loss": 2.9952, "step": 4250 }, { "epoch": 0.22515261224597657, "grad_norm": 214.548095703125, "learning_rate": 5e-06, "loss": 2.8199, "step": 4260 }, { "epoch": 0.22568113950476995, "grad_norm": 188.50990295410156, "learning_rate": 5e-06, "loss": 2.8755, "step": 4270 }, { "epoch": 0.22620966676356333, "grad_norm": 112.91693878173828, "learning_rate": 5e-06, "loss": 2.7019, "step": 4280 }, { "epoch": 0.2267381940223567, "grad_norm": 104.03621673583984, "learning_rate": 5e-06, "loss": 2.4631, "step": 4290 }, { "epoch": 0.22726672128115008, "grad_norm": 104.20209503173828, "learning_rate": 5e-06, "loss": 2.6744, "step": 4300 }, { "epoch": 0.22779524853994346, "grad_norm": 84.93382263183594, "learning_rate": 5e-06, "loss": 2.7531, "step": 4310 }, { "epoch": 0.22832377579873683, "grad_norm": 101.13731384277344, "learning_rate": 5e-06, "loss": 2.919, "step": 4320 }, { "epoch": 0.22885230305753018, "grad_norm": 93.12877655029297, "learning_rate": 5e-06, "loss": 2.753, "step": 4330 }, { "epoch": 0.22938083031632356, "grad_norm": 107.59876251220703, "learning_rate": 5e-06, "loss": 2.7309, "step": 4340 }, { "epoch": 0.22990935757511693, "grad_norm": 70.52339935302734, "learning_rate": 5e-06, "loss": 2.8632, "step": 4350 }, { "epoch": 0.2304378848339103, "grad_norm": 98.61040496826172, "learning_rate": 5e-06, "loss": 3.2017, "step": 4360 }, { "epoch": 0.2309664120927037, "grad_norm": 127.12358093261719, "learning_rate": 5e-06, "loss": 3.0945, "step": 4370 }, { "epoch": 0.23149493935149706, "grad_norm": 120.17877960205078, "learning_rate": 5e-06, "loss": 2.487, "step": 4380 }, { "epoch": 0.2320234666102904, "grad_norm": 88.09298706054688, "learning_rate": 5e-06, "loss": 2.6603, "step": 4390 }, { "epoch": 0.2325519938690838, "grad_norm": 94.97872161865234, "learning_rate": 5e-06, "loss": 2.5706, "step": 4400 }, { "epoch": 0.23308052112787717, "grad_norm": 63.445831298828125, "learning_rate": 5e-06, "loss": 2.4932, "step": 4410 }, { "epoch": 0.23360904838667054, "grad_norm": 144.358642578125, "learning_rate": 5e-06, "loss": 2.7908, "step": 4420 }, { "epoch": 0.23413757564546392, "grad_norm": 112.5492935180664, "learning_rate": 5e-06, "loss": 2.3224, "step": 4430 }, { "epoch": 0.2346661029042573, "grad_norm": 158.5397186279297, "learning_rate": 5e-06, "loss": 2.5323, "step": 4440 }, { "epoch": 0.23519463016305067, "grad_norm": 126.06301879882812, "learning_rate": 5e-06, "loss": 2.5894, "step": 4450 }, { "epoch": 0.23572315742184402, "grad_norm": 84.34476470947266, "learning_rate": 5e-06, "loss": 2.8153, "step": 4460 }, { "epoch": 0.2362516846806374, "grad_norm": 114.47502899169922, "learning_rate": 5e-06, "loss": 2.7488, "step": 4470 }, { "epoch": 0.23678021193943077, "grad_norm": 173.07305908203125, "learning_rate": 5e-06, "loss": 2.9534, "step": 4480 }, { "epoch": 0.23730873919822415, "grad_norm": 102.05268096923828, "learning_rate": 5e-06, "loss": 2.2812, "step": 4490 }, { "epoch": 0.23783726645701753, "grad_norm": 624.3790893554688, "learning_rate": 5e-06, "loss": 2.9896, "step": 4500 }, { "epoch": 0.2383657937158109, "grad_norm": 56.9068489074707, "learning_rate": 5e-06, "loss": 2.6851, "step": 4510 }, { "epoch": 0.23889432097460425, "grad_norm": 81.36341857910156, "learning_rate": 5e-06, "loss": 2.9409, "step": 4520 }, { "epoch": 0.23942284823339763, "grad_norm": 95.2925033569336, "learning_rate": 5e-06, "loss": 2.1794, "step": 4530 }, { "epoch": 0.239951375492191, "grad_norm": 80.21482849121094, "learning_rate": 5e-06, "loss": 3.2101, "step": 4540 }, { "epoch": 0.24047990275098438, "grad_norm": 95.93926239013672, "learning_rate": 5e-06, "loss": 3.0564, "step": 4550 }, { "epoch": 0.24100843000977776, "grad_norm": 69.99913024902344, "learning_rate": 5e-06, "loss": 2.5361, "step": 4560 }, { "epoch": 0.24153695726857113, "grad_norm": 254.71417236328125, "learning_rate": 5e-06, "loss": 2.7892, "step": 4570 }, { "epoch": 0.2420654845273645, "grad_norm": 107.0210189819336, "learning_rate": 5e-06, "loss": 2.9003, "step": 4580 }, { "epoch": 0.24259401178615786, "grad_norm": 92.31237030029297, "learning_rate": 5e-06, "loss": 2.5414, "step": 4590 }, { "epoch": 0.24312253904495124, "grad_norm": 109.01123046875, "learning_rate": 5e-06, "loss": 2.7725, "step": 4600 }, { "epoch": 0.2436510663037446, "grad_norm": 122.09903717041016, "learning_rate": 5e-06, "loss": 2.6523, "step": 4610 }, { "epoch": 0.244179593562538, "grad_norm": 102.56592559814453, "learning_rate": 5e-06, "loss": 2.4346, "step": 4620 }, { "epoch": 0.24470812082133137, "grad_norm": 82.88028717041016, "learning_rate": 5e-06, "loss": 2.6336, "step": 4630 }, { "epoch": 0.24523664808012474, "grad_norm": 108.32744598388672, "learning_rate": 5e-06, "loss": 3.0876, "step": 4640 }, { "epoch": 0.24576517533891812, "grad_norm": 95.1960678100586, "learning_rate": 5e-06, "loss": 2.594, "step": 4650 }, { "epoch": 0.24629370259771147, "grad_norm": 93.37010192871094, "learning_rate": 5e-06, "loss": 2.4888, "step": 4660 }, { "epoch": 0.24682222985650484, "grad_norm": 75.72577667236328, "learning_rate": 5e-06, "loss": 2.3501, "step": 4670 }, { "epoch": 0.24735075711529822, "grad_norm": 119.9735107421875, "learning_rate": 5e-06, "loss": 2.9178, "step": 4680 }, { "epoch": 0.2478792843740916, "grad_norm": 98.96273803710938, "learning_rate": 5e-06, "loss": 2.6806, "step": 4690 }, { "epoch": 0.24840781163288497, "grad_norm": 80.29339599609375, "learning_rate": 5e-06, "loss": 2.8059, "step": 4700 }, { "epoch": 0.24893633889167835, "grad_norm": 76.34888458251953, "learning_rate": 5e-06, "loss": 3.0342, "step": 4710 }, { "epoch": 0.2494648661504717, "grad_norm": 193.9256591796875, "learning_rate": 5e-06, "loss": 2.6381, "step": 4720 }, { "epoch": 0.24999339340926507, "grad_norm": 71.06155395507812, "learning_rate": 5e-06, "loss": 2.3151, "step": 4730 }, { "epoch": 0.25052192066805845, "grad_norm": 166.44728088378906, "learning_rate": 5e-06, "loss": 2.7198, "step": 4740 }, { "epoch": 0.2510504479268518, "grad_norm": 102.7462387084961, "learning_rate": 5e-06, "loss": 2.8853, "step": 4750 }, { "epoch": 0.2515789751856452, "grad_norm": 104.12824249267578, "learning_rate": 5e-06, "loss": 2.9098, "step": 4760 }, { "epoch": 0.2521075024444386, "grad_norm": 86.12760162353516, "learning_rate": 5e-06, "loss": 2.7686, "step": 4770 }, { "epoch": 0.25263602970323196, "grad_norm": 102.1300277709961, "learning_rate": 5e-06, "loss": 2.7347, "step": 4780 }, { "epoch": 0.25316455696202533, "grad_norm": 102.68096923828125, "learning_rate": 5e-06, "loss": 2.902, "step": 4790 }, { "epoch": 0.2536930842208187, "grad_norm": 118.4320068359375, "learning_rate": 5e-06, "loss": 3.0568, "step": 4800 }, { "epoch": 0.2542216114796121, "grad_norm": 119.21646118164062, "learning_rate": 5e-06, "loss": 3.1041, "step": 4810 }, { "epoch": 0.2547501387384054, "grad_norm": 135.42930603027344, "learning_rate": 5e-06, "loss": 2.5608, "step": 4820 }, { "epoch": 0.2552786659971988, "grad_norm": 88.91123962402344, "learning_rate": 5e-06, "loss": 2.6677, "step": 4830 }, { "epoch": 0.25580719325599216, "grad_norm": 92.73365020751953, "learning_rate": 5e-06, "loss": 2.2374, "step": 4840 }, { "epoch": 0.25633572051478554, "grad_norm": 115.68316650390625, "learning_rate": 5e-06, "loss": 2.7194, "step": 4850 }, { "epoch": 0.2568642477735789, "grad_norm": 86.33030700683594, "learning_rate": 5e-06, "loss": 2.8062, "step": 4860 }, { "epoch": 0.2573927750323723, "grad_norm": 147.43869018554688, "learning_rate": 5e-06, "loss": 3.0287, "step": 4870 }, { "epoch": 0.25792130229116567, "grad_norm": 92.14517974853516, "learning_rate": 5e-06, "loss": 2.6583, "step": 4880 }, { "epoch": 0.25844982954995904, "grad_norm": 196.48507690429688, "learning_rate": 5e-06, "loss": 2.7806, "step": 4890 }, { "epoch": 0.2589783568087524, "grad_norm": 111.88870239257812, "learning_rate": 5e-06, "loss": 3.1375, "step": 4900 }, { "epoch": 0.2595068840675458, "grad_norm": 88.44340515136719, "learning_rate": 5e-06, "loss": 2.7002, "step": 4910 }, { "epoch": 0.26003541132633917, "grad_norm": 125.2267837524414, "learning_rate": 5e-06, "loss": 2.5742, "step": 4920 }, { "epoch": 0.26056393858513255, "grad_norm": 111.5179214477539, "learning_rate": 5e-06, "loss": 3.02, "step": 4930 }, { "epoch": 0.2610924658439259, "grad_norm": 80.34308624267578, "learning_rate": 5e-06, "loss": 2.8444, "step": 4940 }, { "epoch": 0.26162099310271925, "grad_norm": 74.80168151855469, "learning_rate": 5e-06, "loss": 2.9099, "step": 4950 }, { "epoch": 0.2621495203615126, "grad_norm": 145.09889221191406, "learning_rate": 5e-06, "loss": 3.0461, "step": 4960 }, { "epoch": 0.262678047620306, "grad_norm": 317.35150146484375, "learning_rate": 5e-06, "loss": 2.5498, "step": 4970 }, { "epoch": 0.2632065748790994, "grad_norm": 90.4616470336914, "learning_rate": 5e-06, "loss": 2.7803, "step": 4980 }, { "epoch": 0.26373510213789275, "grad_norm": 74.06134033203125, "learning_rate": 5e-06, "loss": 2.5259, "step": 4990 }, { "epoch": 0.26426362939668613, "grad_norm": 87.68196105957031, "learning_rate": 5e-06, "loss": 2.6191, "step": 5000 }, { "epoch": 0.26426362939668613, "eval_loss": 2.3854663372039795, "eval_runtime": 36.7577, "eval_samples_per_second": 332.773, "eval_steps_per_second": 10.42, "eval_sts-dev_pearson_cosine": 0.84195183417842, "eval_sts-dev_pearson_dot": 0.8238941774587112, "eval_sts-dev_pearson_euclidean": 0.8508250210988445, "eval_sts-dev_pearson_manhattan": 0.8508612846290811, "eval_sts-dev_pearson_max": 0.8508612846290811, "eval_sts-dev_spearman_cosine": 0.8471249933684765, "eval_sts-dev_spearman_dot": 0.8214928467571628, "eval_sts-dev_spearman_euclidean": 0.8495184412873017, "eval_sts-dev_spearman_manhattan": 0.8494924079216415, "eval_sts-dev_spearman_max": 0.8495184412873017, "step": 5000 }, { "epoch": 0.2647921566554795, "grad_norm": 89.82234191894531, "learning_rate": 5e-06, "loss": 2.6341, "step": 5010 }, { "epoch": 0.2653206839142729, "grad_norm": 69.3266372680664, "learning_rate": 5e-06, "loss": 2.4378, "step": 5020 }, { "epoch": 0.26584921117306626, "grad_norm": 77.19758605957031, "learning_rate": 5e-06, "loss": 2.1188, "step": 5030 }, { "epoch": 0.26637773843185963, "grad_norm": 95.12015533447266, "learning_rate": 5e-06, "loss": 3.2349, "step": 5040 }, { "epoch": 0.266906265690653, "grad_norm": 87.52239227294922, "learning_rate": 5e-06, "loss": 2.7849, "step": 5050 }, { "epoch": 0.2674347929494464, "grad_norm": 254.3031463623047, "learning_rate": 5e-06, "loss": 2.5527, "step": 5060 }, { "epoch": 0.26796332020823976, "grad_norm": 90.6421127319336, "learning_rate": 5e-06, "loss": 2.9946, "step": 5070 }, { "epoch": 0.2684918474670331, "grad_norm": 145.72593688964844, "learning_rate": 5e-06, "loss": 2.7248, "step": 5080 }, { "epoch": 0.26902037472582646, "grad_norm": 88.97372436523438, "learning_rate": 5e-06, "loss": 2.6047, "step": 5090 }, { "epoch": 0.26954890198461984, "grad_norm": 98.61135864257812, "learning_rate": 5e-06, "loss": 3.0224, "step": 5100 }, { "epoch": 0.2700774292434132, "grad_norm": 101.9147720336914, "learning_rate": 5e-06, "loss": 2.5305, "step": 5110 }, { "epoch": 0.2706059565022066, "grad_norm": 81.65401458740234, "learning_rate": 5e-06, "loss": 2.4172, "step": 5120 }, { "epoch": 0.27113448376099997, "grad_norm": 70.79377746582031, "learning_rate": 5e-06, "loss": 2.3133, "step": 5130 }, { "epoch": 0.27166301101979334, "grad_norm": 192.8549041748047, "learning_rate": 5e-06, "loss": 3.0929, "step": 5140 }, { "epoch": 0.2721915382785867, "grad_norm": 97.99180603027344, "learning_rate": 5e-06, "loss": 2.4147, "step": 5150 }, { "epoch": 0.2727200655373801, "grad_norm": 87.60978698730469, "learning_rate": 5e-06, "loss": 2.4361, "step": 5160 }, { "epoch": 0.2732485927961735, "grad_norm": 111.96373748779297, "learning_rate": 5e-06, "loss": 2.7362, "step": 5170 }, { "epoch": 0.27377712005496685, "grad_norm": 85.1805191040039, "learning_rate": 5e-06, "loss": 2.5511, "step": 5180 }, { "epoch": 0.2743056473137602, "grad_norm": 98.32715606689453, "learning_rate": 5e-06, "loss": 2.7759, "step": 5190 }, { "epoch": 0.2748341745725536, "grad_norm": 82.76212310791016, "learning_rate": 5e-06, "loss": 2.538, "step": 5200 }, { "epoch": 0.275362701831347, "grad_norm": 110.64933013916016, "learning_rate": 5e-06, "loss": 3.0784, "step": 5210 }, { "epoch": 0.2758912290901403, "grad_norm": 118.2285385131836, "learning_rate": 5e-06, "loss": 2.6996, "step": 5220 }, { "epoch": 0.2764197563489337, "grad_norm": 71.20365905761719, "learning_rate": 5e-06, "loss": 2.1167, "step": 5230 }, { "epoch": 0.27694828360772705, "grad_norm": 247.9803924560547, "learning_rate": 5e-06, "loss": 2.4574, "step": 5240 }, { "epoch": 0.27747681086652043, "grad_norm": 71.46477508544922, "learning_rate": 5e-06, "loss": 2.541, "step": 5250 }, { "epoch": 0.2780053381253138, "grad_norm": 86.62269592285156, "learning_rate": 5e-06, "loss": 2.7588, "step": 5260 }, { "epoch": 0.2785338653841072, "grad_norm": 88.04620361328125, "learning_rate": 5e-06, "loss": 2.209, "step": 5270 }, { "epoch": 0.27906239264290056, "grad_norm": 93.1932601928711, "learning_rate": 5e-06, "loss": 2.754, "step": 5280 }, { "epoch": 0.27959091990169394, "grad_norm": 94.66448974609375, "learning_rate": 5e-06, "loss": 2.3846, "step": 5290 }, { "epoch": 0.2801194471604873, "grad_norm": 121.12661743164062, "learning_rate": 5e-06, "loss": 2.3815, "step": 5300 }, { "epoch": 0.2806479744192807, "grad_norm": 88.9536361694336, "learning_rate": 5e-06, "loss": 2.3229, "step": 5310 }, { "epoch": 0.28117650167807406, "grad_norm": 85.5940170288086, "learning_rate": 5e-06, "loss": 2.5009, "step": 5320 }, { "epoch": 0.28170502893686744, "grad_norm": 60.09767150878906, "learning_rate": 5e-06, "loss": 2.8958, "step": 5330 }, { "epoch": 0.2822335561956608, "grad_norm": 76.6888198852539, "learning_rate": 5e-06, "loss": 2.4419, "step": 5340 }, { "epoch": 0.28276208345445414, "grad_norm": 109.612548828125, "learning_rate": 5e-06, "loss": 2.8386, "step": 5350 }, { "epoch": 0.2832906107132475, "grad_norm": 97.52584075927734, "learning_rate": 5e-06, "loss": 2.4213, "step": 5360 }, { "epoch": 0.2838191379720409, "grad_norm": 75.4255599975586, "learning_rate": 5e-06, "loss": 2.7308, "step": 5370 }, { "epoch": 0.28434766523083427, "grad_norm": 74.50464630126953, "learning_rate": 5e-06, "loss": 2.3171, "step": 5380 }, { "epoch": 0.28487619248962764, "grad_norm": 85.42974090576172, "learning_rate": 5e-06, "loss": 2.4222, "step": 5390 }, { "epoch": 0.285404719748421, "grad_norm": 90.30725860595703, "learning_rate": 5e-06, "loss": 2.5326, "step": 5400 }, { "epoch": 0.2859332470072144, "grad_norm": 53.945289611816406, "learning_rate": 5e-06, "loss": 2.1823, "step": 5410 }, { "epoch": 0.2864617742660078, "grad_norm": 77.42508697509766, "learning_rate": 5e-06, "loss": 2.6143, "step": 5420 }, { "epoch": 0.28699030152480115, "grad_norm": 71.2474365234375, "learning_rate": 5e-06, "loss": 2.5623, "step": 5430 }, { "epoch": 0.2875188287835945, "grad_norm": 106.04669952392578, "learning_rate": 5e-06, "loss": 2.6342, "step": 5440 }, { "epoch": 0.2880473560423879, "grad_norm": 93.29757690429688, "learning_rate": 5e-06, "loss": 2.9724, "step": 5450 }, { "epoch": 0.2885758833011813, "grad_norm": 70.6619644165039, "learning_rate": 5e-06, "loss": 2.3377, "step": 5460 }, { "epoch": 0.28910441055997466, "grad_norm": 79.80912780761719, "learning_rate": 5e-06, "loss": 2.6592, "step": 5470 }, { "epoch": 0.289632937818768, "grad_norm": 74.3382339477539, "learning_rate": 5e-06, "loss": 2.4686, "step": 5480 }, { "epoch": 0.29016146507756135, "grad_norm": 100.12771606445312, "learning_rate": 5e-06, "loss": 2.5508, "step": 5490 }, { "epoch": 0.29068999233635473, "grad_norm": 178.5901641845703, "learning_rate": 5e-06, "loss": 2.8819, "step": 5500 }, { "epoch": 0.2912185195951481, "grad_norm": 71.5240478515625, "learning_rate": 5e-06, "loss": 2.4642, "step": 5510 }, { "epoch": 0.2917470468539415, "grad_norm": 70.88338470458984, "learning_rate": 5e-06, "loss": 2.6385, "step": 5520 }, { "epoch": 0.29227557411273486, "grad_norm": 78.67906951904297, "learning_rate": 5e-06, "loss": 2.852, "step": 5530 }, { "epoch": 0.29280410137152824, "grad_norm": 75.11878967285156, "learning_rate": 5e-06, "loss": 2.4236, "step": 5540 }, { "epoch": 0.2933326286303216, "grad_norm": 79.11884307861328, "learning_rate": 5e-06, "loss": 2.4781, "step": 5550 }, { "epoch": 0.293861155889115, "grad_norm": 81.7442855834961, "learning_rate": 5e-06, "loss": 3.0607, "step": 5560 }, { "epoch": 0.29438968314790837, "grad_norm": 83.6771469116211, "learning_rate": 5e-06, "loss": 2.7634, "step": 5570 }, { "epoch": 0.29491821040670174, "grad_norm": 98.78929901123047, "learning_rate": 5e-06, "loss": 2.5003, "step": 5580 }, { "epoch": 0.2954467376654951, "grad_norm": 75.42288970947266, "learning_rate": 5e-06, "loss": 2.5548, "step": 5590 }, { "epoch": 0.2959752649242885, "grad_norm": 97.28291320800781, "learning_rate": 5e-06, "loss": 2.2414, "step": 5600 }, { "epoch": 0.2965037921830818, "grad_norm": 76.384521484375, "learning_rate": 5e-06, "loss": 2.1673, "step": 5610 }, { "epoch": 0.2970323194418752, "grad_norm": 78.2350845336914, "learning_rate": 5e-06, "loss": 2.6842, "step": 5620 }, { "epoch": 0.29756084670066857, "grad_norm": 84.8014907836914, "learning_rate": 5e-06, "loss": 2.3938, "step": 5630 }, { "epoch": 0.29808937395946195, "grad_norm": 88.27128601074219, "learning_rate": 5e-06, "loss": 2.9564, "step": 5640 }, { "epoch": 0.2986179012182553, "grad_norm": 84.72720336914062, "learning_rate": 5e-06, "loss": 2.2356, "step": 5650 }, { "epoch": 0.2991464284770487, "grad_norm": 86.19966125488281, "learning_rate": 5e-06, "loss": 3.3069, "step": 5660 }, { "epoch": 0.2996749557358421, "grad_norm": 103.31525421142578, "learning_rate": 5e-06, "loss": 2.3765, "step": 5670 }, { "epoch": 0.30020348299463545, "grad_norm": 57.145877838134766, "learning_rate": 5e-06, "loss": 2.3425, "step": 5680 }, { "epoch": 0.3007320102534288, "grad_norm": 93.1820068359375, "learning_rate": 5e-06, "loss": 2.6355, "step": 5690 }, { "epoch": 0.3012605375122222, "grad_norm": 92.56536102294922, "learning_rate": 5e-06, "loss": 2.5505, "step": 5700 }, { "epoch": 0.3017890647710156, "grad_norm": 130.28871154785156, "learning_rate": 5e-06, "loss": 2.676, "step": 5710 }, { "epoch": 0.30231759202980896, "grad_norm": 80.49274444580078, "learning_rate": 5e-06, "loss": 2.6175, "step": 5720 }, { "epoch": 0.30284611928860233, "grad_norm": 92.3204345703125, "learning_rate": 5e-06, "loss": 2.4518, "step": 5730 }, { "epoch": 0.30337464654739565, "grad_norm": 95.5674057006836, "learning_rate": 5e-06, "loss": 2.6808, "step": 5740 }, { "epoch": 0.30390317380618903, "grad_norm": 111.63196563720703, "learning_rate": 5e-06, "loss": 1.8947, "step": 5750 }, { "epoch": 0.3044317010649824, "grad_norm": 96.8458480834961, "learning_rate": 5e-06, "loss": 2.5959, "step": 5760 }, { "epoch": 0.3049602283237758, "grad_norm": 66.94209289550781, "learning_rate": 5e-06, "loss": 2.3175, "step": 5770 }, { "epoch": 0.30548875558256916, "grad_norm": 86.09745788574219, "learning_rate": 5e-06, "loss": 2.6486, "step": 5780 }, { "epoch": 0.30601728284136254, "grad_norm": 82.36959838867188, "learning_rate": 5e-06, "loss": 2.4147, "step": 5790 }, { "epoch": 0.3065458101001559, "grad_norm": 107.32633972167969, "learning_rate": 5e-06, "loss": 3.1598, "step": 5800 }, { "epoch": 0.3070743373589493, "grad_norm": 95.36216735839844, "learning_rate": 5e-06, "loss": 2.5192, "step": 5810 }, { "epoch": 0.30760286461774267, "grad_norm": 71.98739624023438, "learning_rate": 5e-06, "loss": 2.1094, "step": 5820 }, { "epoch": 0.30813139187653604, "grad_norm": 95.21013641357422, "learning_rate": 5e-06, "loss": 2.2515, "step": 5830 }, { "epoch": 0.3086599191353294, "grad_norm": 76.88362121582031, "learning_rate": 5e-06, "loss": 2.2107, "step": 5840 }, { "epoch": 0.3091884463941228, "grad_norm": 106.17201232910156, "learning_rate": 5e-06, "loss": 2.4194, "step": 5850 }, { "epoch": 0.30971697365291617, "grad_norm": 65.86492919921875, "learning_rate": 5e-06, "loss": 2.576, "step": 5860 }, { "epoch": 0.31024550091170955, "grad_norm": 76.17904663085938, "learning_rate": 5e-06, "loss": 2.5578, "step": 5870 }, { "epoch": 0.31077402817050287, "grad_norm": 89.75590515136719, "learning_rate": 5e-06, "loss": 2.4742, "step": 5880 }, { "epoch": 0.31130255542929625, "grad_norm": 82.94830322265625, "learning_rate": 5e-06, "loss": 2.3562, "step": 5890 }, { "epoch": 0.3118310826880896, "grad_norm": 88.11162567138672, "learning_rate": 5e-06, "loss": 2.44, "step": 5900 }, { "epoch": 0.312359609946883, "grad_norm": 93.87834930419922, "learning_rate": 5e-06, "loss": 2.5067, "step": 5910 }, { "epoch": 0.3128881372056764, "grad_norm": 70.6242904663086, "learning_rate": 5e-06, "loss": 2.6134, "step": 5920 }, { "epoch": 0.31341666446446975, "grad_norm": 79.70014953613281, "learning_rate": 5e-06, "loss": 2.5081, "step": 5930 }, { "epoch": 0.31394519172326313, "grad_norm": 122.92144012451172, "learning_rate": 5e-06, "loss": 2.2108, "step": 5940 }, { "epoch": 0.3144737189820565, "grad_norm": 92.35652160644531, "learning_rate": 5e-06, "loss": 2.5597, "step": 5950 }, { "epoch": 0.3150022462408499, "grad_norm": 99.51287841796875, "learning_rate": 5e-06, "loss": 2.5551, "step": 5960 }, { "epoch": 0.31553077349964326, "grad_norm": 92.60234069824219, "learning_rate": 5e-06, "loss": 2.9771, "step": 5970 }, { "epoch": 0.31605930075843663, "grad_norm": 61.02357482910156, "learning_rate": 5e-06, "loss": 2.8512, "step": 5980 }, { "epoch": 0.31658782801723, "grad_norm": 83.60948181152344, "learning_rate": 5e-06, "loss": 2.5188, "step": 5990 }, { "epoch": 0.3171163552760234, "grad_norm": 70.05110931396484, "learning_rate": 5e-06, "loss": 2.3515, "step": 6000 }, { "epoch": 0.3171163552760234, "eval_loss": 2.240429162979126, "eval_runtime": 42.0461, "eval_samples_per_second": 290.919, "eval_steps_per_second": 9.109, "eval_sts-dev_pearson_cosine": 0.8424791155610334, "eval_sts-dev_pearson_dot": 0.8231215920645936, "eval_sts-dev_pearson_euclidean": 0.8520790917638159, "eval_sts-dev_pearson_manhattan": 0.8518535087170254, "eval_sts-dev_pearson_max": 0.8520790917638159, "eval_sts-dev_spearman_cosine": 0.8499496900562713, "eval_sts-dev_spearman_dot": 0.8233431114299246, "eval_sts-dev_spearman_euclidean": 0.8513926532266082, "eval_sts-dev_spearman_manhattan": 0.851255458047931, "eval_sts-dev_spearman_max": 0.8513926532266082, "step": 6000 }, { "epoch": 0.3176448825348167, "grad_norm": 110.67689514160156, "learning_rate": 5e-06, "loss": 2.3971, "step": 6010 }, { "epoch": 0.3181734097936101, "grad_norm": 92.74166107177734, "learning_rate": 5e-06, "loss": 2.9238, "step": 6020 }, { "epoch": 0.31870193705240346, "grad_norm": 75.1832046508789, "learning_rate": 5e-06, "loss": 2.055, "step": 6030 }, { "epoch": 0.31923046431119684, "grad_norm": 82.1972885131836, "learning_rate": 5e-06, "loss": 2.3262, "step": 6040 }, { "epoch": 0.3197589915699902, "grad_norm": 67.66712188720703, "learning_rate": 5e-06, "loss": 2.4028, "step": 6050 }, { "epoch": 0.3202875188287836, "grad_norm": 72.09346008300781, "learning_rate": 5e-06, "loss": 2.6955, "step": 6060 }, { "epoch": 0.32081604608757697, "grad_norm": 81.32170867919922, "learning_rate": 5e-06, "loss": 2.5426, "step": 6070 }, { "epoch": 0.32134457334637034, "grad_norm": 94.93788146972656, "learning_rate": 5e-06, "loss": 2.5827, "step": 6080 }, { "epoch": 0.3218731006051637, "grad_norm": 69.84442901611328, "learning_rate": 5e-06, "loss": 1.9264, "step": 6090 }, { "epoch": 0.3224016278639571, "grad_norm": 102.50479888916016, "learning_rate": 5e-06, "loss": 2.1547, "step": 6100 }, { "epoch": 0.3229301551227505, "grad_norm": 110.75091552734375, "learning_rate": 5e-06, "loss": 2.707, "step": 6110 }, { "epoch": 0.32345868238154385, "grad_norm": 95.4905776977539, "learning_rate": 5e-06, "loss": 2.6737, "step": 6120 }, { "epoch": 0.3239872096403372, "grad_norm": 58.70147705078125, "learning_rate": 5e-06, "loss": 2.6646, "step": 6130 }, { "epoch": 0.32451573689913055, "grad_norm": 97.05884552001953, "learning_rate": 5e-06, "loss": 2.6213, "step": 6140 }, { "epoch": 0.3250442641579239, "grad_norm": 75.73735809326172, "learning_rate": 5e-06, "loss": 2.4298, "step": 6150 }, { "epoch": 0.3255727914167173, "grad_norm": 114.50432586669922, "learning_rate": 5e-06, "loss": 2.2964, "step": 6160 }, { "epoch": 0.3261013186755107, "grad_norm": 86.9200439453125, "learning_rate": 5e-06, "loss": 2.7238, "step": 6170 }, { "epoch": 0.32662984593430405, "grad_norm": 84.707275390625, "learning_rate": 5e-06, "loss": 2.9217, "step": 6180 }, { "epoch": 0.32715837319309743, "grad_norm": 93.9710922241211, "learning_rate": 5e-06, "loss": 2.2634, "step": 6190 }, { "epoch": 0.3276869004518908, "grad_norm": 97.73650360107422, "learning_rate": 5e-06, "loss": 2.5191, "step": 6200 }, { "epoch": 0.3282154277106842, "grad_norm": 90.21611785888672, "learning_rate": 5e-06, "loss": 2.6026, "step": 6210 }, { "epoch": 0.32874395496947756, "grad_norm": 72.90707397460938, "learning_rate": 5e-06, "loss": 2.5669, "step": 6220 }, { "epoch": 0.32927248222827094, "grad_norm": 90.78082275390625, "learning_rate": 5e-06, "loss": 2.2805, "step": 6230 }, { "epoch": 0.3298010094870643, "grad_norm": 98.97916412353516, "learning_rate": 5e-06, "loss": 2.5231, "step": 6240 }, { "epoch": 0.3303295367458577, "grad_norm": 103.45552825927734, "learning_rate": 5e-06, "loss": 2.4317, "step": 6250 }, { "epoch": 0.33085806400465106, "grad_norm": 89.42842102050781, "learning_rate": 5e-06, "loss": 2.1196, "step": 6260 }, { "epoch": 0.3313865912634444, "grad_norm": 116.63945770263672, "learning_rate": 5e-06, "loss": 2.3505, "step": 6270 }, { "epoch": 0.33191511852223776, "grad_norm": 108.17332458496094, "learning_rate": 5e-06, "loss": 2.8414, "step": 6280 }, { "epoch": 0.33244364578103114, "grad_norm": 79.77661895751953, "learning_rate": 5e-06, "loss": 2.2717, "step": 6290 }, { "epoch": 0.3329721730398245, "grad_norm": 84.6794204711914, "learning_rate": 5e-06, "loss": 2.5002, "step": 6300 }, { "epoch": 0.3335007002986179, "grad_norm": 86.02350616455078, "learning_rate": 5e-06, "loss": 2.5537, "step": 6310 }, { "epoch": 0.33402922755741127, "grad_norm": 64.38496398925781, "learning_rate": 5e-06, "loss": 2.706, "step": 6320 }, { "epoch": 0.33455775481620464, "grad_norm": 92.31443786621094, "learning_rate": 5e-06, "loss": 2.6221, "step": 6330 }, { "epoch": 0.335086282074998, "grad_norm": 80.24845886230469, "learning_rate": 5e-06, "loss": 2.3054, "step": 6340 }, { "epoch": 0.3356148093337914, "grad_norm": 132.51303100585938, "learning_rate": 5e-06, "loss": 2.9389, "step": 6350 }, { "epoch": 0.3361433365925848, "grad_norm": 100.6501235961914, "learning_rate": 5e-06, "loss": 2.0146, "step": 6360 }, { "epoch": 0.33667186385137815, "grad_norm": 83.950439453125, "learning_rate": 5e-06, "loss": 2.2646, "step": 6370 }, { "epoch": 0.3372003911101715, "grad_norm": 76.93345642089844, "learning_rate": 5e-06, "loss": 2.7927, "step": 6380 }, { "epoch": 0.3377289183689649, "grad_norm": 87.22737884521484, "learning_rate": 5e-06, "loss": 2.3085, "step": 6390 }, { "epoch": 0.3382574456277582, "grad_norm": 105.49230194091797, "learning_rate": 5e-06, "loss": 2.8011, "step": 6400 }, { "epoch": 0.3387859728865516, "grad_norm": 107.44608306884766, "learning_rate": 5e-06, "loss": 2.8185, "step": 6410 }, { "epoch": 0.339314500145345, "grad_norm": 70.09943389892578, "learning_rate": 5e-06, "loss": 2.1682, "step": 6420 }, { "epoch": 0.33984302740413835, "grad_norm": 86.46043395996094, "learning_rate": 5e-06, "loss": 2.4014, "step": 6430 }, { "epoch": 0.34037155466293173, "grad_norm": 58.66908645629883, "learning_rate": 5e-06, "loss": 2.0774, "step": 6440 }, { "epoch": 0.3409000819217251, "grad_norm": 99.3773422241211, "learning_rate": 5e-06, "loss": 2.7344, "step": 6450 }, { "epoch": 0.3414286091805185, "grad_norm": 65.60407257080078, "learning_rate": 5e-06, "loss": 2.5025, "step": 6460 }, { "epoch": 0.34195713643931186, "grad_norm": 103.81360626220703, "learning_rate": 5e-06, "loss": 2.4203, "step": 6470 }, { "epoch": 0.34248566369810524, "grad_norm": 84.6003189086914, "learning_rate": 5e-06, "loss": 2.776, "step": 6480 }, { "epoch": 0.3430141909568986, "grad_norm": 82.03404998779297, "learning_rate": 5e-06, "loss": 2.2166, "step": 6490 }, { "epoch": 0.343542718215692, "grad_norm": 86.78759765625, "learning_rate": 5e-06, "loss": 2.6204, "step": 6500 }, { "epoch": 0.34407124547448537, "grad_norm": 74.80810546875, "learning_rate": 5e-06, "loss": 2.5564, "step": 6510 }, { "epoch": 0.34459977273327874, "grad_norm": 61.17771530151367, "learning_rate": 5e-06, "loss": 2.2846, "step": 6520 }, { "epoch": 0.3451282999920721, "grad_norm": 88.96772003173828, "learning_rate": 5e-06, "loss": 2.5158, "step": 6530 }, { "epoch": 0.34565682725086544, "grad_norm": 68.64732360839844, "learning_rate": 5e-06, "loss": 2.1981, "step": 6540 }, { "epoch": 0.3461853545096588, "grad_norm": 73.94080352783203, "learning_rate": 5e-06, "loss": 2.4195, "step": 6550 }, { "epoch": 0.3467138817684522, "grad_norm": 67.83092498779297, "learning_rate": 5e-06, "loss": 2.4621, "step": 6560 }, { "epoch": 0.34724240902724557, "grad_norm": 87.89527130126953, "learning_rate": 5e-06, "loss": 2.7793, "step": 6570 }, { "epoch": 0.34777093628603895, "grad_norm": 84.43002319335938, "learning_rate": 5e-06, "loss": 2.2086, "step": 6580 }, { "epoch": 0.3482994635448323, "grad_norm": 112.70220184326172, "learning_rate": 5e-06, "loss": 2.7836, "step": 6590 }, { "epoch": 0.3488279908036257, "grad_norm": 77.07843017578125, "learning_rate": 5e-06, "loss": 2.5791, "step": 6600 }, { "epoch": 0.3493565180624191, "grad_norm": 105.94934844970703, "learning_rate": 5e-06, "loss": 2.1752, "step": 6610 }, { "epoch": 0.34988504532121245, "grad_norm": 90.58606719970703, "learning_rate": 5e-06, "loss": 2.3475, "step": 6620 }, { "epoch": 0.3504135725800058, "grad_norm": 70.49636840820312, "learning_rate": 5e-06, "loss": 2.2384, "step": 6630 }, { "epoch": 0.3509420998387992, "grad_norm": 175.32965087890625, "learning_rate": 5e-06, "loss": 2.4707, "step": 6640 }, { "epoch": 0.3514706270975926, "grad_norm": 88.42003631591797, "learning_rate": 5e-06, "loss": 2.4221, "step": 6650 }, { "epoch": 0.35199915435638596, "grad_norm": 109.38282012939453, "learning_rate": 5e-06, "loss": 2.6415, "step": 6660 }, { "epoch": 0.3525276816151793, "grad_norm": 67.00623321533203, "learning_rate": 5e-06, "loss": 2.3872, "step": 6670 }, { "epoch": 0.35305620887397265, "grad_norm": 92.41059112548828, "learning_rate": 5e-06, "loss": 2.6841, "step": 6680 }, { "epoch": 0.35358473613276603, "grad_norm": 65.67755126953125, "learning_rate": 5e-06, "loss": 2.192, "step": 6690 }, { "epoch": 0.3541132633915594, "grad_norm": 88.52156066894531, "learning_rate": 5e-06, "loss": 2.3678, "step": 6700 }, { "epoch": 0.3546417906503528, "grad_norm": 137.80123901367188, "learning_rate": 5e-06, "loss": 2.7071, "step": 6710 }, { "epoch": 0.35517031790914616, "grad_norm": 151.2347412109375, "learning_rate": 5e-06, "loss": 2.1774, "step": 6720 }, { "epoch": 0.35569884516793954, "grad_norm": 106.51242065429688, "learning_rate": 5e-06, "loss": 2.4432, "step": 6730 }, { "epoch": 0.3562273724267329, "grad_norm": 72.58672332763672, "learning_rate": 5e-06, "loss": 2.4925, "step": 6740 }, { "epoch": 0.3567558996855263, "grad_norm": 77.30189514160156, "learning_rate": 5e-06, "loss": 2.3192, "step": 6750 }, { "epoch": 0.35728442694431967, "grad_norm": 111.25764465332031, "learning_rate": 5e-06, "loss": 2.2543, "step": 6760 }, { "epoch": 0.35781295420311304, "grad_norm": 118.70784759521484, "learning_rate": 5e-06, "loss": 2.4008, "step": 6770 }, { "epoch": 0.3583414814619064, "grad_norm": 64.47772216796875, "learning_rate": 5e-06, "loss": 2.4768, "step": 6780 }, { "epoch": 0.3588700087206998, "grad_norm": 116.53492736816406, "learning_rate": 5e-06, "loss": 2.4179, "step": 6790 }, { "epoch": 0.3593985359794931, "grad_norm": 78.51580047607422, "learning_rate": 5e-06, "loss": 2.0594, "step": 6800 }, { "epoch": 0.3599270632382865, "grad_norm": 94.29190063476562, "learning_rate": 5e-06, "loss": 2.5371, "step": 6810 }, { "epoch": 0.36045559049707987, "grad_norm": 76.46458435058594, "learning_rate": 5e-06, "loss": 2.2325, "step": 6820 }, { "epoch": 0.36098411775587325, "grad_norm": 87.64910888671875, "learning_rate": 5e-06, "loss": 2.1572, "step": 6830 }, { "epoch": 0.3615126450146666, "grad_norm": 65.17787170410156, "learning_rate": 5e-06, "loss": 2.0705, "step": 6840 }, { "epoch": 0.36204117227346, "grad_norm": 56.938194274902344, "learning_rate": 5e-06, "loss": 2.209, "step": 6850 }, { "epoch": 0.3625696995322534, "grad_norm": 76.17575073242188, "learning_rate": 5e-06, "loss": 2.7542, "step": 6860 }, { "epoch": 0.36309822679104675, "grad_norm": 89.66632080078125, "learning_rate": 5e-06, "loss": 2.0356, "step": 6870 }, { "epoch": 0.36362675404984013, "grad_norm": 101.79145050048828, "learning_rate": 5e-06, "loss": 2.3447, "step": 6880 }, { "epoch": 0.3641552813086335, "grad_norm": 58.324005126953125, "learning_rate": 5e-06, "loss": 2.1093, "step": 6890 }, { "epoch": 0.3646838085674269, "grad_norm": 106.10673522949219, "learning_rate": 5e-06, "loss": 2.2551, "step": 6900 }, { "epoch": 0.36521233582622026, "grad_norm": 106.65017700195312, "learning_rate": 5e-06, "loss": 2.3635, "step": 6910 }, { "epoch": 0.36574086308501363, "grad_norm": 82.8312759399414, "learning_rate": 5e-06, "loss": 2.3968, "step": 6920 }, { "epoch": 0.36626939034380696, "grad_norm": 241.59873962402344, "learning_rate": 5e-06, "loss": 2.6719, "step": 6930 }, { "epoch": 0.36679791760260033, "grad_norm": 99.39743041992188, "learning_rate": 5e-06, "loss": 2.2375, "step": 6940 }, { "epoch": 0.3673264448613937, "grad_norm": 74.95823669433594, "learning_rate": 5e-06, "loss": 2.8405, "step": 6950 }, { "epoch": 0.3678549721201871, "grad_norm": 95.29563903808594, "learning_rate": 5e-06, "loss": 2.3598, "step": 6960 }, { "epoch": 0.36838349937898046, "grad_norm": 76.75310516357422, "learning_rate": 5e-06, "loss": 2.3775, "step": 6970 }, { "epoch": 0.36891202663777384, "grad_norm": 68.7529296875, "learning_rate": 5e-06, "loss": 2.3932, "step": 6980 }, { "epoch": 0.3694405538965672, "grad_norm": 78.4465103149414, "learning_rate": 5e-06, "loss": 2.1039, "step": 6990 }, { "epoch": 0.3699690811553606, "grad_norm": 73.16110229492188, "learning_rate": 5e-06, "loss": 2.4013, "step": 7000 }, { "epoch": 0.3699690811553606, "eval_loss": 2.1510636806488037, "eval_runtime": 37.7949, "eval_samples_per_second": 323.641, "eval_steps_per_second": 10.134, "eval_sts-dev_pearson_cosine": 0.848793399958891, "eval_sts-dev_pearson_dot": 0.8315633259424045, "eval_sts-dev_pearson_euclidean": 0.8553501231084463, "eval_sts-dev_pearson_manhattan": 0.8550206228045758, "eval_sts-dev_pearson_max": 0.8553501231084463, "eval_sts-dev_spearman_cosine": 0.8529661282809592, "eval_sts-dev_spearman_dot": 0.829360688922406, "eval_sts-dev_spearman_euclidean": 0.8547330858141923, "eval_sts-dev_spearman_manhattan": 0.854124067688434, "eval_sts-dev_spearman_max": 0.8547330858141923, "step": 7000 }, { "epoch": 0.37049760841415397, "grad_norm": 95.91380310058594, "learning_rate": 5e-06, "loss": 2.5338, "step": 7010 }, { "epoch": 0.37102613567294734, "grad_norm": 88.22793579101562, "learning_rate": 5e-06, "loss": 2.2101, "step": 7020 }, { "epoch": 0.3715546629317407, "grad_norm": 103.00116729736328, "learning_rate": 5e-06, "loss": 2.6291, "step": 7030 }, { "epoch": 0.3720831901905341, "grad_norm": 84.87012481689453, "learning_rate": 5e-06, "loss": 2.7929, "step": 7040 }, { "epoch": 0.3726117174493275, "grad_norm": 92.43543243408203, "learning_rate": 5e-06, "loss": 2.1269, "step": 7050 }, { "epoch": 0.3731402447081208, "grad_norm": 90.59485626220703, "learning_rate": 5e-06, "loss": 2.463, "step": 7060 }, { "epoch": 0.37366877196691417, "grad_norm": 67.2775650024414, "learning_rate": 5e-06, "loss": 1.8087, "step": 7070 }, { "epoch": 0.37419729922570755, "grad_norm": 80.0679931640625, "learning_rate": 5e-06, "loss": 2.4382, "step": 7080 }, { "epoch": 0.3747258264845009, "grad_norm": 100.6688461303711, "learning_rate": 5e-06, "loss": 2.4318, "step": 7090 }, { "epoch": 0.3752543537432943, "grad_norm": 92.10970306396484, "learning_rate": 5e-06, "loss": 2.4709, "step": 7100 }, { "epoch": 0.3757828810020877, "grad_norm": 77.32544708251953, "learning_rate": 5e-06, "loss": 2.1339, "step": 7110 }, { "epoch": 0.37631140826088105, "grad_norm": 101.0381851196289, "learning_rate": 5e-06, "loss": 2.131, "step": 7120 }, { "epoch": 0.37683993551967443, "grad_norm": 141.14877319335938, "learning_rate": 5e-06, "loss": 2.759, "step": 7130 }, { "epoch": 0.3773684627784678, "grad_norm": 77.4377212524414, "learning_rate": 5e-06, "loss": 2.3072, "step": 7140 }, { "epoch": 0.3778969900372612, "grad_norm": 68.7982177734375, "learning_rate": 5e-06, "loss": 2.5662, "step": 7150 }, { "epoch": 0.37842551729605456, "grad_norm": 75.91426086425781, "learning_rate": 5e-06, "loss": 2.1588, "step": 7160 }, { "epoch": 0.37895404455484794, "grad_norm": 91.07208251953125, "learning_rate": 5e-06, "loss": 2.1836, "step": 7170 }, { "epoch": 0.3794825718136413, "grad_norm": 65.99691009521484, "learning_rate": 5e-06, "loss": 2.6977, "step": 7180 }, { "epoch": 0.3800110990724347, "grad_norm": 193.88575744628906, "learning_rate": 5e-06, "loss": 2.629, "step": 7190 }, { "epoch": 0.380539626331228, "grad_norm": 87.36736297607422, "learning_rate": 5e-06, "loss": 2.1633, "step": 7200 }, { "epoch": 0.3810681535900214, "grad_norm": 103.8977279663086, "learning_rate": 5e-06, "loss": 2.4396, "step": 7210 }, { "epoch": 0.38159668084881476, "grad_norm": 95.2267837524414, "learning_rate": 5e-06, "loss": 2.4286, "step": 7220 }, { "epoch": 0.38212520810760814, "grad_norm": 84.2881088256836, "learning_rate": 5e-06, "loss": 2.2765, "step": 7230 }, { "epoch": 0.3826537353664015, "grad_norm": 66.8350830078125, "learning_rate": 5e-06, "loss": 2.0202, "step": 7240 }, { "epoch": 0.3831822626251949, "grad_norm": 48.085628509521484, "learning_rate": 5e-06, "loss": 2.3484, "step": 7250 }, { "epoch": 0.38371078988398827, "grad_norm": 55.55941390991211, "learning_rate": 5e-06, "loss": 1.9765, "step": 7260 }, { "epoch": 0.38423931714278164, "grad_norm": 110.5053939819336, "learning_rate": 5e-06, "loss": 2.1988, "step": 7270 }, { "epoch": 0.384767844401575, "grad_norm": 99.68704223632812, "learning_rate": 5e-06, "loss": 2.2875, "step": 7280 }, { "epoch": 0.3852963716603684, "grad_norm": 79.54515838623047, "learning_rate": 5e-06, "loss": 2.1256, "step": 7290 }, { "epoch": 0.3858248989191618, "grad_norm": 70.53141021728516, "learning_rate": 5e-06, "loss": 2.3494, "step": 7300 }, { "epoch": 0.38635342617795515, "grad_norm": 78.9384994506836, "learning_rate": 5e-06, "loss": 2.6466, "step": 7310 }, { "epoch": 0.3868819534367485, "grad_norm": 106.67530822753906, "learning_rate": 5e-06, "loss": 1.8519, "step": 7320 }, { "epoch": 0.38741048069554185, "grad_norm": 70.67112731933594, "learning_rate": 5e-06, "loss": 2.5161, "step": 7330 }, { "epoch": 0.3879390079543352, "grad_norm": 104.21273040771484, "learning_rate": 5e-06, "loss": 2.3868, "step": 7340 }, { "epoch": 0.3884675352131286, "grad_norm": 81.47088623046875, "learning_rate": 5e-06, "loss": 2.612, "step": 7350 }, { "epoch": 0.388996062471922, "grad_norm": 69.62950897216797, "learning_rate": 5e-06, "loss": 2.378, "step": 7360 }, { "epoch": 0.38952458973071535, "grad_norm": 70.50045013427734, "learning_rate": 5e-06, "loss": 2.4306, "step": 7370 }, { "epoch": 0.39005311698950873, "grad_norm": 81.20893859863281, "learning_rate": 5e-06, "loss": 2.263, "step": 7380 }, { "epoch": 0.3905816442483021, "grad_norm": 94.03824615478516, "learning_rate": 5e-06, "loss": 2.4995, "step": 7390 }, { "epoch": 0.3911101715070955, "grad_norm": 57.91423797607422, "learning_rate": 5e-06, "loss": 1.503, "step": 7400 }, { "epoch": 0.39163869876588886, "grad_norm": 100.7950668334961, "learning_rate": 5e-06, "loss": 2.6309, "step": 7410 }, { "epoch": 0.39216722602468224, "grad_norm": 102.22388458251953, "learning_rate": 5e-06, "loss": 2.2763, "step": 7420 }, { "epoch": 0.3926957532834756, "grad_norm": 112.81481170654297, "learning_rate": 5e-06, "loss": 2.1477, "step": 7430 }, { "epoch": 0.393224280542269, "grad_norm": 78.27204132080078, "learning_rate": 5e-06, "loss": 2.3135, "step": 7440 }, { "epoch": 0.39375280780106237, "grad_norm": 101.54691314697266, "learning_rate": 5e-06, "loss": 2.1847, "step": 7450 }, { "epoch": 0.3942813350598557, "grad_norm": 82.32347869873047, "learning_rate": 5e-06, "loss": 2.1439, "step": 7460 }, { "epoch": 0.39480986231864906, "grad_norm": 68.02698516845703, "learning_rate": 5e-06, "loss": 2.3319, "step": 7470 }, { "epoch": 0.39533838957744244, "grad_norm": 117.5510025024414, "learning_rate": 5e-06, "loss": 2.5163, "step": 7480 }, { "epoch": 0.3958669168362358, "grad_norm": 79.62327575683594, "learning_rate": 5e-06, "loss": 2.0647, "step": 7490 }, { "epoch": 0.3963954440950292, "grad_norm": 92.02657318115234, "learning_rate": 5e-06, "loss": 2.3278, "step": 7500 }, { "epoch": 0.39692397135382257, "grad_norm": 196.5633087158203, "learning_rate": 5e-06, "loss": 2.5017, "step": 7510 }, { "epoch": 0.39745249861261595, "grad_norm": 80.96989440917969, "learning_rate": 5e-06, "loss": 2.6479, "step": 7520 }, { "epoch": 0.3979810258714093, "grad_norm": 64.2668228149414, "learning_rate": 5e-06, "loss": 2.2592, "step": 7530 }, { "epoch": 0.3985095531302027, "grad_norm": 66.24748229980469, "learning_rate": 5e-06, "loss": 2.2102, "step": 7540 }, { "epoch": 0.3990380803889961, "grad_norm": 93.50106811523438, "learning_rate": 5e-06, "loss": 2.6086, "step": 7550 }, { "epoch": 0.39956660764778945, "grad_norm": 94.76029205322266, "learning_rate": 5e-06, "loss": 2.2675, "step": 7560 }, { "epoch": 0.4000951349065828, "grad_norm": 81.29190826416016, "learning_rate": 5e-06, "loss": 2.207, "step": 7570 }, { "epoch": 0.4006236621653762, "grad_norm": 74.0347671508789, "learning_rate": 5e-06, "loss": 1.758, "step": 7580 }, { "epoch": 0.4011521894241695, "grad_norm": 122.70045471191406, "learning_rate": 5e-06, "loss": 2.3628, "step": 7590 }, { "epoch": 0.4016807166829629, "grad_norm": 86.93870544433594, "learning_rate": 5e-06, "loss": 2.3014, "step": 7600 }, { "epoch": 0.4022092439417563, "grad_norm": 75.14707946777344, "learning_rate": 5e-06, "loss": 2.5705, "step": 7610 }, { "epoch": 0.40273777120054965, "grad_norm": 92.18370056152344, "learning_rate": 5e-06, "loss": 2.2995, "step": 7620 }, { "epoch": 0.40326629845934303, "grad_norm": 91.4660415649414, "learning_rate": 5e-06, "loss": 2.3978, "step": 7630 }, { "epoch": 0.4037948257181364, "grad_norm": 106.81151580810547, "learning_rate": 5e-06, "loss": 2.6242, "step": 7640 }, { "epoch": 0.4043233529769298, "grad_norm": 101.3980484008789, "learning_rate": 5e-06, "loss": 2.2912, "step": 7650 }, { "epoch": 0.40485188023572316, "grad_norm": 91.17699432373047, "learning_rate": 5e-06, "loss": 2.6774, "step": 7660 }, { "epoch": 0.40538040749451654, "grad_norm": 58.752525329589844, "learning_rate": 5e-06, "loss": 2.5206, "step": 7670 }, { "epoch": 0.4059089347533099, "grad_norm": 72.27642822265625, "learning_rate": 5e-06, "loss": 2.2776, "step": 7680 }, { "epoch": 0.4064374620121033, "grad_norm": 83.94076538085938, "learning_rate": 5e-06, "loss": 2.4176, "step": 7690 }, { "epoch": 0.40696598927089667, "grad_norm": 58.16764831542969, "learning_rate": 5e-06, "loss": 2.1437, "step": 7700 }, { "epoch": 0.40749451652969004, "grad_norm": 66.45829010009766, "learning_rate": 5e-06, "loss": 2.4326, "step": 7710 }, { "epoch": 0.40802304378848336, "grad_norm": 88.25824737548828, "learning_rate": 5e-06, "loss": 2.3895, "step": 7720 }, { "epoch": 0.40855157104727674, "grad_norm": 87.04804229736328, "learning_rate": 5e-06, "loss": 2.3312, "step": 7730 }, { "epoch": 0.4090800983060701, "grad_norm": 85.75321960449219, "learning_rate": 5e-06, "loss": 2.3787, "step": 7740 }, { "epoch": 0.4096086255648635, "grad_norm": 87.73616790771484, "learning_rate": 5e-06, "loss": 2.4938, "step": 7750 }, { "epoch": 0.41013715282365687, "grad_norm": 109.9384765625, "learning_rate": 5e-06, "loss": 2.3136, "step": 7760 }, { "epoch": 0.41066568008245025, "grad_norm": 81.22037506103516, "learning_rate": 5e-06, "loss": 2.4216, "step": 7770 }, { "epoch": 0.4111942073412436, "grad_norm": 68.64344024658203, "learning_rate": 5e-06, "loss": 2.2783, "step": 7780 }, { "epoch": 0.411722734600037, "grad_norm": 64.24508666992188, "learning_rate": 5e-06, "loss": 2.2578, "step": 7790 }, { "epoch": 0.4122512618588304, "grad_norm": 93.29961395263672, "learning_rate": 5e-06, "loss": 2.506, "step": 7800 }, { "epoch": 0.41277978911762375, "grad_norm": 73.07658386230469, "learning_rate": 5e-06, "loss": 2.3722, "step": 7810 }, { "epoch": 0.41330831637641713, "grad_norm": 58.512969970703125, "learning_rate": 5e-06, "loss": 2.5224, "step": 7820 }, { "epoch": 0.4138368436352105, "grad_norm": 75.0316162109375, "learning_rate": 5e-06, "loss": 2.3156, "step": 7830 }, { "epoch": 0.4143653708940039, "grad_norm": 83.12090301513672, "learning_rate": 5e-06, "loss": 2.0756, "step": 7840 }, { "epoch": 0.41489389815279726, "grad_norm": 89.32904052734375, "learning_rate": 5e-06, "loss": 2.2506, "step": 7850 }, { "epoch": 0.4154224254115906, "grad_norm": 90.87010955810547, "learning_rate": 5e-06, "loss": 2.1303, "step": 7860 }, { "epoch": 0.41595095267038396, "grad_norm": 110.53621673583984, "learning_rate": 5e-06, "loss": 2.2839, "step": 7870 }, { "epoch": 0.41647947992917733, "grad_norm": 123.16323852539062, "learning_rate": 5e-06, "loss": 2.3839, "step": 7880 }, { "epoch": 0.4170080071879707, "grad_norm": 65.1724853515625, "learning_rate": 5e-06, "loss": 1.8874, "step": 7890 }, { "epoch": 0.4175365344467641, "grad_norm": 83.29474639892578, "learning_rate": 5e-06, "loss": 2.2933, "step": 7900 }, { "epoch": 0.41806506170555746, "grad_norm": 81.61724853515625, "learning_rate": 5e-06, "loss": 2.188, "step": 7910 }, { "epoch": 0.41859358896435084, "grad_norm": 53.88490295410156, "learning_rate": 5e-06, "loss": 1.8456, "step": 7920 }, { "epoch": 0.4191221162231442, "grad_norm": 55.26011657714844, "learning_rate": 5e-06, "loss": 2.4328, "step": 7930 }, { "epoch": 0.4196506434819376, "grad_norm": 77.19078826904297, "learning_rate": 5e-06, "loss": 2.1574, "step": 7940 }, { "epoch": 0.42017917074073097, "grad_norm": 68.58393096923828, "learning_rate": 5e-06, "loss": 2.1441, "step": 7950 }, { "epoch": 0.42070769799952434, "grad_norm": 78.54864501953125, "learning_rate": 5e-06, "loss": 2.1233, "step": 7960 }, { "epoch": 0.4212362252583177, "grad_norm": 101.91667175292969, "learning_rate": 5e-06, "loss": 1.982, "step": 7970 }, { "epoch": 0.4217647525171111, "grad_norm": 95.27210998535156, "learning_rate": 5e-06, "loss": 2.4667, "step": 7980 }, { "epoch": 0.4222932797759044, "grad_norm": 90.08631896972656, "learning_rate": 5e-06, "loss": 2.3762, "step": 7990 }, { "epoch": 0.4228218070346978, "grad_norm": 76.3437728881836, "learning_rate": 5e-06, "loss": 2.1713, "step": 8000 }, { "epoch": 0.4228218070346978, "eval_loss": 2.037670135498047, "eval_runtime": 44.8575, "eval_samples_per_second": 272.686, "eval_steps_per_second": 8.538, "eval_sts-dev_pearson_cosine": 0.8504501040428015, "eval_sts-dev_pearson_dot": 0.832041872812504, "eval_sts-dev_pearson_euclidean": 0.8551780442943523, "eval_sts-dev_pearson_manhattan": 0.8553229694668281, "eval_sts-dev_pearson_max": 0.8553229694668281, "eval_sts-dev_spearman_cosine": 0.8544381952843636, "eval_sts-dev_spearman_dot": 0.8313332365255663, "eval_sts-dev_spearman_euclidean": 0.8565249895429383, "eval_sts-dev_spearman_manhattan": 0.8565081339573394, "eval_sts-dev_spearman_max": 0.8565249895429383, "step": 8000 }, { "epoch": 0.42335033429349117, "grad_norm": 84.17282104492188, "learning_rate": 5e-06, "loss": 2.3028, "step": 8010 }, { "epoch": 0.42387886155228455, "grad_norm": 86.34334564208984, "learning_rate": 5e-06, "loss": 2.2786, "step": 8020 }, { "epoch": 0.4244073888110779, "grad_norm": 73.70162200927734, "learning_rate": 5e-06, "loss": 2.8235, "step": 8030 }, { "epoch": 0.4249359160698713, "grad_norm": 101.8917236328125, "learning_rate": 5e-06, "loss": 2.0357, "step": 8040 }, { "epoch": 0.4254644433286647, "grad_norm": 95.89956665039062, "learning_rate": 5e-06, "loss": 2.708, "step": 8050 }, { "epoch": 0.42599297058745805, "grad_norm": 90.04303741455078, "learning_rate": 5e-06, "loss": 2.266, "step": 8060 }, { "epoch": 0.42652149784625143, "grad_norm": 101.10943603515625, "learning_rate": 5e-06, "loss": 2.3242, "step": 8070 }, { "epoch": 0.4270500251050448, "grad_norm": 84.576171875, "learning_rate": 5e-06, "loss": 2.09, "step": 8080 }, { "epoch": 0.4275785523638382, "grad_norm": 52.01784896850586, "learning_rate": 5e-06, "loss": 2.0059, "step": 8090 }, { "epoch": 0.42810707962263156, "grad_norm": 82.29341125488281, "learning_rate": 5e-06, "loss": 2.2282, "step": 8100 }, { "epoch": 0.42863560688142494, "grad_norm": 82.75264739990234, "learning_rate": 5e-06, "loss": 2.7941, "step": 8110 }, { "epoch": 0.42916413414021826, "grad_norm": 83.516845703125, "learning_rate": 5e-06, "loss": 2.4779, "step": 8120 }, { "epoch": 0.42969266139901163, "grad_norm": 81.35162353515625, "learning_rate": 5e-06, "loss": 2.2005, "step": 8130 }, { "epoch": 0.430221188657805, "grad_norm": 73.7695083618164, "learning_rate": 5e-06, "loss": 2.3417, "step": 8140 }, { "epoch": 0.4307497159165984, "grad_norm": 72.13195037841797, "learning_rate": 5e-06, "loss": 2.3071, "step": 8150 }, { "epoch": 0.43127824317539176, "grad_norm": 92.3564224243164, "learning_rate": 5e-06, "loss": 1.8912, "step": 8160 }, { "epoch": 0.43180677043418514, "grad_norm": 62.33097839355469, "learning_rate": 5e-06, "loss": 2.4276, "step": 8170 }, { "epoch": 0.4323352976929785, "grad_norm": 64.7095718383789, "learning_rate": 5e-06, "loss": 2.2056, "step": 8180 }, { "epoch": 0.4328638249517719, "grad_norm": 68.21233367919922, "learning_rate": 5e-06, "loss": 2.3353, "step": 8190 }, { "epoch": 0.43339235221056527, "grad_norm": 129.8682098388672, "learning_rate": 5e-06, "loss": 2.4659, "step": 8200 }, { "epoch": 0.43392087946935864, "grad_norm": 56.16792297363281, "learning_rate": 5e-06, "loss": 1.8468, "step": 8210 }, { "epoch": 0.434449406728152, "grad_norm": 99.28547668457031, "learning_rate": 5e-06, "loss": 2.5243, "step": 8220 }, { "epoch": 0.4349779339869454, "grad_norm": 64.94953155517578, "learning_rate": 5e-06, "loss": 2.334, "step": 8230 }, { "epoch": 0.4355064612457388, "grad_norm": 80.8280258178711, "learning_rate": 5e-06, "loss": 2.4295, "step": 8240 }, { "epoch": 0.4360349885045321, "grad_norm": 103.33686065673828, "learning_rate": 5e-06, "loss": 2.4532, "step": 8250 }, { "epoch": 0.43656351576332547, "grad_norm": 60.97555923461914, "learning_rate": 5e-06, "loss": 2.4278, "step": 8260 }, { "epoch": 0.43709204302211885, "grad_norm": 76.90562438964844, "learning_rate": 5e-06, "loss": 2.3989, "step": 8270 }, { "epoch": 0.4376205702809122, "grad_norm": 68.10173797607422, "learning_rate": 5e-06, "loss": 1.5646, "step": 8280 }, { "epoch": 0.4381490975397056, "grad_norm": 87.57270050048828, "learning_rate": 5e-06, "loss": 2.4047, "step": 8290 }, { "epoch": 0.438677624798499, "grad_norm": 73.70536041259766, "learning_rate": 5e-06, "loss": 2.4523, "step": 8300 }, { "epoch": 0.43920615205729235, "grad_norm": 84.42355346679688, "learning_rate": 5e-06, "loss": 2.4982, "step": 8310 }, { "epoch": 0.43973467931608573, "grad_norm": 83.89588165283203, "learning_rate": 5e-06, "loss": 1.9305, "step": 8320 }, { "epoch": 0.4402632065748791, "grad_norm": 94.1212158203125, "learning_rate": 5e-06, "loss": 2.1717, "step": 8330 }, { "epoch": 0.4407917338336725, "grad_norm": 75.4131851196289, "learning_rate": 5e-06, "loss": 2.4828, "step": 8340 }, { "epoch": 0.44132026109246586, "grad_norm": 71.93439483642578, "learning_rate": 5e-06, "loss": 2.1911, "step": 8350 }, { "epoch": 0.44184878835125924, "grad_norm": 62.71750259399414, "learning_rate": 5e-06, "loss": 2.1961, "step": 8360 }, { "epoch": 0.4423773156100526, "grad_norm": 75.80941009521484, "learning_rate": 5e-06, "loss": 2.0246, "step": 8370 }, { "epoch": 0.44290584286884593, "grad_norm": 67.60956573486328, "learning_rate": 5e-06, "loss": 2.2048, "step": 8380 }, { "epoch": 0.4434343701276393, "grad_norm": 77.0855941772461, "learning_rate": 5e-06, "loss": 2.397, "step": 8390 }, { "epoch": 0.4439628973864327, "grad_norm": 77.5289306640625, "learning_rate": 5e-06, "loss": 2.1629, "step": 8400 }, { "epoch": 0.44449142464522606, "grad_norm": 71.46099853515625, "learning_rate": 5e-06, "loss": 2.4421, "step": 8410 }, { "epoch": 0.44501995190401944, "grad_norm": 99.5431137084961, "learning_rate": 5e-06, "loss": 2.386, "step": 8420 }, { "epoch": 0.4455484791628128, "grad_norm": 163.84603881835938, "learning_rate": 5e-06, "loss": 2.1045, "step": 8430 }, { "epoch": 0.4460770064216062, "grad_norm": 78.96137237548828, "learning_rate": 5e-06, "loss": 1.8055, "step": 8440 }, { "epoch": 0.44660553368039957, "grad_norm": 74.40946197509766, "learning_rate": 5e-06, "loss": 2.2208, "step": 8450 }, { "epoch": 0.44713406093919295, "grad_norm": 109.62026977539062, "learning_rate": 5e-06, "loss": 2.0568, "step": 8460 }, { "epoch": 0.4476625881979863, "grad_norm": 94.00117492675781, "learning_rate": 5e-06, "loss": 2.2589, "step": 8470 }, { "epoch": 0.4481911154567797, "grad_norm": 80.487548828125, "learning_rate": 5e-06, "loss": 2.8444, "step": 8480 }, { "epoch": 0.4487196427155731, "grad_norm": 71.5920181274414, "learning_rate": 5e-06, "loss": 2.2143, "step": 8490 }, { "epoch": 0.44924816997436645, "grad_norm": 78.3924789428711, "learning_rate": 5e-06, "loss": 2.264, "step": 8500 }, { "epoch": 0.44977669723315983, "grad_norm": 73.50074005126953, "learning_rate": 5e-06, "loss": 2.1463, "step": 8510 }, { "epoch": 0.45030522449195315, "grad_norm": 110.69117736816406, "learning_rate": 5e-06, "loss": 2.2488, "step": 8520 }, { "epoch": 0.4508337517507465, "grad_norm": 95.23622131347656, "learning_rate": 5e-06, "loss": 2.3966, "step": 8530 }, { "epoch": 0.4513622790095399, "grad_norm": 89.26216888427734, "learning_rate": 5e-06, "loss": 2.1854, "step": 8540 }, { "epoch": 0.4518908062683333, "grad_norm": 63.33109664916992, "learning_rate": 5e-06, "loss": 2.2416, "step": 8550 }, { "epoch": 0.45241933352712665, "grad_norm": 81.17635345458984, "learning_rate": 5e-06, "loss": 2.1948, "step": 8560 }, { "epoch": 0.45294786078592003, "grad_norm": 77.80203247070312, "learning_rate": 5e-06, "loss": 2.311, "step": 8570 }, { "epoch": 0.4534763880447134, "grad_norm": 59.15773010253906, "learning_rate": 5e-06, "loss": 2.0158, "step": 8580 }, { "epoch": 0.4540049153035068, "grad_norm": 96.2970199584961, "learning_rate": 5e-06, "loss": 2.5005, "step": 8590 }, { "epoch": 0.45453344256230016, "grad_norm": 81.16047668457031, "learning_rate": 5e-06, "loss": 2.1042, "step": 8600 }, { "epoch": 0.45506196982109354, "grad_norm": 58.100242614746094, "learning_rate": 5e-06, "loss": 1.9172, "step": 8610 }, { "epoch": 0.4555904970798869, "grad_norm": 98.82510375976562, "learning_rate": 5e-06, "loss": 2.5975, "step": 8620 }, { "epoch": 0.4561190243386803, "grad_norm": 95.67361450195312, "learning_rate": 5e-06, "loss": 2.1305, "step": 8630 }, { "epoch": 0.45664755159747367, "grad_norm": 88.38704681396484, "learning_rate": 5e-06, "loss": 2.1287, "step": 8640 }, { "epoch": 0.457176078856267, "grad_norm": 76.74720001220703, "learning_rate": 5e-06, "loss": 1.9931, "step": 8650 }, { "epoch": 0.45770460611506036, "grad_norm": 153.64523315429688, "learning_rate": 5e-06, "loss": 2.8442, "step": 8660 }, { "epoch": 0.45823313337385374, "grad_norm": 78.54724884033203, "learning_rate": 5e-06, "loss": 2.2031, "step": 8670 }, { "epoch": 0.4587616606326471, "grad_norm": 88.57320404052734, "learning_rate": 5e-06, "loss": 2.3233, "step": 8680 }, { "epoch": 0.4592901878914405, "grad_norm": 73.68769836425781, "learning_rate": 5e-06, "loss": 2.3802, "step": 8690 }, { "epoch": 0.45981871515023387, "grad_norm": 67.89664459228516, "learning_rate": 5e-06, "loss": 2.1564, "step": 8700 }, { "epoch": 0.46034724240902725, "grad_norm": 81.0985336303711, "learning_rate": 5e-06, "loss": 2.2118, "step": 8710 }, { "epoch": 0.4608757696678206, "grad_norm": 81.54734802246094, "learning_rate": 5e-06, "loss": 1.9577, "step": 8720 }, { "epoch": 0.461404296926614, "grad_norm": 85.1313705444336, "learning_rate": 5e-06, "loss": 2.3115, "step": 8730 }, { "epoch": 0.4619328241854074, "grad_norm": 57.35528564453125, "learning_rate": 5e-06, "loss": 2.4691, "step": 8740 }, { "epoch": 0.46246135144420075, "grad_norm": 50.70564270019531, "learning_rate": 5e-06, "loss": 1.9895, "step": 8750 }, { "epoch": 0.46298987870299413, "grad_norm": 88.63624572753906, "learning_rate": 5e-06, "loss": 2.398, "step": 8760 }, { "epoch": 0.4635184059617875, "grad_norm": 86.6895751953125, "learning_rate": 5e-06, "loss": 2.2087, "step": 8770 }, { "epoch": 0.4640469332205808, "grad_norm": 104.60720825195312, "learning_rate": 5e-06, "loss": 2.3215, "step": 8780 }, { "epoch": 0.4645754604793742, "grad_norm": 65.08056640625, "learning_rate": 5e-06, "loss": 2.8088, "step": 8790 }, { "epoch": 0.4651039877381676, "grad_norm": 89.63246154785156, "learning_rate": 5e-06, "loss": 1.9797, "step": 8800 }, { "epoch": 0.46563251499696096, "grad_norm": 64.14983367919922, "learning_rate": 5e-06, "loss": 2.0951, "step": 8810 }, { "epoch": 0.46616104225575433, "grad_norm": 77.82695007324219, "learning_rate": 5e-06, "loss": 2.572, "step": 8820 }, { "epoch": 0.4666895695145477, "grad_norm": 86.38075256347656, "learning_rate": 5e-06, "loss": 2.197, "step": 8830 }, { "epoch": 0.4672180967733411, "grad_norm": 67.32428741455078, "learning_rate": 5e-06, "loss": 2.2119, "step": 8840 }, { "epoch": 0.46774662403213446, "grad_norm": 116.0421371459961, "learning_rate": 5e-06, "loss": 1.9442, "step": 8850 }, { "epoch": 0.46827515129092784, "grad_norm": 75.73082733154297, "learning_rate": 5e-06, "loss": 2.0326, "step": 8860 }, { "epoch": 0.4688036785497212, "grad_norm": 89.14604949951172, "learning_rate": 5e-06, "loss": 2.1965, "step": 8870 }, { "epoch": 0.4693322058085146, "grad_norm": 92.06327056884766, "learning_rate": 5e-06, "loss": 1.8604, "step": 8880 }, { "epoch": 0.46986073306730797, "grad_norm": 70.97053527832031, "learning_rate": 5e-06, "loss": 2.7984, "step": 8890 }, { "epoch": 0.47038926032610134, "grad_norm": 86.17658233642578, "learning_rate": 5e-06, "loss": 1.8942, "step": 8900 }, { "epoch": 0.47091778758489466, "grad_norm": 54.239017486572266, "learning_rate": 5e-06, "loss": 1.9657, "step": 8910 }, { "epoch": 0.47144631484368804, "grad_norm": 80.33621978759766, "learning_rate": 5e-06, "loss": 1.8274, "step": 8920 }, { "epoch": 0.4719748421024814, "grad_norm": 96.34200286865234, "learning_rate": 5e-06, "loss": 2.5539, "step": 8930 }, { "epoch": 0.4725033693612748, "grad_norm": 61.046974182128906, "learning_rate": 5e-06, "loss": 2.293, "step": 8940 }, { "epoch": 0.47303189662006817, "grad_norm": 64.22650146484375, "learning_rate": 5e-06, "loss": 2.3011, "step": 8950 }, { "epoch": 0.47356042387886155, "grad_norm": 80.69186401367188, "learning_rate": 5e-06, "loss": 2.3161, "step": 8960 }, { "epoch": 0.4740889511376549, "grad_norm": 62.911094665527344, "learning_rate": 5e-06, "loss": 1.8796, "step": 8970 }, { "epoch": 0.4746174783964483, "grad_norm": 90.61019134521484, "learning_rate": 5e-06, "loss": 2.4049, "step": 8980 }, { "epoch": 0.4751460056552417, "grad_norm": 142.2237548828125, "learning_rate": 5e-06, "loss": 2.4173, "step": 8990 }, { "epoch": 0.47567453291403505, "grad_norm": 83.27155303955078, "learning_rate": 5e-06, "loss": 2.1677, "step": 9000 }, { "epoch": 0.47567453291403505, "eval_loss": 2.000697612762451, "eval_runtime": 45.4755, "eval_samples_per_second": 268.98, "eval_steps_per_second": 8.422, "eval_sts-dev_pearson_cosine": 0.848608594070204, "eval_sts-dev_pearson_dot": 0.8222903254893028, "eval_sts-dev_pearson_euclidean": 0.85571555832111, "eval_sts-dev_pearson_manhattan": 0.8552958439768454, "eval_sts-dev_pearson_max": 0.85571555832111, "eval_sts-dev_spearman_cosine": 0.8535256515647943, "eval_sts-dev_spearman_dot": 0.8194125119916688, "eval_sts-dev_spearman_euclidean": 0.8561276564338725, "eval_sts-dev_spearman_manhattan": 0.8554393033566626, "eval_sts-dev_spearman_max": 0.8561276564338725, "step": 9000 }, { "epoch": 0.47620306017282843, "grad_norm": 109.68781280517578, "learning_rate": 5e-06, "loss": 2.5106, "step": 9010 }, { "epoch": 0.4767315874316218, "grad_norm": 59.00276565551758, "learning_rate": 5e-06, "loss": 2.2912, "step": 9020 }, { "epoch": 0.4772601146904152, "grad_norm": 75.38319396972656, "learning_rate": 5e-06, "loss": 1.956, "step": 9030 }, { "epoch": 0.4777886419492085, "grad_norm": 83.20968627929688, "learning_rate": 5e-06, "loss": 2.5097, "step": 9040 }, { "epoch": 0.4783171692080019, "grad_norm": 60.01792526245117, "learning_rate": 5e-06, "loss": 2.5623, "step": 9050 }, { "epoch": 0.47884569646679526, "grad_norm": 65.10391235351562, "learning_rate": 5e-06, "loss": 2.1834, "step": 9060 }, { "epoch": 0.47937422372558863, "grad_norm": 69.19440460205078, "learning_rate": 5e-06, "loss": 2.1405, "step": 9070 }, { "epoch": 0.479902750984382, "grad_norm": 67.66195678710938, "learning_rate": 5e-06, "loss": 2.1491, "step": 9080 }, { "epoch": 0.4804312782431754, "grad_norm": 71.31856536865234, "learning_rate": 5e-06, "loss": 2.1401, "step": 9090 }, { "epoch": 0.48095980550196876, "grad_norm": 89.15548706054688, "learning_rate": 5e-06, "loss": 2.1113, "step": 9100 }, { "epoch": 0.48148833276076214, "grad_norm": 88.10211944580078, "learning_rate": 5e-06, "loss": 2.0769, "step": 9110 }, { "epoch": 0.4820168600195555, "grad_norm": 89.93597412109375, "learning_rate": 5e-06, "loss": 2.1247, "step": 9120 }, { "epoch": 0.4825453872783489, "grad_norm": 62.70326614379883, "learning_rate": 5e-06, "loss": 1.8837, "step": 9130 }, { "epoch": 0.48307391453714227, "grad_norm": 51.83104705810547, "learning_rate": 5e-06, "loss": 1.8841, "step": 9140 }, { "epoch": 0.48360244179593564, "grad_norm": 86.19837188720703, "learning_rate": 5e-06, "loss": 2.1995, "step": 9150 }, { "epoch": 0.484130969054729, "grad_norm": 80.20140838623047, "learning_rate": 5e-06, "loss": 2.6699, "step": 9160 }, { "epoch": 0.48465949631352234, "grad_norm": 80.85934448242188, "learning_rate": 5e-06, "loss": 1.9363, "step": 9170 }, { "epoch": 0.4851880235723157, "grad_norm": 68.81660461425781, "learning_rate": 5e-06, "loss": 2.1295, "step": 9180 }, { "epoch": 0.4857165508311091, "grad_norm": 62.29378128051758, "learning_rate": 5e-06, "loss": 2.2007, "step": 9190 }, { "epoch": 0.48624507808990247, "grad_norm": 109.8076400756836, "learning_rate": 5e-06, "loss": 2.3909, "step": 9200 }, { "epoch": 0.48677360534869585, "grad_norm": 93.58944702148438, "learning_rate": 5e-06, "loss": 2.064, "step": 9210 }, { "epoch": 0.4873021326074892, "grad_norm": 76.8218002319336, "learning_rate": 5e-06, "loss": 2.3434, "step": 9220 }, { "epoch": 0.4878306598662826, "grad_norm": 104.14209747314453, "learning_rate": 5e-06, "loss": 2.2552, "step": 9230 }, { "epoch": 0.488359187125076, "grad_norm": 66.23809051513672, "learning_rate": 5e-06, "loss": 2.1987, "step": 9240 }, { "epoch": 0.48888771438386935, "grad_norm": 98.32476043701172, "learning_rate": 5e-06, "loss": 2.464, "step": 9250 }, { "epoch": 0.48941624164266273, "grad_norm": 77.5644760131836, "learning_rate": 5e-06, "loss": 1.9504, "step": 9260 }, { "epoch": 0.4899447689014561, "grad_norm": 49.850887298583984, "learning_rate": 5e-06, "loss": 1.8869, "step": 9270 }, { "epoch": 0.4904732961602495, "grad_norm": 71.82830810546875, "learning_rate": 5e-06, "loss": 2.7072, "step": 9280 }, { "epoch": 0.49100182341904286, "grad_norm": 83.45108032226562, "learning_rate": 5e-06, "loss": 2.042, "step": 9290 }, { "epoch": 0.49153035067783624, "grad_norm": 87.52446746826172, "learning_rate": 5e-06, "loss": 1.8342, "step": 9300 }, { "epoch": 0.49205887793662956, "grad_norm": 70.19332885742188, "learning_rate": 5e-06, "loss": 2.2111, "step": 9310 }, { "epoch": 0.49258740519542293, "grad_norm": 89.36085510253906, "learning_rate": 5e-06, "loss": 2.3119, "step": 9320 }, { "epoch": 0.4931159324542163, "grad_norm": 66.47834777832031, "learning_rate": 5e-06, "loss": 2.1631, "step": 9330 }, { "epoch": 0.4936444597130097, "grad_norm": 84.3299560546875, "learning_rate": 5e-06, "loss": 2.2638, "step": 9340 }, { "epoch": 0.49417298697180306, "grad_norm": 86.4233169555664, "learning_rate": 5e-06, "loss": 2.1999, "step": 9350 }, { "epoch": 0.49470151423059644, "grad_norm": 82.92412567138672, "learning_rate": 5e-06, "loss": 2.0939, "step": 9360 }, { "epoch": 0.4952300414893898, "grad_norm": 101.32273864746094, "learning_rate": 5e-06, "loss": 2.142, "step": 9370 }, { "epoch": 0.4957585687481832, "grad_norm": 80.90555572509766, "learning_rate": 5e-06, "loss": 2.3336, "step": 9380 }, { "epoch": 0.49628709600697657, "grad_norm": 88.67681121826172, "learning_rate": 5e-06, "loss": 2.1553, "step": 9390 }, { "epoch": 0.49681562326576995, "grad_norm": 66.96258544921875, "learning_rate": 5e-06, "loss": 1.9937, "step": 9400 }, { "epoch": 0.4973441505245633, "grad_norm": 65.61308288574219, "learning_rate": 5e-06, "loss": 1.7992, "step": 9410 }, { "epoch": 0.4978726777833567, "grad_norm": 103.406005859375, "learning_rate": 5e-06, "loss": 2.3669, "step": 9420 }, { "epoch": 0.4984012050421501, "grad_norm": 80.52974700927734, "learning_rate": 5e-06, "loss": 2.3509, "step": 9430 }, { "epoch": 0.4989297323009434, "grad_norm": 55.96629333496094, "learning_rate": 5e-06, "loss": 2.0023, "step": 9440 }, { "epoch": 0.4994582595597368, "grad_norm": 69.77912902832031, "learning_rate": 5e-06, "loss": 2.1049, "step": 9450 }, { "epoch": 0.49998678681853015, "grad_norm": 90.34381866455078, "learning_rate": 5e-06, "loss": 2.5829, "step": 9460 }, { "epoch": 0.5005153140773235, "grad_norm": 101.19933319091797, "learning_rate": 5e-06, "loss": 2.3369, "step": 9470 }, { "epoch": 0.5010438413361169, "grad_norm": 53.92314529418945, "learning_rate": 5e-06, "loss": 2.0524, "step": 9480 }, { "epoch": 0.5015723685949103, "grad_norm": 63.887542724609375, "learning_rate": 5e-06, "loss": 2.4597, "step": 9490 }, { "epoch": 0.5021008958537037, "grad_norm": 76.12770080566406, "learning_rate": 5e-06, "loss": 1.9285, "step": 9500 }, { "epoch": 0.502629423112497, "grad_norm": 78.45658111572266, "learning_rate": 5e-06, "loss": 2.4426, "step": 9510 }, { "epoch": 0.5031579503712904, "grad_norm": 214.7315673828125, "learning_rate": 5e-06, "loss": 2.2354, "step": 9520 }, { "epoch": 0.5036864776300838, "grad_norm": 72.56678771972656, "learning_rate": 5e-06, "loss": 2.4182, "step": 9530 }, { "epoch": 0.5042150048888772, "grad_norm": 92.85859680175781, "learning_rate": 5e-06, "loss": 2.0607, "step": 9540 }, { "epoch": 0.5047435321476705, "grad_norm": 67.09737396240234, "learning_rate": 5e-06, "loss": 2.1768, "step": 9550 }, { "epoch": 0.5052720594064639, "grad_norm": 79.30492401123047, "learning_rate": 5e-06, "loss": 1.9907, "step": 9560 }, { "epoch": 0.5058005866652573, "grad_norm": 83.70542907714844, "learning_rate": 5e-06, "loss": 2.1211, "step": 9570 }, { "epoch": 0.5063291139240507, "grad_norm": 104.08589935302734, "learning_rate": 5e-06, "loss": 2.0412, "step": 9580 }, { "epoch": 0.506857641182844, "grad_norm": 99.36257934570312, "learning_rate": 5e-06, "loss": 2.153, "step": 9590 }, { "epoch": 0.5073861684416374, "grad_norm": 59.897186279296875, "learning_rate": 5e-06, "loss": 1.949, "step": 9600 }, { "epoch": 0.5079146957004308, "grad_norm": 83.39509582519531, "learning_rate": 5e-06, "loss": 2.1452, "step": 9610 }, { "epoch": 0.5084432229592242, "grad_norm": 77.48872375488281, "learning_rate": 5e-06, "loss": 2.4183, "step": 9620 }, { "epoch": 0.5089717502180175, "grad_norm": 181.36204528808594, "learning_rate": 5e-06, "loss": 1.6261, "step": 9630 }, { "epoch": 0.5095002774768108, "grad_norm": 78.51200866699219, "learning_rate": 5e-06, "loss": 1.678, "step": 9640 }, { "epoch": 0.5100288047356042, "grad_norm": 78.73798370361328, "learning_rate": 5e-06, "loss": 1.8126, "step": 9650 }, { "epoch": 0.5105573319943976, "grad_norm": 110.20191192626953, "learning_rate": 5e-06, "loss": 2.5273, "step": 9660 }, { "epoch": 0.5110858592531909, "grad_norm": 77.06707000732422, "learning_rate": 5e-06, "loss": 2.3741, "step": 9670 }, { "epoch": 0.5116143865119843, "grad_norm": 91.65406799316406, "learning_rate": 5e-06, "loss": 2.0246, "step": 9680 }, { "epoch": 0.5121429137707777, "grad_norm": 58.375728607177734, "learning_rate": 5e-06, "loss": 2.5248, "step": 9690 }, { "epoch": 0.5126714410295711, "grad_norm": 95.68407440185547, "learning_rate": 5e-06, "loss": 2.1825, "step": 9700 }, { "epoch": 0.5131999682883644, "grad_norm": 102.95429992675781, "learning_rate": 5e-06, "loss": 2.2337, "step": 9710 }, { "epoch": 0.5137284955471578, "grad_norm": 67.77655029296875, "learning_rate": 5e-06, "loss": 2.1342, "step": 9720 }, { "epoch": 0.5142570228059512, "grad_norm": 81.43525695800781, "learning_rate": 5e-06, "loss": 2.1805, "step": 9730 }, { "epoch": 0.5147855500647446, "grad_norm": 112.84664154052734, "learning_rate": 5e-06, "loss": 2.2736, "step": 9740 }, { "epoch": 0.515314077323538, "grad_norm": 72.68231201171875, "learning_rate": 5e-06, "loss": 1.8928, "step": 9750 }, { "epoch": 0.5158426045823313, "grad_norm": 90.49602508544922, "learning_rate": 5e-06, "loss": 1.763, "step": 9760 }, { "epoch": 0.5163711318411247, "grad_norm": 73.89015197753906, "learning_rate": 5e-06, "loss": 2.1687, "step": 9770 }, { "epoch": 0.5168996590999181, "grad_norm": 84.64232635498047, "learning_rate": 5e-06, "loss": 2.4035, "step": 9780 }, { "epoch": 0.5174281863587115, "grad_norm": 57.00529861450195, "learning_rate": 5e-06, "loss": 1.9486, "step": 9790 }, { "epoch": 0.5179567136175048, "grad_norm": 86.94971466064453, "learning_rate": 5e-06, "loss": 2.3296, "step": 9800 }, { "epoch": 0.5184852408762982, "grad_norm": 60.35567092895508, "learning_rate": 5e-06, "loss": 2.2497, "step": 9810 }, { "epoch": 0.5190137681350916, "grad_norm": 87.73572540283203, "learning_rate": 5e-06, "loss": 2.1453, "step": 9820 }, { "epoch": 0.519542295393885, "grad_norm": 94.50737762451172, "learning_rate": 5e-06, "loss": 2.0844, "step": 9830 }, { "epoch": 0.5200708226526783, "grad_norm": 73.4122314453125, "learning_rate": 5e-06, "loss": 2.546, "step": 9840 }, { "epoch": 0.5205993499114717, "grad_norm": 65.6158676147461, "learning_rate": 5e-06, "loss": 1.8924, "step": 9850 }, { "epoch": 0.5211278771702651, "grad_norm": 66.35785675048828, "learning_rate": 5e-06, "loss": 2.2063, "step": 9860 }, { "epoch": 0.5216564044290585, "grad_norm": 102.1550064086914, "learning_rate": 5e-06, "loss": 1.963, "step": 9870 }, { "epoch": 0.5221849316878518, "grad_norm": 100.68046569824219, "learning_rate": 5e-06, "loss": 1.9821, "step": 9880 }, { "epoch": 0.5227134589466452, "grad_norm": 102.52055358886719, "learning_rate": 5e-06, "loss": 2.0298, "step": 9890 }, { "epoch": 0.5232419862054385, "grad_norm": 73.86116790771484, "learning_rate": 5e-06, "loss": 1.9623, "step": 9900 }, { "epoch": 0.5237705134642319, "grad_norm": 67.47290802001953, "learning_rate": 5e-06, "loss": 2.0428, "step": 9910 }, { "epoch": 0.5242990407230252, "grad_norm": 52.87221908569336, "learning_rate": 5e-06, "loss": 2.1254, "step": 9920 }, { "epoch": 0.5248275679818186, "grad_norm": 71.35987091064453, "learning_rate": 5e-06, "loss": 2.1403, "step": 9930 }, { "epoch": 0.525356095240612, "grad_norm": 81.87875366210938, "learning_rate": 5e-06, "loss": 2.3268, "step": 9940 }, { "epoch": 0.5258846224994054, "grad_norm": 94.28164672851562, "learning_rate": 5e-06, "loss": 2.5542, "step": 9950 }, { "epoch": 0.5264131497581988, "grad_norm": 59.118553161621094, "learning_rate": 5e-06, "loss": 1.8183, "step": 9960 }, { "epoch": 0.5269416770169921, "grad_norm": 68.60144805908203, "learning_rate": 5e-06, "loss": 2.1065, "step": 9970 }, { "epoch": 0.5274702042757855, "grad_norm": 68.3833236694336, "learning_rate": 5e-06, "loss": 2.198, "step": 9980 }, { "epoch": 0.5279987315345789, "grad_norm": 81.69363403320312, "learning_rate": 5e-06, "loss": 1.8904, "step": 9990 }, { "epoch": 0.5285272587933723, "grad_norm": 140.57589721679688, "learning_rate": 5e-06, "loss": 1.7677, "step": 10000 }, { "epoch": 0.5285272587933723, "eval_loss": 1.9539854526519775, "eval_runtime": 38.0718, "eval_samples_per_second": 321.288, "eval_steps_per_second": 10.06, "eval_sts-dev_pearson_cosine": 0.8499343257941645, "eval_sts-dev_pearson_dot": 0.828003378799855, "eval_sts-dev_pearson_euclidean": 0.8561023720076497, "eval_sts-dev_pearson_manhattan": 0.8559923261114952, "eval_sts-dev_pearson_max": 0.8561023720076497, "eval_sts-dev_spearman_cosine": 0.8551519486771606, "eval_sts-dev_spearman_dot": 0.8260355087536708, "eval_sts-dev_spearman_euclidean": 0.8563010289340317, "eval_sts-dev_spearman_manhattan": 0.8561785986291325, "eval_sts-dev_spearman_max": 0.8563010289340317, "step": 10000 }, { "epoch": 0.5290557860521656, "grad_norm": 85.65283203125, "learning_rate": 5e-06, "loss": 2.0951, "step": 10010 }, { "epoch": 0.529584313310959, "grad_norm": 75.0933609008789, "learning_rate": 5e-06, "loss": 1.9752, "step": 10020 }, { "epoch": 0.5301128405697524, "grad_norm": 66.42445373535156, "learning_rate": 5e-06, "loss": 2.3603, "step": 10030 }, { "epoch": 0.5306413678285458, "grad_norm": 45.87712478637695, "learning_rate": 5e-06, "loss": 1.8831, "step": 10040 }, { "epoch": 0.5311698950873391, "grad_norm": 65.76874542236328, "learning_rate": 5e-06, "loss": 1.9157, "step": 10050 }, { "epoch": 0.5316984223461325, "grad_norm": 69.95355987548828, "learning_rate": 5e-06, "loss": 2.3116, "step": 10060 }, { "epoch": 0.5322269496049259, "grad_norm": 79.4979248046875, "learning_rate": 5e-06, "loss": 2.3408, "step": 10070 }, { "epoch": 0.5327554768637193, "grad_norm": 74.2297134399414, "learning_rate": 5e-06, "loss": 2.0343, "step": 10080 }, { "epoch": 0.5332840041225126, "grad_norm": 77.3371810913086, "learning_rate": 5e-06, "loss": 2.2694, "step": 10090 }, { "epoch": 0.533812531381306, "grad_norm": 85.7645263671875, "learning_rate": 5e-06, "loss": 2.1704, "step": 10100 }, { "epoch": 0.5343410586400994, "grad_norm": 81.91168975830078, "learning_rate": 5e-06, "loss": 2.09, "step": 10110 }, { "epoch": 0.5348695858988928, "grad_norm": 77.06794738769531, "learning_rate": 5e-06, "loss": 1.8177, "step": 10120 }, { "epoch": 0.5353981131576862, "grad_norm": 69.15670013427734, "learning_rate": 5e-06, "loss": 1.7114, "step": 10130 }, { "epoch": 0.5359266404164795, "grad_norm": 53.872222900390625, "learning_rate": 5e-06, "loss": 2.5503, "step": 10140 }, { "epoch": 0.5364551676752729, "grad_norm": 105.8225326538086, "learning_rate": 5e-06, "loss": 1.6709, "step": 10150 }, { "epoch": 0.5369836949340662, "grad_norm": 74.62741088867188, "learning_rate": 5e-06, "loss": 2.3356, "step": 10160 }, { "epoch": 0.5375122221928595, "grad_norm": 65.19194793701172, "learning_rate": 5e-06, "loss": 1.968, "step": 10170 }, { "epoch": 0.5380407494516529, "grad_norm": 55.52613830566406, "learning_rate": 5e-06, "loss": 1.9661, "step": 10180 }, { "epoch": 0.5385692767104463, "grad_norm": 58.993064880371094, "learning_rate": 5e-06, "loss": 1.9646, "step": 10190 }, { "epoch": 0.5390978039692397, "grad_norm": 71.94892883300781, "learning_rate": 5e-06, "loss": 2.1833, "step": 10200 }, { "epoch": 0.539626331228033, "grad_norm": 67.49871063232422, "learning_rate": 5e-06, "loss": 2.02, "step": 10210 }, { "epoch": 0.5401548584868264, "grad_norm": 86.75818634033203, "learning_rate": 5e-06, "loss": 2.3554, "step": 10220 }, { "epoch": 0.5406833857456198, "grad_norm": 63.222110748291016, "learning_rate": 5e-06, "loss": 1.9534, "step": 10230 }, { "epoch": 0.5412119130044132, "grad_norm": 91.1675796508789, "learning_rate": 5e-06, "loss": 2.1227, "step": 10240 }, { "epoch": 0.5417404402632066, "grad_norm": 58.55626678466797, "learning_rate": 5e-06, "loss": 2.0639, "step": 10250 }, { "epoch": 0.5422689675219999, "grad_norm": 77.57384490966797, "learning_rate": 5e-06, "loss": 1.964, "step": 10260 }, { "epoch": 0.5427974947807933, "grad_norm": 90.95801544189453, "learning_rate": 5e-06, "loss": 2.1593, "step": 10270 }, { "epoch": 0.5433260220395867, "grad_norm": 101.7471694946289, "learning_rate": 5e-06, "loss": 2.0885, "step": 10280 }, { "epoch": 0.5438545492983801, "grad_norm": 86.3983154296875, "learning_rate": 5e-06, "loss": 2.4906, "step": 10290 }, { "epoch": 0.5443830765571734, "grad_norm": 101.63555145263672, "learning_rate": 5e-06, "loss": 2.0691, "step": 10300 }, { "epoch": 0.5449116038159668, "grad_norm": 61.16963577270508, "learning_rate": 5e-06, "loss": 2.0257, "step": 10310 }, { "epoch": 0.5454401310747602, "grad_norm": 71.81607818603516, "learning_rate": 5e-06, "loss": 2.1725, "step": 10320 }, { "epoch": 0.5459686583335536, "grad_norm": 81.55643463134766, "learning_rate": 5e-06, "loss": 2.1807, "step": 10330 }, { "epoch": 0.546497185592347, "grad_norm": 69.73988342285156, "learning_rate": 5e-06, "loss": 2.7032, "step": 10340 }, { "epoch": 0.5470257128511403, "grad_norm": 60.26805877685547, "learning_rate": 5e-06, "loss": 1.9263, "step": 10350 }, { "epoch": 0.5475542401099337, "grad_norm": 75.18399810791016, "learning_rate": 5e-06, "loss": 2.2478, "step": 10360 }, { "epoch": 0.5480827673687271, "grad_norm": 101.49771118164062, "learning_rate": 5e-06, "loss": 2.0125, "step": 10370 }, { "epoch": 0.5486112946275205, "grad_norm": 93.70978546142578, "learning_rate": 5e-06, "loss": 1.8108, "step": 10380 }, { "epoch": 0.5491398218863138, "grad_norm": 84.1195297241211, "learning_rate": 5e-06, "loss": 2.4188, "step": 10390 }, { "epoch": 0.5496683491451072, "grad_norm": 100.32792663574219, "learning_rate": 5e-06, "loss": 2.3122, "step": 10400 }, { "epoch": 0.5501968764039006, "grad_norm": 86.36019134521484, "learning_rate": 5e-06, "loss": 2.1894, "step": 10410 }, { "epoch": 0.550725403662694, "grad_norm": 75.76577758789062, "learning_rate": 5e-06, "loss": 2.7591, "step": 10420 }, { "epoch": 0.5512539309214872, "grad_norm": 80.83454132080078, "learning_rate": 5e-06, "loss": 2.2437, "step": 10430 }, { "epoch": 0.5517824581802806, "grad_norm": 94.29025268554688, "learning_rate": 5e-06, "loss": 2.134, "step": 10440 }, { "epoch": 0.552310985439074, "grad_norm": 68.68152618408203, "learning_rate": 5e-06, "loss": 1.6311, "step": 10450 }, { "epoch": 0.5528395126978674, "grad_norm": 72.70677947998047, "learning_rate": 5e-06, "loss": 2.1808, "step": 10460 }, { "epoch": 0.5533680399566607, "grad_norm": 76.77523803710938, "learning_rate": 5e-06, "loss": 1.9564, "step": 10470 }, { "epoch": 0.5538965672154541, "grad_norm": 91.957275390625, "learning_rate": 5e-06, "loss": 2.2802, "step": 10480 }, { "epoch": 0.5544250944742475, "grad_norm": 79.1895523071289, "learning_rate": 5e-06, "loss": 2.1426, "step": 10490 }, { "epoch": 0.5549536217330409, "grad_norm": 79.99895477294922, "learning_rate": 5e-06, "loss": 2.1947, "step": 10500 }, { "epoch": 0.5554821489918342, "grad_norm": 68.53219604492188, "learning_rate": 5e-06, "loss": 1.9626, "step": 10510 }, { "epoch": 0.5560106762506276, "grad_norm": 108.16405487060547, "learning_rate": 5e-06, "loss": 2.0372, "step": 10520 }, { "epoch": 0.556539203509421, "grad_norm": 78.80455780029297, "learning_rate": 5e-06, "loss": 2.0687, "step": 10530 }, { "epoch": 0.5570677307682144, "grad_norm": 96.68619537353516, "learning_rate": 5e-06, "loss": 1.7241, "step": 10540 }, { "epoch": 0.5575962580270077, "grad_norm": 53.9423713684082, "learning_rate": 5e-06, "loss": 2.0803, "step": 10550 }, { "epoch": 0.5581247852858011, "grad_norm": 107.8885726928711, "learning_rate": 5e-06, "loss": 1.8777, "step": 10560 }, { "epoch": 0.5586533125445945, "grad_norm": 95.13790130615234, "learning_rate": 5e-06, "loss": 2.4981, "step": 10570 }, { "epoch": 0.5591818398033879, "grad_norm": 68.58380126953125, "learning_rate": 5e-06, "loss": 2.516, "step": 10580 }, { "epoch": 0.5597103670621812, "grad_norm": 71.80757141113281, "learning_rate": 5e-06, "loss": 2.0462, "step": 10590 }, { "epoch": 0.5602388943209746, "grad_norm": 73.8110580444336, "learning_rate": 5e-06, "loss": 1.6649, "step": 10600 }, { "epoch": 0.560767421579768, "grad_norm": 91.26495361328125, "learning_rate": 5e-06, "loss": 2.1002, "step": 10610 }, { "epoch": 0.5612959488385614, "grad_norm": 87.0208511352539, "learning_rate": 5e-06, "loss": 2.1324, "step": 10620 }, { "epoch": 0.5618244760973548, "grad_norm": 119.60919952392578, "learning_rate": 5e-06, "loss": 1.9618, "step": 10630 }, { "epoch": 0.5623530033561481, "grad_norm": 88.09393310546875, "learning_rate": 5e-06, "loss": 2.3026, "step": 10640 }, { "epoch": 0.5628815306149415, "grad_norm": 110.76757049560547, "learning_rate": 5e-06, "loss": 2.2034, "step": 10650 }, { "epoch": 0.5634100578737349, "grad_norm": 65.5195083618164, "learning_rate": 5e-06, "loss": 2.0022, "step": 10660 }, { "epoch": 0.5639385851325283, "grad_norm": 75.06954193115234, "learning_rate": 5e-06, "loss": 2.1413, "step": 10670 }, { "epoch": 0.5644671123913216, "grad_norm": 99.46965026855469, "learning_rate": 5e-06, "loss": 2.6467, "step": 10680 }, { "epoch": 0.5649956396501149, "grad_norm": 90.08120727539062, "learning_rate": 5e-06, "loss": 2.07, "step": 10690 }, { "epoch": 0.5655241669089083, "grad_norm": 88.28016662597656, "learning_rate": 5e-06, "loss": 2.1584, "step": 10700 }, { "epoch": 0.5660526941677017, "grad_norm": 79.94560241699219, "learning_rate": 5e-06, "loss": 2.4212, "step": 10710 }, { "epoch": 0.566581221426495, "grad_norm": 63.54833221435547, "learning_rate": 5e-06, "loss": 2.0421, "step": 10720 }, { "epoch": 0.5671097486852884, "grad_norm": 80.46841430664062, "learning_rate": 5e-06, "loss": 1.7682, "step": 10730 }, { "epoch": 0.5676382759440818, "grad_norm": 64.35215759277344, "learning_rate": 5e-06, "loss": 1.8888, "step": 10740 }, { "epoch": 0.5681668032028752, "grad_norm": 93.6233139038086, "learning_rate": 5e-06, "loss": 2.0324, "step": 10750 }, { "epoch": 0.5686953304616685, "grad_norm": 72.54785919189453, "learning_rate": 5e-06, "loss": 2.514, "step": 10760 }, { "epoch": 0.5692238577204619, "grad_norm": 56.67709732055664, "learning_rate": 5e-06, "loss": 2.1098, "step": 10770 }, { "epoch": 0.5697523849792553, "grad_norm": 72.2990951538086, "learning_rate": 5e-06, "loss": 1.879, "step": 10780 }, { "epoch": 0.5702809122380487, "grad_norm": 76.36679077148438, "learning_rate": 5e-06, "loss": 2.2944, "step": 10790 }, { "epoch": 0.570809439496842, "grad_norm": 95.23561096191406, "learning_rate": 5e-06, "loss": 2.2605, "step": 10800 }, { "epoch": 0.5713379667556354, "grad_norm": 84.22502136230469, "learning_rate": 5e-06, "loss": 1.9865, "step": 10810 }, { "epoch": 0.5718664940144288, "grad_norm": 73.37518310546875, "learning_rate": 5e-06, "loss": 2.1831, "step": 10820 }, { "epoch": 0.5723950212732222, "grad_norm": 70.90452575683594, "learning_rate": 5e-06, "loss": 2.1799, "step": 10830 }, { "epoch": 0.5729235485320155, "grad_norm": 97.03677368164062, "learning_rate": 5e-06, "loss": 2.5063, "step": 10840 }, { "epoch": 0.5734520757908089, "grad_norm": 70.40719604492188, "learning_rate": 5e-06, "loss": 1.9118, "step": 10850 }, { "epoch": 0.5739806030496023, "grad_norm": 67.69002532958984, "learning_rate": 5e-06, "loss": 2.1921, "step": 10860 }, { "epoch": 0.5745091303083957, "grad_norm": 100.77033233642578, "learning_rate": 5e-06, "loss": 1.9744, "step": 10870 }, { "epoch": 0.575037657567189, "grad_norm": 76.27334594726562, "learning_rate": 5e-06, "loss": 2.1143, "step": 10880 }, { "epoch": 0.5755661848259824, "grad_norm": 95.56637573242188, "learning_rate": 5e-06, "loss": 1.6315, "step": 10890 }, { "epoch": 0.5760947120847758, "grad_norm": 85.03011322021484, "learning_rate": 5e-06, "loss": 2.2336, "step": 10900 }, { "epoch": 0.5766232393435692, "grad_norm": 79.77254486083984, "learning_rate": 5e-06, "loss": 2.1237, "step": 10910 }, { "epoch": 0.5771517666023626, "grad_norm": 105.82007598876953, "learning_rate": 5e-06, "loss": 2.0204, "step": 10920 }, { "epoch": 0.5776802938611559, "grad_norm": 74.43701171875, "learning_rate": 5e-06, "loss": 1.8126, "step": 10930 }, { "epoch": 0.5782088211199493, "grad_norm": 69.89442443847656, "learning_rate": 5e-06, "loss": 2.3253, "step": 10940 }, { "epoch": 0.5787373483787427, "grad_norm": 102.32848358154297, "learning_rate": 5e-06, "loss": 1.7748, "step": 10950 }, { "epoch": 0.579265875637536, "grad_norm": 73.18042755126953, "learning_rate": 5e-06, "loss": 1.8381, "step": 10960 }, { "epoch": 0.5797944028963293, "grad_norm": 66.38732147216797, "learning_rate": 5e-06, "loss": 2.0043, "step": 10970 }, { "epoch": 0.5803229301551227, "grad_norm": 116.1729507446289, "learning_rate": 5e-06, "loss": 2.0692, "step": 10980 }, { "epoch": 0.5808514574139161, "grad_norm": 72.55099487304688, "learning_rate": 5e-06, "loss": 2.098, "step": 10990 }, { "epoch": 0.5813799846727095, "grad_norm": 83.53316497802734, "learning_rate": 5e-06, "loss": 2.0639, "step": 11000 }, { "epoch": 0.5813799846727095, "eval_loss": 1.8978537321090698, "eval_runtime": 39.4283, "eval_samples_per_second": 310.234, "eval_steps_per_second": 9.714, "eval_sts-dev_pearson_cosine": 0.8452087964841841, "eval_sts-dev_pearson_dot": 0.8213640238009419, "eval_sts-dev_pearson_euclidean": 0.8511831639647767, "eval_sts-dev_pearson_manhattan": 0.8506928896487167, "eval_sts-dev_pearson_max": 0.8511831639647767, "eval_sts-dev_spearman_cosine": 0.8495537218268623, "eval_sts-dev_spearman_dot": 0.8197924521563636, "eval_sts-dev_spearman_euclidean": 0.8523157140611485, "eval_sts-dev_spearman_manhattan": 0.8515640133054317, "eval_sts-dev_spearman_max": 0.8523157140611485, "step": 11000 }, { "epoch": 0.5819085119315028, "grad_norm": 59.2221565246582, "learning_rate": 5e-06, "loss": 2.1486, "step": 11010 }, { "epoch": 0.5824370391902962, "grad_norm": 93.96782684326172, "learning_rate": 5e-06, "loss": 2.182, "step": 11020 }, { "epoch": 0.5829655664490896, "grad_norm": 99.84814453125, "learning_rate": 5e-06, "loss": 2.1611, "step": 11030 }, { "epoch": 0.583494093707883, "grad_norm": 95.67292022705078, "learning_rate": 5e-06, "loss": 2.079, "step": 11040 }, { "epoch": 0.5840226209666763, "grad_norm": 84.619140625, "learning_rate": 5e-06, "loss": 2.0544, "step": 11050 }, { "epoch": 0.5845511482254697, "grad_norm": 65.70954132080078, "learning_rate": 5e-06, "loss": 2.0646, "step": 11060 }, { "epoch": 0.5850796754842631, "grad_norm": 73.69808959960938, "learning_rate": 5e-06, "loss": 2.0739, "step": 11070 }, { "epoch": 0.5856082027430565, "grad_norm": 58.25351333618164, "learning_rate": 5e-06, "loss": 1.9585, "step": 11080 }, { "epoch": 0.5861367300018498, "grad_norm": 81.82132720947266, "learning_rate": 5e-06, "loss": 1.9072, "step": 11090 }, { "epoch": 0.5866652572606432, "grad_norm": 69.7490463256836, "learning_rate": 5e-06, "loss": 1.9004, "step": 11100 }, { "epoch": 0.5871937845194366, "grad_norm": 67.66670227050781, "learning_rate": 5e-06, "loss": 2.4679, "step": 11110 }, { "epoch": 0.58772231177823, "grad_norm": 69.42993927001953, "learning_rate": 5e-06, "loss": 1.9036, "step": 11120 }, { "epoch": 0.5882508390370234, "grad_norm": 71.51252746582031, "learning_rate": 5e-06, "loss": 1.9639, "step": 11130 }, { "epoch": 0.5887793662958167, "grad_norm": 96.71595001220703, "learning_rate": 5e-06, "loss": 1.8777, "step": 11140 }, { "epoch": 0.5893078935546101, "grad_norm": 114.90606689453125, "learning_rate": 5e-06, "loss": 2.1693, "step": 11150 }, { "epoch": 0.5898364208134035, "grad_norm": 66.78081512451172, "learning_rate": 5e-06, "loss": 1.8278, "step": 11160 }, { "epoch": 0.5903649480721969, "grad_norm": 114.71138763427734, "learning_rate": 5e-06, "loss": 2.5077, "step": 11170 }, { "epoch": 0.5908934753309902, "grad_norm": 65.72053527832031, "learning_rate": 5e-06, "loss": 1.9915, "step": 11180 }, { "epoch": 0.5914220025897836, "grad_norm": 96.37431335449219, "learning_rate": 5e-06, "loss": 1.8883, "step": 11190 }, { "epoch": 0.591950529848577, "grad_norm": 87.63094329833984, "learning_rate": 5e-06, "loss": 2.2696, "step": 11200 }, { "epoch": 0.5924790571073704, "grad_norm": 81.29655456542969, "learning_rate": 5e-06, "loss": 2.2666, "step": 11210 }, { "epoch": 0.5930075843661636, "grad_norm": 63.82039260864258, "learning_rate": 5e-06, "loss": 1.8786, "step": 11220 }, { "epoch": 0.593536111624957, "grad_norm": 57.89633560180664, "learning_rate": 5e-06, "loss": 2.0693, "step": 11230 }, { "epoch": 0.5940646388837504, "grad_norm": 59.93412780761719, "learning_rate": 5e-06, "loss": 2.4056, "step": 11240 }, { "epoch": 0.5945931661425438, "grad_norm": 46.310096740722656, "learning_rate": 5e-06, "loss": 1.7391, "step": 11250 }, { "epoch": 0.5951216934013371, "grad_norm": 53.82455825805664, "learning_rate": 5e-06, "loss": 1.8847, "step": 11260 }, { "epoch": 0.5956502206601305, "grad_norm": 71.66796875, "learning_rate": 5e-06, "loss": 2.3905, "step": 11270 }, { "epoch": 0.5961787479189239, "grad_norm": 66.03099822998047, "learning_rate": 5e-06, "loss": 2.0135, "step": 11280 }, { "epoch": 0.5967072751777173, "grad_norm": 67.79035186767578, "learning_rate": 5e-06, "loss": 2.0468, "step": 11290 }, { "epoch": 0.5972358024365106, "grad_norm": 72.20701599121094, "learning_rate": 5e-06, "loss": 1.7526, "step": 11300 }, { "epoch": 0.597764329695304, "grad_norm": 72.53498077392578, "learning_rate": 5e-06, "loss": 2.1148, "step": 11310 }, { "epoch": 0.5982928569540974, "grad_norm": 88.7719497680664, "learning_rate": 5e-06, "loss": 2.0166, "step": 11320 }, { "epoch": 0.5988213842128908, "grad_norm": 107.517578125, "learning_rate": 5e-06, "loss": 2.0425, "step": 11330 }, { "epoch": 0.5993499114716841, "grad_norm": 74.0730209350586, "learning_rate": 5e-06, "loss": 2.2274, "step": 11340 }, { "epoch": 0.5998784387304775, "grad_norm": 65.8453598022461, "learning_rate": 5e-06, "loss": 2.1106, "step": 11350 }, { "epoch": 0.6004069659892709, "grad_norm": 104.13138580322266, "learning_rate": 5e-06, "loss": 2.1045, "step": 11360 }, { "epoch": 0.6009354932480643, "grad_norm": 80.7543716430664, "learning_rate": 5e-06, "loss": 1.7801, "step": 11370 }, { "epoch": 0.6014640205068577, "grad_norm": 87.68011474609375, "learning_rate": 5e-06, "loss": 1.9665, "step": 11380 }, { "epoch": 0.601992547765651, "grad_norm": 70.28311157226562, "learning_rate": 5e-06, "loss": 2.1777, "step": 11390 }, { "epoch": 0.6025210750244444, "grad_norm": 79.18292999267578, "learning_rate": 5e-06, "loss": 2.4798, "step": 11400 }, { "epoch": 0.6030496022832378, "grad_norm": 79.25562286376953, "learning_rate": 5e-06, "loss": 2.2001, "step": 11410 }, { "epoch": 0.6035781295420312, "grad_norm": 95.41483306884766, "learning_rate": 5e-06, "loss": 1.9067, "step": 11420 }, { "epoch": 0.6041066568008245, "grad_norm": 61.51045608520508, "learning_rate": 5e-06, "loss": 1.7085, "step": 11430 }, { "epoch": 0.6046351840596179, "grad_norm": 102.42302703857422, "learning_rate": 5e-06, "loss": 1.8458, "step": 11440 }, { "epoch": 0.6051637113184113, "grad_norm": 81.78809356689453, "learning_rate": 5e-06, "loss": 2.1554, "step": 11450 }, { "epoch": 0.6056922385772047, "grad_norm": 87.76679229736328, "learning_rate": 5e-06, "loss": 1.9733, "step": 11460 }, { "epoch": 0.606220765835998, "grad_norm": 78.5721664428711, "learning_rate": 5e-06, "loss": 2.2038, "step": 11470 }, { "epoch": 0.6067492930947913, "grad_norm": 82.00606536865234, "learning_rate": 5e-06, "loss": 1.9311, "step": 11480 }, { "epoch": 0.6072778203535847, "grad_norm": 82.4979019165039, "learning_rate": 5e-06, "loss": 2.0353, "step": 11490 }, { "epoch": 0.6078063476123781, "grad_norm": 111.72450256347656, "learning_rate": 5e-06, "loss": 1.9198, "step": 11500 }, { "epoch": 0.6083348748711714, "grad_norm": 75.56639099121094, "learning_rate": 5e-06, "loss": 2.2445, "step": 11510 }, { "epoch": 0.6088634021299648, "grad_norm": 98.55653381347656, "learning_rate": 5e-06, "loss": 1.937, "step": 11520 }, { "epoch": 0.6093919293887582, "grad_norm": 59.58787155151367, "learning_rate": 5e-06, "loss": 1.8261, "step": 11530 }, { "epoch": 0.6099204566475516, "grad_norm": 81.2514877319336, "learning_rate": 5e-06, "loss": 1.9965, "step": 11540 }, { "epoch": 0.6104489839063449, "grad_norm": 79.58076477050781, "learning_rate": 5e-06, "loss": 2.2201, "step": 11550 }, { "epoch": 0.6109775111651383, "grad_norm": 70.32428741455078, "learning_rate": 5e-06, "loss": 2.2935, "step": 11560 }, { "epoch": 0.6115060384239317, "grad_norm": 62.41090774536133, "learning_rate": 5e-06, "loss": 2.1433, "step": 11570 }, { "epoch": 0.6120345656827251, "grad_norm": 66.6859359741211, "learning_rate": 5e-06, "loss": 2.1784, "step": 11580 }, { "epoch": 0.6125630929415185, "grad_norm": 80.27626037597656, "learning_rate": 5e-06, "loss": 1.9676, "step": 11590 }, { "epoch": 0.6130916202003118, "grad_norm": 89.901611328125, "learning_rate": 5e-06, "loss": 2.3047, "step": 11600 }, { "epoch": 0.6136201474591052, "grad_norm": 82.42327117919922, "learning_rate": 5e-06, "loss": 1.8418, "step": 11610 }, { "epoch": 0.6141486747178986, "grad_norm": 59.45391082763672, "learning_rate": 5e-06, "loss": 1.9098, "step": 11620 }, { "epoch": 0.614677201976692, "grad_norm": 75.94921112060547, "learning_rate": 5e-06, "loss": 1.6835, "step": 11630 }, { "epoch": 0.6152057292354853, "grad_norm": 89.63483428955078, "learning_rate": 5e-06, "loss": 1.8062, "step": 11640 }, { "epoch": 0.6157342564942787, "grad_norm": 113.56159973144531, "learning_rate": 5e-06, "loss": 1.7458, "step": 11650 }, { "epoch": 0.6162627837530721, "grad_norm": 357.4466552734375, "learning_rate": 5e-06, "loss": 1.7197, "step": 11660 }, { "epoch": 0.6167913110118655, "grad_norm": 101.31066131591797, "learning_rate": 5e-06, "loss": 2.3869, "step": 11670 }, { "epoch": 0.6173198382706588, "grad_norm": 140.31748962402344, "learning_rate": 5e-06, "loss": 2.172, "step": 11680 }, { "epoch": 0.6178483655294522, "grad_norm": 65.51746368408203, "learning_rate": 5e-06, "loss": 2.183, "step": 11690 }, { "epoch": 0.6183768927882456, "grad_norm": 67.61272430419922, "learning_rate": 5e-06, "loss": 1.9627, "step": 11700 }, { "epoch": 0.618905420047039, "grad_norm": 88.16532897949219, "learning_rate": 5e-06, "loss": 1.8449, "step": 11710 }, { "epoch": 0.6194339473058323, "grad_norm": 98.38895416259766, "learning_rate": 5e-06, "loss": 2.0688, "step": 11720 }, { "epoch": 0.6199624745646257, "grad_norm": 62.77302551269531, "learning_rate": 5e-06, "loss": 2.0803, "step": 11730 }, { "epoch": 0.6204910018234191, "grad_norm": 108.11663818359375, "learning_rate": 5e-06, "loss": 1.8961, "step": 11740 }, { "epoch": 0.6210195290822124, "grad_norm": 84.52525329589844, "learning_rate": 5e-06, "loss": 1.8968, "step": 11750 }, { "epoch": 0.6215480563410057, "grad_norm": 60.99509811401367, "learning_rate": 5e-06, "loss": 2.2872, "step": 11760 }, { "epoch": 0.6220765835997991, "grad_norm": 83.09530639648438, "learning_rate": 5e-06, "loss": 1.8778, "step": 11770 }, { "epoch": 0.6226051108585925, "grad_norm": 79.84248352050781, "learning_rate": 5e-06, "loss": 2.1743, "step": 11780 }, { "epoch": 0.6231336381173859, "grad_norm": 71.4755630493164, "learning_rate": 5e-06, "loss": 2.036, "step": 11790 }, { "epoch": 0.6236621653761792, "grad_norm": 87.0229721069336, "learning_rate": 5e-06, "loss": 1.7392, "step": 11800 }, { "epoch": 0.6241906926349726, "grad_norm": 90.05377960205078, "learning_rate": 5e-06, "loss": 1.9301, "step": 11810 }, { "epoch": 0.624719219893766, "grad_norm": 90.21224975585938, "learning_rate": 5e-06, "loss": 1.8089, "step": 11820 }, { "epoch": 0.6252477471525594, "grad_norm": 68.72386932373047, "learning_rate": 5e-06, "loss": 1.9279, "step": 11830 }, { "epoch": 0.6257762744113528, "grad_norm": 71.66873931884766, "learning_rate": 5e-06, "loss": 1.8997, "step": 11840 }, { "epoch": 0.6263048016701461, "grad_norm": 101.8065414428711, "learning_rate": 5e-06, "loss": 2.1427, "step": 11850 }, { "epoch": 0.6268333289289395, "grad_norm": 109.1715087890625, "learning_rate": 5e-06, "loss": 2.004, "step": 11860 }, { "epoch": 0.6273618561877329, "grad_norm": 91.91815185546875, "learning_rate": 5e-06, "loss": 2.148, "step": 11870 }, { "epoch": 0.6278903834465263, "grad_norm": 66.95806884765625, "learning_rate": 5e-06, "loss": 2.0547, "step": 11880 }, { "epoch": 0.6284189107053196, "grad_norm": 82.94200134277344, "learning_rate": 5e-06, "loss": 1.9453, "step": 11890 }, { "epoch": 0.628947437964113, "grad_norm": 61.771705627441406, "learning_rate": 5e-06, "loss": 2.1952, "step": 11900 }, { "epoch": 0.6294759652229064, "grad_norm": 60.75806427001953, "learning_rate": 5e-06, "loss": 2.2278, "step": 11910 }, { "epoch": 0.6300044924816998, "grad_norm": 110.42340087890625, "learning_rate": 5e-06, "loss": 2.1061, "step": 11920 }, { "epoch": 0.6305330197404931, "grad_norm": 84.47340393066406, "learning_rate": 5e-06, "loss": 1.8044, "step": 11930 }, { "epoch": 0.6310615469992865, "grad_norm": 66.05319213867188, "learning_rate": 5e-06, "loss": 1.8451, "step": 11940 }, { "epoch": 0.6315900742580799, "grad_norm": 88.146240234375, "learning_rate": 5e-06, "loss": 2.2427, "step": 11950 }, { "epoch": 0.6321186015168733, "grad_norm": 85.69880676269531, "learning_rate": 5e-06, "loss": 1.7752, "step": 11960 }, { "epoch": 0.6326471287756666, "grad_norm": 78.68860626220703, "learning_rate": 5e-06, "loss": 2.0104, "step": 11970 }, { "epoch": 0.63317565603446, "grad_norm": 99.48846435546875, "learning_rate": 5e-06, "loss": 2.4741, "step": 11980 }, { "epoch": 0.6337041832932534, "grad_norm": 85.57804107666016, "learning_rate": 5e-06, "loss": 2.3391, "step": 11990 }, { "epoch": 0.6342327105520468, "grad_norm": 64.55311584472656, "learning_rate": 5e-06, "loss": 2.0381, "step": 12000 }, { "epoch": 0.6342327105520468, "eval_loss": 1.8542531728744507, "eval_runtime": 42.3819, "eval_samples_per_second": 288.614, "eval_steps_per_second": 9.037, "eval_sts-dev_pearson_cosine": 0.8522035258114219, "eval_sts-dev_pearson_dot": 0.8253500570545005, "eval_sts-dev_pearson_euclidean": 0.856500135555142, "eval_sts-dev_pearson_manhattan": 0.8560532317449714, "eval_sts-dev_pearson_max": 0.856500135555142, "eval_sts-dev_spearman_cosine": 0.8561628729175815, "eval_sts-dev_spearman_dot": 0.8215731407667729, "eval_sts-dev_spearman_euclidean": 0.858455764578477, "eval_sts-dev_spearman_manhattan": 0.8577401397224773, "eval_sts-dev_spearman_max": 0.858455764578477, "step": 12000 }, { "epoch": 0.63476123781084, "grad_norm": 50.43362808227539, "learning_rate": 5e-06, "loss": 1.8344, "step": 12010 }, { "epoch": 0.6352897650696334, "grad_norm": 57.333984375, "learning_rate": 5e-06, "loss": 1.9874, "step": 12020 }, { "epoch": 0.6358182923284268, "grad_norm": 75.91407012939453, "learning_rate": 5e-06, "loss": 1.7864, "step": 12030 }, { "epoch": 0.6363468195872202, "grad_norm": 80.71363830566406, "learning_rate": 5e-06, "loss": 1.8547, "step": 12040 }, { "epoch": 0.6368753468460135, "grad_norm": 65.81807708740234, "learning_rate": 5e-06, "loss": 2.2635, "step": 12050 }, { "epoch": 0.6374038741048069, "grad_norm": 85.16212463378906, "learning_rate": 5e-06, "loss": 1.8673, "step": 12060 }, { "epoch": 0.6379324013636003, "grad_norm": 60.951480865478516, "learning_rate": 5e-06, "loss": 1.8329, "step": 12070 }, { "epoch": 0.6384609286223937, "grad_norm": 51.626564025878906, "learning_rate": 5e-06, "loss": 1.9495, "step": 12080 }, { "epoch": 0.638989455881187, "grad_norm": 46.31781005859375, "learning_rate": 5e-06, "loss": 2.0415, "step": 12090 }, { "epoch": 0.6395179831399804, "grad_norm": 104.44295501708984, "learning_rate": 5e-06, "loss": 2.3783, "step": 12100 }, { "epoch": 0.6400465103987738, "grad_norm": 107.35517883300781, "learning_rate": 5e-06, "loss": 2.0993, "step": 12110 }, { "epoch": 0.6405750376575672, "grad_norm": 88.35047149658203, "learning_rate": 5e-06, "loss": 2.1413, "step": 12120 }, { "epoch": 0.6411035649163606, "grad_norm": 66.21955871582031, "learning_rate": 5e-06, "loss": 1.7323, "step": 12130 }, { "epoch": 0.6416320921751539, "grad_norm": 95.03646087646484, "learning_rate": 5e-06, "loss": 1.8732, "step": 12140 }, { "epoch": 0.6421606194339473, "grad_norm": 81.33208465576172, "learning_rate": 5e-06, "loss": 1.935, "step": 12150 }, { "epoch": 0.6426891466927407, "grad_norm": 85.91375732421875, "learning_rate": 5e-06, "loss": 2.2395, "step": 12160 }, { "epoch": 0.6432176739515341, "grad_norm": 67.06218719482422, "learning_rate": 5e-06, "loss": 1.6384, "step": 12170 }, { "epoch": 0.6437462012103274, "grad_norm": 111.14989471435547, "learning_rate": 5e-06, "loss": 2.1973, "step": 12180 }, { "epoch": 0.6442747284691208, "grad_norm": 87.77964782714844, "learning_rate": 5e-06, "loss": 2.0399, "step": 12190 }, { "epoch": 0.6448032557279142, "grad_norm": 60.48622131347656, "learning_rate": 5e-06, "loss": 1.8922, "step": 12200 }, { "epoch": 0.6453317829867076, "grad_norm": 112.66234588623047, "learning_rate": 5e-06, "loss": 1.9138, "step": 12210 }, { "epoch": 0.645860310245501, "grad_norm": 91.74418640136719, "learning_rate": 5e-06, "loss": 1.8753, "step": 12220 }, { "epoch": 0.6463888375042943, "grad_norm": 54.61818313598633, "learning_rate": 5e-06, "loss": 2.1754, "step": 12230 }, { "epoch": 0.6469173647630877, "grad_norm": 89.18709564208984, "learning_rate": 5e-06, "loss": 1.9337, "step": 12240 }, { "epoch": 0.6474458920218811, "grad_norm": 66.5029067993164, "learning_rate": 5e-06, "loss": 1.9203, "step": 12250 }, { "epoch": 0.6479744192806745, "grad_norm": 83.1377944946289, "learning_rate": 5e-06, "loss": 2.0738, "step": 12260 }, { "epoch": 0.6485029465394678, "grad_norm": 81.12735748291016, "learning_rate": 5e-06, "loss": 1.8897, "step": 12270 }, { "epoch": 0.6490314737982611, "grad_norm": 88.5683364868164, "learning_rate": 5e-06, "loss": 2.1153, "step": 12280 }, { "epoch": 0.6495600010570545, "grad_norm": 79.2741470336914, "learning_rate": 5e-06, "loss": 2.1041, "step": 12290 }, { "epoch": 0.6500885283158478, "grad_norm": 67.34881591796875, "learning_rate": 5e-06, "loss": 1.9919, "step": 12300 }, { "epoch": 0.6506170555746412, "grad_norm": 58.59946060180664, "learning_rate": 5e-06, "loss": 1.9593, "step": 12310 }, { "epoch": 0.6511455828334346, "grad_norm": 86.0778579711914, "learning_rate": 5e-06, "loss": 2.0696, "step": 12320 }, { "epoch": 0.651674110092228, "grad_norm": 75.9932861328125, "learning_rate": 5e-06, "loss": 2.1404, "step": 12330 }, { "epoch": 0.6522026373510214, "grad_norm": 67.0188980102539, "learning_rate": 5e-06, "loss": 2.219, "step": 12340 }, { "epoch": 0.6527311646098147, "grad_norm": 78.68538665771484, "learning_rate": 5e-06, "loss": 2.2225, "step": 12350 }, { "epoch": 0.6532596918686081, "grad_norm": 104.84745025634766, "learning_rate": 5e-06, "loss": 2.3324, "step": 12360 }, { "epoch": 0.6537882191274015, "grad_norm": 70.46642303466797, "learning_rate": 5e-06, "loss": 2.1853, "step": 12370 }, { "epoch": 0.6543167463861949, "grad_norm": 91.52671813964844, "learning_rate": 5e-06, "loss": 2.1468, "step": 12380 }, { "epoch": 0.6548452736449882, "grad_norm": 75.09017181396484, "learning_rate": 5e-06, "loss": 1.899, "step": 12390 }, { "epoch": 0.6553738009037816, "grad_norm": 69.65746307373047, "learning_rate": 5e-06, "loss": 1.8529, "step": 12400 }, { "epoch": 0.655902328162575, "grad_norm": 64.5865478515625, "learning_rate": 5e-06, "loss": 1.7523, "step": 12410 }, { "epoch": 0.6564308554213684, "grad_norm": 74.995849609375, "learning_rate": 5e-06, "loss": 1.7441, "step": 12420 }, { "epoch": 0.6569593826801617, "grad_norm": 106.54707336425781, "learning_rate": 5e-06, "loss": 2.1761, "step": 12430 }, { "epoch": 0.6574879099389551, "grad_norm": 79.5743408203125, "learning_rate": 5e-06, "loss": 1.9019, "step": 12440 }, { "epoch": 0.6580164371977485, "grad_norm": 94.24537658691406, "learning_rate": 5e-06, "loss": 1.9996, "step": 12450 }, { "epoch": 0.6585449644565419, "grad_norm": 68.45813751220703, "learning_rate": 5e-06, "loss": 2.1452, "step": 12460 }, { "epoch": 0.6590734917153352, "grad_norm": 52.959842681884766, "learning_rate": 5e-06, "loss": 2.0755, "step": 12470 }, { "epoch": 0.6596020189741286, "grad_norm": 57.8477897644043, "learning_rate": 5e-06, "loss": 2.0293, "step": 12480 }, { "epoch": 0.660130546232922, "grad_norm": 56.49106216430664, "learning_rate": 5e-06, "loss": 1.9489, "step": 12490 }, { "epoch": 0.6606590734917154, "grad_norm": 73.39006805419922, "learning_rate": 5e-06, "loss": 2.3248, "step": 12500 }, { "epoch": 0.6611876007505088, "grad_norm": 78.31349182128906, "learning_rate": 5e-06, "loss": 2.3648, "step": 12510 }, { "epoch": 0.6617161280093021, "grad_norm": 33.30984115600586, "learning_rate": 5e-06, "loss": 1.8356, "step": 12520 }, { "epoch": 0.6622446552680955, "grad_norm": 94.69254302978516, "learning_rate": 5e-06, "loss": 1.7052, "step": 12530 }, { "epoch": 0.6627731825268888, "grad_norm": 85.45375061035156, "learning_rate": 5e-06, "loss": 1.7435, "step": 12540 }, { "epoch": 0.6633017097856821, "grad_norm": 49.551856994628906, "learning_rate": 5e-06, "loss": 2.096, "step": 12550 }, { "epoch": 0.6638302370444755, "grad_norm": 84.09873962402344, "learning_rate": 5e-06, "loss": 2.4834, "step": 12560 }, { "epoch": 0.6643587643032689, "grad_norm": 86.67302703857422, "learning_rate": 5e-06, "loss": 1.9981, "step": 12570 }, { "epoch": 0.6648872915620623, "grad_norm": 88.13245391845703, "learning_rate": 5e-06, "loss": 2.0664, "step": 12580 }, { "epoch": 0.6654158188208557, "grad_norm": 155.3566131591797, "learning_rate": 5e-06, "loss": 1.9207, "step": 12590 }, { "epoch": 0.665944346079649, "grad_norm": 69.61137390136719, "learning_rate": 5e-06, "loss": 2.2112, "step": 12600 }, { "epoch": 0.6664728733384424, "grad_norm": 86.81015014648438, "learning_rate": 5e-06, "loss": 1.83, "step": 12610 }, { "epoch": 0.6670014005972358, "grad_norm": 98.53312683105469, "learning_rate": 5e-06, "loss": 2.1127, "step": 12620 }, { "epoch": 0.6675299278560292, "grad_norm": 101.17139434814453, "learning_rate": 5e-06, "loss": 2.3118, "step": 12630 }, { "epoch": 0.6680584551148225, "grad_norm": 110.29513549804688, "learning_rate": 5e-06, "loss": 1.9837, "step": 12640 }, { "epoch": 0.6685869823736159, "grad_norm": 106.20242309570312, "learning_rate": 5e-06, "loss": 2.1302, "step": 12650 }, { "epoch": 0.6691155096324093, "grad_norm": 68.84351348876953, "learning_rate": 5e-06, "loss": 2.1408, "step": 12660 }, { "epoch": 0.6696440368912027, "grad_norm": 94.61917877197266, "learning_rate": 5e-06, "loss": 2.0331, "step": 12670 }, { "epoch": 0.670172564149996, "grad_norm": 67.94039916992188, "learning_rate": 5e-06, "loss": 1.8371, "step": 12680 }, { "epoch": 0.6707010914087894, "grad_norm": 55.5589599609375, "learning_rate": 5e-06, "loss": 1.6359, "step": 12690 }, { "epoch": 0.6712296186675828, "grad_norm": 64.12275695800781, "learning_rate": 5e-06, "loss": 1.7247, "step": 12700 }, { "epoch": 0.6717581459263762, "grad_norm": 111.85751342773438, "learning_rate": 5e-06, "loss": 1.8856, "step": 12710 }, { "epoch": 0.6722866731851695, "grad_norm": 74.51805877685547, "learning_rate": 5e-06, "loss": 1.8371, "step": 12720 }, { "epoch": 0.6728152004439629, "grad_norm": 92.9873275756836, "learning_rate": 5e-06, "loss": 1.7796, "step": 12730 }, { "epoch": 0.6733437277027563, "grad_norm": 81.36759185791016, "learning_rate": 5e-06, "loss": 2.3258, "step": 12740 }, { "epoch": 0.6738722549615497, "grad_norm": 65.11761474609375, "learning_rate": 5e-06, "loss": 1.8546, "step": 12750 }, { "epoch": 0.674400782220343, "grad_norm": 65.88800048828125, "learning_rate": 5e-06, "loss": 1.787, "step": 12760 }, { "epoch": 0.6749293094791364, "grad_norm": 67.13685607910156, "learning_rate": 5e-06, "loss": 1.9841, "step": 12770 }, { "epoch": 0.6754578367379298, "grad_norm": 71.55039978027344, "learning_rate": 5e-06, "loss": 2.0517, "step": 12780 }, { "epoch": 0.6759863639967232, "grad_norm": 63.232608795166016, "learning_rate": 5e-06, "loss": 2.1414, "step": 12790 }, { "epoch": 0.6765148912555164, "grad_norm": 67.03996276855469, "learning_rate": 5e-06, "loss": 2.2079, "step": 12800 }, { "epoch": 0.6770434185143098, "grad_norm": 75.99896240234375, "learning_rate": 5e-06, "loss": 2.2421, "step": 12810 }, { "epoch": 0.6775719457731032, "grad_norm": 89.27505493164062, "learning_rate": 5e-06, "loss": 2.0478, "step": 12820 }, { "epoch": 0.6781004730318966, "grad_norm": 99.16153717041016, "learning_rate": 5e-06, "loss": 2.0051, "step": 12830 }, { "epoch": 0.67862900029069, "grad_norm": 84.13148498535156, "learning_rate": 5e-06, "loss": 1.9491, "step": 12840 }, { "epoch": 0.6791575275494833, "grad_norm": 73.17948150634766, "learning_rate": 5e-06, "loss": 1.7726, "step": 12850 }, { "epoch": 0.6796860548082767, "grad_norm": 75.44953918457031, "learning_rate": 5e-06, "loss": 2.1154, "step": 12860 }, { "epoch": 0.6802145820670701, "grad_norm": 75.23055267333984, "learning_rate": 5e-06, "loss": 2.0071, "step": 12870 }, { "epoch": 0.6807431093258635, "grad_norm": 73.5411148071289, "learning_rate": 5e-06, "loss": 1.8357, "step": 12880 }, { "epoch": 0.6812716365846568, "grad_norm": 80.57621765136719, "learning_rate": 5e-06, "loss": 1.9066, "step": 12890 }, { "epoch": 0.6818001638434502, "grad_norm": 73.24987030029297, "learning_rate": 5e-06, "loss": 2.0694, "step": 12900 }, { "epoch": 0.6823286911022436, "grad_norm": 72.4339828491211, "learning_rate": 5e-06, "loss": 1.7093, "step": 12910 }, { "epoch": 0.682857218361037, "grad_norm": 58.76856231689453, "learning_rate": 5e-06, "loss": 2.1024, "step": 12920 }, { "epoch": 0.6833857456198303, "grad_norm": 71.12699127197266, "learning_rate": 5e-06, "loss": 2.018, "step": 12930 }, { "epoch": 0.6839142728786237, "grad_norm": 86.96455383300781, "learning_rate": 5e-06, "loss": 1.8788, "step": 12940 }, { "epoch": 0.6844428001374171, "grad_norm": 107.40628051757812, "learning_rate": 5e-06, "loss": 2.2499, "step": 12950 }, { "epoch": 0.6849713273962105, "grad_norm": 77.5145263671875, "learning_rate": 5e-06, "loss": 2.0075, "step": 12960 }, { "epoch": 0.6854998546550038, "grad_norm": 56.60562515258789, "learning_rate": 5e-06, "loss": 1.8124, "step": 12970 }, { "epoch": 0.6860283819137972, "grad_norm": 48.75310516357422, "learning_rate": 5e-06, "loss": 1.6558, "step": 12980 }, { "epoch": 0.6865569091725906, "grad_norm": 81.0751724243164, "learning_rate": 5e-06, "loss": 1.9499, "step": 12990 }, { "epoch": 0.687085436431384, "grad_norm": 110.26273345947266, "learning_rate": 5e-06, "loss": 2.1345, "step": 13000 }, { "epoch": 0.687085436431384, "eval_loss": 1.83036208152771, "eval_runtime": 38.8492, "eval_samples_per_second": 314.858, "eval_steps_per_second": 9.859, "eval_sts-dev_pearson_cosine": 0.8441608768617241, "eval_sts-dev_pearson_dot": 0.8192828504678054, "eval_sts-dev_pearson_euclidean": 0.8495391643273076, "eval_sts-dev_pearson_manhattan": 0.8497825696980998, "eval_sts-dev_pearson_max": 0.8497825696980998, "eval_sts-dev_spearman_cosine": 0.8478942055292698, "eval_sts-dev_spearman_dot": 0.816440857848567, "eval_sts-dev_spearman_euclidean": 0.851168441732283, "eval_sts-dev_spearman_manhattan": 0.8514217915217386, "eval_sts-dev_spearman_max": 0.8514217915217386, "step": 13000 }, { "epoch": 0.6876139636901774, "grad_norm": 67.4935531616211, "learning_rate": 5e-06, "loss": 2.2065, "step": 13010 }, { "epoch": 0.6881424909489707, "grad_norm": 56.69532012939453, "learning_rate": 5e-06, "loss": 2.0467, "step": 13020 }, { "epoch": 0.6886710182077641, "grad_norm": 48.872005462646484, "learning_rate": 5e-06, "loss": 2.0892, "step": 13030 }, { "epoch": 0.6891995454665575, "grad_norm": 85.39813232421875, "learning_rate": 5e-06, "loss": 1.6893, "step": 13040 }, { "epoch": 0.6897280727253509, "grad_norm": 72.00165557861328, "learning_rate": 5e-06, "loss": 1.8721, "step": 13050 }, { "epoch": 0.6902565999841442, "grad_norm": 69.91156005859375, "learning_rate": 5e-06, "loss": 2.1926, "step": 13060 }, { "epoch": 0.6907851272429375, "grad_norm": 81.31251525878906, "learning_rate": 5e-06, "loss": 1.7002, "step": 13070 }, { "epoch": 0.6913136545017309, "grad_norm": 72.94538879394531, "learning_rate": 5e-06, "loss": 2.2163, "step": 13080 }, { "epoch": 0.6918421817605243, "grad_norm": 89.55048370361328, "learning_rate": 5e-06, "loss": 1.8287, "step": 13090 }, { "epoch": 0.6923707090193176, "grad_norm": 75.39757537841797, "learning_rate": 5e-06, "loss": 2.4078, "step": 13100 }, { "epoch": 0.692899236278111, "grad_norm": 62.43842697143555, "learning_rate": 5e-06, "loss": 1.9527, "step": 13110 }, { "epoch": 0.6934277635369044, "grad_norm": 46.58412170410156, "learning_rate": 5e-06, "loss": 1.7032, "step": 13120 }, { "epoch": 0.6939562907956978, "grad_norm": 82.52165985107422, "learning_rate": 5e-06, "loss": 2.0739, "step": 13130 }, { "epoch": 0.6944848180544911, "grad_norm": 66.78712463378906, "learning_rate": 5e-06, "loss": 1.6657, "step": 13140 }, { "epoch": 0.6950133453132845, "grad_norm": 84.80516815185547, "learning_rate": 5e-06, "loss": 1.8662, "step": 13150 }, { "epoch": 0.6955418725720779, "grad_norm": 75.5099868774414, "learning_rate": 5e-06, "loss": 1.9048, "step": 13160 }, { "epoch": 0.6960703998308713, "grad_norm": 72.38082885742188, "learning_rate": 5e-06, "loss": 2.0232, "step": 13170 }, { "epoch": 0.6965989270896646, "grad_norm": 90.71684265136719, "learning_rate": 5e-06, "loss": 2.2416, "step": 13180 }, { "epoch": 0.697127454348458, "grad_norm": 52.98802947998047, "learning_rate": 5e-06, "loss": 1.9242, "step": 13190 }, { "epoch": 0.6976559816072514, "grad_norm": 61.31077194213867, "learning_rate": 5e-06, "loss": 1.9015, "step": 13200 }, { "epoch": 0.6981845088660448, "grad_norm": 78.02396392822266, "learning_rate": 5e-06, "loss": 1.8781, "step": 13210 }, { "epoch": 0.6987130361248381, "grad_norm": 70.98793029785156, "learning_rate": 5e-06, "loss": 2.3833, "step": 13220 }, { "epoch": 0.6992415633836315, "grad_norm": 114.75908660888672, "learning_rate": 5e-06, "loss": 1.979, "step": 13230 }, { "epoch": 0.6997700906424249, "grad_norm": 85.00691986083984, "learning_rate": 5e-06, "loss": 1.7518, "step": 13240 }, { "epoch": 0.7002986179012183, "grad_norm": 105.02362060546875, "learning_rate": 5e-06, "loss": 1.7496, "step": 13250 }, { "epoch": 0.7008271451600117, "grad_norm": 91.41190338134766, "learning_rate": 5e-06, "loss": 2.003, "step": 13260 }, { "epoch": 0.701355672418805, "grad_norm": 62.90702819824219, "learning_rate": 5e-06, "loss": 2.3034, "step": 13270 }, { "epoch": 0.7018841996775984, "grad_norm": 79.90744018554688, "learning_rate": 5e-06, "loss": 2.1549, "step": 13280 }, { "epoch": 0.7024127269363918, "grad_norm": 92.8213882446289, "learning_rate": 5e-06, "loss": 2.1288, "step": 13290 }, { "epoch": 0.7029412541951852, "grad_norm": 83.79239654541016, "learning_rate": 5e-06, "loss": 1.9741, "step": 13300 }, { "epoch": 0.7034697814539785, "grad_norm": 89.50908660888672, "learning_rate": 5e-06, "loss": 2.0876, "step": 13310 }, { "epoch": 0.7039983087127719, "grad_norm": 66.40426635742188, "learning_rate": 5e-06, "loss": 2.1894, "step": 13320 }, { "epoch": 0.7045268359715652, "grad_norm": 81.602783203125, "learning_rate": 5e-06, "loss": 2.2875, "step": 13330 }, { "epoch": 0.7050553632303586, "grad_norm": 63.79530334472656, "learning_rate": 5e-06, "loss": 1.6306, "step": 13340 }, { "epoch": 0.7055838904891519, "grad_norm": 61.07679748535156, "learning_rate": 5e-06, "loss": 1.9152, "step": 13350 }, { "epoch": 0.7061124177479453, "grad_norm": 73.37248992919922, "learning_rate": 5e-06, "loss": 2.031, "step": 13360 }, { "epoch": 0.7066409450067387, "grad_norm": 69.66431427001953, "learning_rate": 5e-06, "loss": 1.7202, "step": 13370 }, { "epoch": 0.7071694722655321, "grad_norm": 52.90100860595703, "learning_rate": 5e-06, "loss": 1.8831, "step": 13380 }, { "epoch": 0.7076979995243254, "grad_norm": 53.96466064453125, "learning_rate": 5e-06, "loss": 1.8461, "step": 13390 }, { "epoch": 0.7082265267831188, "grad_norm": 95.98743438720703, "learning_rate": 5e-06, "loss": 1.9417, "step": 13400 }, { "epoch": 0.7087550540419122, "grad_norm": 84.02827453613281, "learning_rate": 5e-06, "loss": 1.8882, "step": 13410 }, { "epoch": 0.7092835813007056, "grad_norm": 61.968711853027344, "learning_rate": 5e-06, "loss": 1.9229, "step": 13420 }, { "epoch": 0.7098121085594989, "grad_norm": 65.08840942382812, "learning_rate": 5e-06, "loss": 1.6988, "step": 13430 }, { "epoch": 0.7103406358182923, "grad_norm": 85.03690338134766, "learning_rate": 5e-06, "loss": 2.102, "step": 13440 }, { "epoch": 0.7108691630770857, "grad_norm": 108.20443725585938, "learning_rate": 5e-06, "loss": 1.959, "step": 13450 }, { "epoch": 0.7113976903358791, "grad_norm": 81.24241638183594, "learning_rate": 5e-06, "loss": 1.9317, "step": 13460 }, { "epoch": 0.7119262175946725, "grad_norm": 74.01918029785156, "learning_rate": 5e-06, "loss": 2.023, "step": 13470 }, { "epoch": 0.7124547448534658, "grad_norm": 54.00932693481445, "learning_rate": 5e-06, "loss": 1.6614, "step": 13480 }, { "epoch": 0.7129832721122592, "grad_norm": 85.21739196777344, "learning_rate": 5e-06, "loss": 1.9577, "step": 13490 }, { "epoch": 0.7135117993710526, "grad_norm": 64.0634994506836, "learning_rate": 5e-06, "loss": 1.6266, "step": 13500 }, { "epoch": 0.714040326629846, "grad_norm": 59.827537536621094, "learning_rate": 5e-06, "loss": 1.9946, "step": 13510 }, { "epoch": 0.7145688538886393, "grad_norm": 125.35968780517578, "learning_rate": 5e-06, "loss": 2.1656, "step": 13520 }, { "epoch": 0.7150973811474327, "grad_norm": 82.6695556640625, "learning_rate": 5e-06, "loss": 1.8144, "step": 13530 }, { "epoch": 0.7156259084062261, "grad_norm": 90.18817138671875, "learning_rate": 5e-06, "loss": 2.5021, "step": 13540 }, { "epoch": 0.7161544356650195, "grad_norm": 89.6347885131836, "learning_rate": 5e-06, "loss": 2.2266, "step": 13550 }, { "epoch": 0.7166829629238128, "grad_norm": 104.1307601928711, "learning_rate": 5e-06, "loss": 2.1829, "step": 13560 }, { "epoch": 0.7172114901826062, "grad_norm": 54.60108947753906, "learning_rate": 5e-06, "loss": 1.9261, "step": 13570 }, { "epoch": 0.7177400174413996, "grad_norm": 86.93578338623047, "learning_rate": 5e-06, "loss": 2.3309, "step": 13580 }, { "epoch": 0.718268544700193, "grad_norm": 72.26850891113281, "learning_rate": 5e-06, "loss": 1.9284, "step": 13590 }, { "epoch": 0.7187970719589862, "grad_norm": 50.57677459716797, "learning_rate": 5e-06, "loss": 1.9484, "step": 13600 }, { "epoch": 0.7193255992177796, "grad_norm": 91.233642578125, "learning_rate": 5e-06, "loss": 1.8471, "step": 13610 }, { "epoch": 0.719854126476573, "grad_norm": 61.453369140625, "learning_rate": 5e-06, "loss": 2.0326, "step": 13620 }, { "epoch": 0.7203826537353664, "grad_norm": 60.43076705932617, "learning_rate": 5e-06, "loss": 2.0909, "step": 13630 }, { "epoch": 0.7209111809941597, "grad_norm": 61.92556381225586, "learning_rate": 5e-06, "loss": 1.9077, "step": 13640 }, { "epoch": 0.7214397082529531, "grad_norm": 75.29458618164062, "learning_rate": 5e-06, "loss": 2.1054, "step": 13650 }, { "epoch": 0.7219682355117465, "grad_norm": 58.09067916870117, "learning_rate": 5e-06, "loss": 1.8671, "step": 13660 }, { "epoch": 0.7224967627705399, "grad_norm": 59.3167724609375, "learning_rate": 5e-06, "loss": 1.6785, "step": 13670 }, { "epoch": 0.7230252900293332, "grad_norm": 96.232421875, "learning_rate": 5e-06, "loss": 2.0017, "step": 13680 }, { "epoch": 0.7235538172881266, "grad_norm": 72.16514587402344, "learning_rate": 5e-06, "loss": 1.7687, "step": 13690 }, { "epoch": 0.72408234454692, "grad_norm": 90.83307647705078, "learning_rate": 5e-06, "loss": 2.194, "step": 13700 }, { "epoch": 0.7246108718057134, "grad_norm": 75.21852111816406, "learning_rate": 5e-06, "loss": 2.2445, "step": 13710 }, { "epoch": 0.7251393990645068, "grad_norm": 69.645751953125, "learning_rate": 5e-06, "loss": 1.842, "step": 13720 }, { "epoch": 0.7256679263233001, "grad_norm": 95.62348937988281, "learning_rate": 5e-06, "loss": 2.0074, "step": 13730 }, { "epoch": 0.7261964535820935, "grad_norm": 61.0582275390625, "learning_rate": 5e-06, "loss": 2.0612, "step": 13740 }, { "epoch": 0.7267249808408869, "grad_norm": 76.27189636230469, "learning_rate": 5e-06, "loss": 2.1411, "step": 13750 }, { "epoch": 0.7272535080996803, "grad_norm": 91.54174041748047, "learning_rate": 5e-06, "loss": 2.1116, "step": 13760 }, { "epoch": 0.7277820353584736, "grad_norm": 66.5391616821289, "learning_rate": 5e-06, "loss": 1.977, "step": 13770 }, { "epoch": 0.728310562617267, "grad_norm": 68.07477569580078, "learning_rate": 5e-06, "loss": 1.8613, "step": 13780 }, { "epoch": 0.7288390898760604, "grad_norm": 84.36400604248047, "learning_rate": 5e-06, "loss": 2.0578, "step": 13790 }, { "epoch": 0.7293676171348538, "grad_norm": 162.1019744873047, "learning_rate": 5e-06, "loss": 1.9875, "step": 13800 }, { "epoch": 0.7298961443936471, "grad_norm": 69.1906509399414, "learning_rate": 5e-06, "loss": 1.8705, "step": 13810 }, { "epoch": 0.7304246716524405, "grad_norm": 79.55948638916016, "learning_rate": 5e-06, "loss": 2.1817, "step": 13820 }, { "epoch": 0.7309531989112339, "grad_norm": 85.5940933227539, "learning_rate": 5e-06, "loss": 2.0038, "step": 13830 }, { "epoch": 0.7314817261700273, "grad_norm": 73.04915618896484, "learning_rate": 5e-06, "loss": 1.5057, "step": 13840 }, { "epoch": 0.7320102534288206, "grad_norm": 48.61487579345703, "learning_rate": 5e-06, "loss": 1.7593, "step": 13850 }, { "epoch": 0.7325387806876139, "grad_norm": 72.5323257446289, "learning_rate": 5e-06, "loss": 2.0221, "step": 13860 }, { "epoch": 0.7330673079464073, "grad_norm": 95.33984375, "learning_rate": 5e-06, "loss": 2.3687, "step": 13870 }, { "epoch": 0.7335958352052007, "grad_norm": 77.47138214111328, "learning_rate": 5e-06, "loss": 2.0598, "step": 13880 }, { "epoch": 0.734124362463994, "grad_norm": 85.97520446777344, "learning_rate": 5e-06, "loss": 2.0212, "step": 13890 }, { "epoch": 0.7346528897227874, "grad_norm": 101.71643829345703, "learning_rate": 5e-06, "loss": 2.2552, "step": 13900 }, { "epoch": 0.7351814169815808, "grad_norm": 81.4498062133789, "learning_rate": 5e-06, "loss": 2.2452, "step": 13910 }, { "epoch": 0.7357099442403742, "grad_norm": 54.104736328125, "learning_rate": 5e-06, "loss": 1.7635, "step": 13920 }, { "epoch": 0.7362384714991675, "grad_norm": 86.19744110107422, "learning_rate": 5e-06, "loss": 1.979, "step": 13930 }, { "epoch": 0.7367669987579609, "grad_norm": 81.58574676513672, "learning_rate": 5e-06, "loss": 1.9275, "step": 13940 }, { "epoch": 0.7372955260167543, "grad_norm": 102.15382385253906, "learning_rate": 5e-06, "loss": 1.8365, "step": 13950 }, { "epoch": 0.7378240532755477, "grad_norm": 75.61170959472656, "learning_rate": 5e-06, "loss": 2.0113, "step": 13960 }, { "epoch": 0.738352580534341, "grad_norm": 56.20273971557617, "learning_rate": 5e-06, "loss": 1.7207, "step": 13970 }, { "epoch": 0.7388811077931344, "grad_norm": 70.11517333984375, "learning_rate": 5e-06, "loss": 2.1969, "step": 13980 }, { "epoch": 0.7394096350519278, "grad_norm": 63.60990905761719, "learning_rate": 5e-06, "loss": 2.1661, "step": 13990 }, { "epoch": 0.7399381623107212, "grad_norm": 71.17716217041016, "learning_rate": 5e-06, "loss": 1.9854, "step": 14000 }, { "epoch": 0.7399381623107212, "eval_loss": 1.797134280204773, "eval_runtime": 40.8531, "eval_samples_per_second": 299.414, "eval_steps_per_second": 9.375, "eval_sts-dev_pearson_cosine": 0.8494673613127581, "eval_sts-dev_pearson_dot": 0.8284773726948824, "eval_sts-dev_pearson_euclidean": 0.8536219363134325, "eval_sts-dev_pearson_manhattan": 0.8534310692889782, "eval_sts-dev_pearson_max": 0.8536219363134325, "eval_sts-dev_spearman_cosine": 0.8534743367310396, "eval_sts-dev_spearman_dot": 0.8266306616098604, "eval_sts-dev_spearman_euclidean": 0.8547325377807958, "eval_sts-dev_spearman_manhattan": 0.854313263763251, "eval_sts-dev_spearman_max": 0.8547325377807958, "step": 14000 }, { "epoch": 0.7404666895695146, "grad_norm": 84.50469970703125, "learning_rate": 5e-06, "loss": 2.1683, "step": 14010 }, { "epoch": 0.7409952168283079, "grad_norm": 72.12297821044922, "learning_rate": 5e-06, "loss": 1.7725, "step": 14020 }, { "epoch": 0.7415237440871013, "grad_norm": 68.54808044433594, "learning_rate": 5e-06, "loss": 1.8333, "step": 14030 }, { "epoch": 0.7420522713458947, "grad_norm": 77.23931884765625, "learning_rate": 5e-06, "loss": 1.6342, "step": 14040 }, { "epoch": 0.7425807986046881, "grad_norm": 86.75444030761719, "learning_rate": 5e-06, "loss": 1.6626, "step": 14050 }, { "epoch": 0.7431093258634814, "grad_norm": 74.99226379394531, "learning_rate": 5e-06, "loss": 2.1473, "step": 14060 }, { "epoch": 0.7436378531222748, "grad_norm": 73.15399169921875, "learning_rate": 5e-06, "loss": 1.7319, "step": 14070 }, { "epoch": 0.7441663803810682, "grad_norm": 60.271080017089844, "learning_rate": 5e-06, "loss": 1.8876, "step": 14080 }, { "epoch": 0.7446949076398616, "grad_norm": 76.52225494384766, "learning_rate": 5e-06, "loss": 1.8277, "step": 14090 }, { "epoch": 0.745223434898655, "grad_norm": 77.07417297363281, "learning_rate": 5e-06, "loss": 1.8462, "step": 14100 }, { "epoch": 0.7457519621574483, "grad_norm": 71.2138900756836, "learning_rate": 5e-06, "loss": 1.8455, "step": 14110 }, { "epoch": 0.7462804894162416, "grad_norm": 83.97593688964844, "learning_rate": 5e-06, "loss": 2.2302, "step": 14120 }, { "epoch": 0.746809016675035, "grad_norm": 74.93870544433594, "learning_rate": 5e-06, "loss": 2.2887, "step": 14130 }, { "epoch": 0.7473375439338283, "grad_norm": 93.32628631591797, "learning_rate": 5e-06, "loss": 2.0642, "step": 14140 }, { "epoch": 0.7478660711926217, "grad_norm": 83.35697937011719, "learning_rate": 5e-06, "loss": 1.9294, "step": 14150 }, { "epoch": 0.7483945984514151, "grad_norm": 102.67933654785156, "learning_rate": 5e-06, "loss": 1.6868, "step": 14160 }, { "epoch": 0.7489231257102085, "grad_norm": 54.10956573486328, "learning_rate": 5e-06, "loss": 2.1736, "step": 14170 }, { "epoch": 0.7494516529690018, "grad_norm": 94.8926010131836, "learning_rate": 5e-06, "loss": 2.1815, "step": 14180 }, { "epoch": 0.7499801802277952, "grad_norm": 49.98324966430664, "learning_rate": 5e-06, "loss": 1.9743, "step": 14190 }, { "epoch": 0.7505087074865886, "grad_norm": 68.60459899902344, "learning_rate": 5e-06, "loss": 1.7082, "step": 14200 }, { "epoch": 0.751037234745382, "grad_norm": 74.58995819091797, "learning_rate": 5e-06, "loss": 2.1353, "step": 14210 }, { "epoch": 0.7515657620041754, "grad_norm": 63.263919830322266, "learning_rate": 5e-06, "loss": 1.7371, "step": 14220 }, { "epoch": 0.7520942892629687, "grad_norm": 111.81888580322266, "learning_rate": 5e-06, "loss": 2.1287, "step": 14230 }, { "epoch": 0.7526228165217621, "grad_norm": 80.28509521484375, "learning_rate": 5e-06, "loss": 1.5459, "step": 14240 }, { "epoch": 0.7531513437805555, "grad_norm": 80.69876098632812, "learning_rate": 5e-06, "loss": 1.482, "step": 14250 }, { "epoch": 0.7536798710393489, "grad_norm": 163.49742126464844, "learning_rate": 5e-06, "loss": 1.8119, "step": 14260 }, { "epoch": 0.7542083982981422, "grad_norm": 41.70513153076172, "learning_rate": 5e-06, "loss": 1.5714, "step": 14270 }, { "epoch": 0.7547369255569356, "grad_norm": 53.4793701171875, "learning_rate": 5e-06, "loss": 2.2632, "step": 14280 }, { "epoch": 0.755265452815729, "grad_norm": 57.4197998046875, "learning_rate": 5e-06, "loss": 1.969, "step": 14290 }, { "epoch": 0.7557939800745224, "grad_norm": 84.42742919921875, "learning_rate": 5e-06, "loss": 2.0387, "step": 14300 }, { "epoch": 0.7563225073333157, "grad_norm": 162.214111328125, "learning_rate": 5e-06, "loss": 2.1817, "step": 14310 }, { "epoch": 0.7568510345921091, "grad_norm": 82.27605438232422, "learning_rate": 5e-06, "loss": 2.1087, "step": 14320 }, { "epoch": 0.7573795618509025, "grad_norm": 74.5183334350586, "learning_rate": 5e-06, "loss": 2.3147, "step": 14330 }, { "epoch": 0.7579080891096959, "grad_norm": 67.3210220336914, "learning_rate": 5e-06, "loss": 2.3387, "step": 14340 }, { "epoch": 0.7584366163684892, "grad_norm": 62.653438568115234, "learning_rate": 5e-06, "loss": 1.9135, "step": 14350 }, { "epoch": 0.7589651436272826, "grad_norm": 79.77814483642578, "learning_rate": 5e-06, "loss": 1.9133, "step": 14360 }, { "epoch": 0.759493670886076, "grad_norm": 70.50086975097656, "learning_rate": 5e-06, "loss": 1.885, "step": 14370 }, { "epoch": 0.7600221981448694, "grad_norm": 87.52069854736328, "learning_rate": 5e-06, "loss": 1.8155, "step": 14380 }, { "epoch": 0.7605507254036626, "grad_norm": 77.37886047363281, "learning_rate": 5e-06, "loss": 1.9801, "step": 14390 }, { "epoch": 0.761079252662456, "grad_norm": 121.9998550415039, "learning_rate": 5e-06, "loss": 1.9567, "step": 14400 }, { "epoch": 0.7616077799212494, "grad_norm": 78.34786987304688, "learning_rate": 5e-06, "loss": 1.8329, "step": 14410 }, { "epoch": 0.7621363071800428, "grad_norm": 66.50887298583984, "learning_rate": 5e-06, "loss": 1.9587, "step": 14420 }, { "epoch": 0.7626648344388361, "grad_norm": 34.712345123291016, "learning_rate": 5e-06, "loss": 1.7114, "step": 14430 }, { "epoch": 0.7631933616976295, "grad_norm": 69.65988159179688, "learning_rate": 5e-06, "loss": 1.9032, "step": 14440 }, { "epoch": 0.7637218889564229, "grad_norm": 50.179683685302734, "learning_rate": 5e-06, "loss": 1.9712, "step": 14450 }, { "epoch": 0.7642504162152163, "grad_norm": 55.41749954223633, "learning_rate": 5e-06, "loss": 1.7924, "step": 14460 }, { "epoch": 0.7647789434740097, "grad_norm": 71.23522186279297, "learning_rate": 5e-06, "loss": 1.922, "step": 14470 }, { "epoch": 0.765307470732803, "grad_norm": 62.98783493041992, "learning_rate": 5e-06, "loss": 2.0401, "step": 14480 }, { "epoch": 0.7658359979915964, "grad_norm": 113.51453399658203, "learning_rate": 5e-06, "loss": 2.019, "step": 14490 }, { "epoch": 0.7663645252503898, "grad_norm": 71.70709991455078, "learning_rate": 5e-06, "loss": 1.871, "step": 14500 }, { "epoch": 0.7668930525091832, "grad_norm": 70.15160369873047, "learning_rate": 5e-06, "loss": 1.6628, "step": 14510 }, { "epoch": 0.7674215797679765, "grad_norm": 66.19117736816406, "learning_rate": 5e-06, "loss": 2.1395, "step": 14520 }, { "epoch": 0.7679501070267699, "grad_norm": 76.49700927734375, "learning_rate": 5e-06, "loss": 2.0559, "step": 14530 }, { "epoch": 0.7684786342855633, "grad_norm": 57.30728530883789, "learning_rate": 5e-06, "loss": 1.7492, "step": 14540 }, { "epoch": 0.7690071615443567, "grad_norm": 83.58246612548828, "learning_rate": 5e-06, "loss": 1.8538, "step": 14550 }, { "epoch": 0.76953568880315, "grad_norm": 77.94324493408203, "learning_rate": 5e-06, "loss": 2.0276, "step": 14560 }, { "epoch": 0.7700642160619434, "grad_norm": 56.17070007324219, "learning_rate": 5e-06, "loss": 1.7587, "step": 14570 }, { "epoch": 0.7705927433207368, "grad_norm": 83.4946517944336, "learning_rate": 5e-06, "loss": 1.9258, "step": 14580 }, { "epoch": 0.7711212705795302, "grad_norm": 62.06380844116211, "learning_rate": 5e-06, "loss": 2.1503, "step": 14590 }, { "epoch": 0.7716497978383235, "grad_norm": 58.03206253051758, "learning_rate": 5e-06, "loss": 2.0958, "step": 14600 }, { "epoch": 0.7721783250971169, "grad_norm": 62.890228271484375, "learning_rate": 5e-06, "loss": 1.852, "step": 14610 }, { "epoch": 0.7727068523559103, "grad_norm": 47.618858337402344, "learning_rate": 5e-06, "loss": 1.9172, "step": 14620 }, { "epoch": 0.7732353796147037, "grad_norm": 81.466064453125, "learning_rate": 5e-06, "loss": 1.966, "step": 14630 }, { "epoch": 0.773763906873497, "grad_norm": 72.4347915649414, "learning_rate": 5e-06, "loss": 1.5633, "step": 14640 }, { "epoch": 0.7742924341322903, "grad_norm": 76.28731536865234, "learning_rate": 5e-06, "loss": 1.7901, "step": 14650 }, { "epoch": 0.7748209613910837, "grad_norm": 58.220149993896484, "learning_rate": 5e-06, "loss": 2.0186, "step": 14660 }, { "epoch": 0.7753494886498771, "grad_norm": 66.88105773925781, "learning_rate": 5e-06, "loss": 1.6695, "step": 14670 }, { "epoch": 0.7758780159086704, "grad_norm": 77.73107147216797, "learning_rate": 5e-06, "loss": 2.2001, "step": 14680 }, { "epoch": 0.7764065431674638, "grad_norm": 76.59333038330078, "learning_rate": 5e-06, "loss": 1.9772, "step": 14690 }, { "epoch": 0.7769350704262572, "grad_norm": 70.18540954589844, "learning_rate": 5e-06, "loss": 1.7697, "step": 14700 }, { "epoch": 0.7774635976850506, "grad_norm": 53.45761489868164, "learning_rate": 5e-06, "loss": 1.6297, "step": 14710 }, { "epoch": 0.777992124943844, "grad_norm": 62.28361892700195, "learning_rate": 5e-06, "loss": 1.7892, "step": 14720 }, { "epoch": 0.7785206522026373, "grad_norm": 73.50800323486328, "learning_rate": 5e-06, "loss": 1.5018, "step": 14730 }, { "epoch": 0.7790491794614307, "grad_norm": 65.53732299804688, "learning_rate": 5e-06, "loss": 2.0884, "step": 14740 }, { "epoch": 0.7795777067202241, "grad_norm": 53.26022720336914, "learning_rate": 5e-06, "loss": 1.7974, "step": 14750 }, { "epoch": 0.7801062339790175, "grad_norm": 79.38594818115234, "learning_rate": 5e-06, "loss": 2.1531, "step": 14760 }, { "epoch": 0.7806347612378108, "grad_norm": 59.26079177856445, "learning_rate": 5e-06, "loss": 2.019, "step": 14770 }, { "epoch": 0.7811632884966042, "grad_norm": 65.92668914794922, "learning_rate": 5e-06, "loss": 1.7616, "step": 14780 }, { "epoch": 0.7816918157553976, "grad_norm": 48.678462982177734, "learning_rate": 5e-06, "loss": 1.8626, "step": 14790 }, { "epoch": 0.782220343014191, "grad_norm": 62.52599334716797, "learning_rate": 5e-06, "loss": 2.2728, "step": 14800 }, { "epoch": 0.7827488702729843, "grad_norm": 104.0319595336914, "learning_rate": 5e-06, "loss": 2.1229, "step": 14810 }, { "epoch": 0.7832773975317777, "grad_norm": 57.97853469848633, "learning_rate": 5e-06, "loss": 1.7864, "step": 14820 }, { "epoch": 0.7838059247905711, "grad_norm": 71.96578979492188, "learning_rate": 5e-06, "loss": 1.7094, "step": 14830 }, { "epoch": 0.7843344520493645, "grad_norm": 160.54147338867188, "learning_rate": 5e-06, "loss": 1.7426, "step": 14840 }, { "epoch": 0.7848629793081578, "grad_norm": 71.55754089355469, "learning_rate": 5e-06, "loss": 2.1278, "step": 14850 }, { "epoch": 0.7853915065669512, "grad_norm": 61.13692092895508, "learning_rate": 5e-06, "loss": 2.2591, "step": 14860 }, { "epoch": 0.7859200338257446, "grad_norm": 69.27803802490234, "learning_rate": 5e-06, "loss": 1.9518, "step": 14870 }, { "epoch": 0.786448561084538, "grad_norm": 74.49102020263672, "learning_rate": 5e-06, "loss": 2.1244, "step": 14880 }, { "epoch": 0.7869770883433314, "grad_norm": 59.75688171386719, "learning_rate": 5e-06, "loss": 1.9967, "step": 14890 }, { "epoch": 0.7875056156021247, "grad_norm": 64.8705062866211, "learning_rate": 5e-06, "loss": 1.689, "step": 14900 }, { "epoch": 0.788034142860918, "grad_norm": 82.1138687133789, "learning_rate": 5e-06, "loss": 2.1413, "step": 14910 }, { "epoch": 0.7885626701197114, "grad_norm": 57.67179489135742, "learning_rate": 5e-06, "loss": 1.7533, "step": 14920 }, { "epoch": 0.7890911973785047, "grad_norm": 77.77244567871094, "learning_rate": 5e-06, "loss": 2.0678, "step": 14930 }, { "epoch": 0.7896197246372981, "grad_norm": 62.85699462890625, "learning_rate": 5e-06, "loss": 1.9112, "step": 14940 }, { "epoch": 0.7901482518960915, "grad_norm": 59.97759246826172, "learning_rate": 5e-06, "loss": 1.9306, "step": 14950 }, { "epoch": 0.7906767791548849, "grad_norm": 71.47175598144531, "learning_rate": 5e-06, "loss": 1.6499, "step": 14960 }, { "epoch": 0.7912053064136783, "grad_norm": 80.95116424560547, "learning_rate": 5e-06, "loss": 2.0537, "step": 14970 }, { "epoch": 0.7917338336724716, "grad_norm": 49.25926971435547, "learning_rate": 5e-06, "loss": 2.3653, "step": 14980 }, { "epoch": 0.792262360931265, "grad_norm": 93.49542999267578, "learning_rate": 5e-06, "loss": 2.076, "step": 14990 }, { "epoch": 0.7927908881900584, "grad_norm": 76.29934692382812, "learning_rate": 5e-06, "loss": 1.7841, "step": 15000 }, { "epoch": 0.7927908881900584, "eval_loss": 1.7890739440917969, "eval_runtime": 45.0525, "eval_samples_per_second": 271.506, "eval_steps_per_second": 8.501, "eval_sts-dev_pearson_cosine": 0.8478125672537944, "eval_sts-dev_pearson_dot": 0.8257023759721457, "eval_sts-dev_pearson_euclidean": 0.85056724492807, "eval_sts-dev_pearson_manhattan": 0.8501588923645502, "eval_sts-dev_pearson_max": 0.85056724492807, "eval_sts-dev_spearman_cosine": 0.851403843428005, "eval_sts-dev_spearman_dot": 0.8232889550344444, "eval_sts-dev_spearman_euclidean": 0.8516004041464188, "eval_sts-dev_spearman_manhattan": 0.8509735392154489, "eval_sts-dev_spearman_max": 0.8516004041464188, "step": 15000 }, { "epoch": 0.7933194154488518, "grad_norm": 83.32725524902344, "learning_rate": 5e-06, "loss": 1.9321, "step": 15010 }, { "epoch": 0.7938479427076451, "grad_norm": 84.81912994384766, "learning_rate": 5e-06, "loss": 1.8186, "step": 15020 }, { "epoch": 0.7943764699664385, "grad_norm": 65.5103759765625, "learning_rate": 5e-06, "loss": 1.8876, "step": 15030 }, { "epoch": 0.7949049972252319, "grad_norm": 91.05558013916016, "learning_rate": 5e-06, "loss": 1.9201, "step": 15040 }, { "epoch": 0.7954335244840253, "grad_norm": 54.298744201660156, "learning_rate": 5e-06, "loss": 1.7631, "step": 15050 }, { "epoch": 0.7959620517428186, "grad_norm": 74.23414611816406, "learning_rate": 5e-06, "loss": 1.8544, "step": 15060 }, { "epoch": 0.796490579001612, "grad_norm": 73.18302917480469, "learning_rate": 5e-06, "loss": 1.6339, "step": 15070 }, { "epoch": 0.7970191062604054, "grad_norm": 69.7076416015625, "learning_rate": 5e-06, "loss": 2.2041, "step": 15080 }, { "epoch": 0.7975476335191988, "grad_norm": 66.45732879638672, "learning_rate": 5e-06, "loss": 1.7937, "step": 15090 }, { "epoch": 0.7980761607779921, "grad_norm": 66.5468521118164, "learning_rate": 5e-06, "loss": 1.5816, "step": 15100 }, { "epoch": 0.7986046880367855, "grad_norm": 171.53314208984375, "learning_rate": 5e-06, "loss": 2.1176, "step": 15110 }, { "epoch": 0.7991332152955789, "grad_norm": 37.3952522277832, "learning_rate": 5e-06, "loss": 2.0837, "step": 15120 }, { "epoch": 0.7996617425543723, "grad_norm": 54.1025276184082, "learning_rate": 5e-06, "loss": 1.831, "step": 15130 }, { "epoch": 0.8001902698131657, "grad_norm": 72.2220230102539, "learning_rate": 5e-06, "loss": 2.0015, "step": 15140 }, { "epoch": 0.800718797071959, "grad_norm": 55.20166015625, "learning_rate": 5e-06, "loss": 1.7092, "step": 15150 }, { "epoch": 0.8012473243307524, "grad_norm": 87.35902404785156, "learning_rate": 5e-06, "loss": 2.1795, "step": 15160 }, { "epoch": 0.8017758515895458, "grad_norm": 83.80599212646484, "learning_rate": 5e-06, "loss": 1.8364, "step": 15170 }, { "epoch": 0.802304378848339, "grad_norm": 96.60919189453125, "learning_rate": 5e-06, "loss": 1.8268, "step": 15180 }, { "epoch": 0.8028329061071324, "grad_norm": 77.40782165527344, "learning_rate": 5e-06, "loss": 1.7613, "step": 15190 }, { "epoch": 0.8033614333659258, "grad_norm": 67.30529022216797, "learning_rate": 5e-06, "loss": 1.9624, "step": 15200 }, { "epoch": 0.8038899606247192, "grad_norm": 43.36810302734375, "learning_rate": 5e-06, "loss": 2.2151, "step": 15210 }, { "epoch": 0.8044184878835126, "grad_norm": 66.91053009033203, "learning_rate": 5e-06, "loss": 1.7904, "step": 15220 }, { "epoch": 0.8049470151423059, "grad_norm": 68.7862777709961, "learning_rate": 5e-06, "loss": 1.7482, "step": 15230 }, { "epoch": 0.8054755424010993, "grad_norm": 110.2019271850586, "learning_rate": 5e-06, "loss": 1.7255, "step": 15240 }, { "epoch": 0.8060040696598927, "grad_norm": 76.73047637939453, "learning_rate": 5e-06, "loss": 1.9899, "step": 15250 }, { "epoch": 0.8065325969186861, "grad_norm": 102.4013442993164, "learning_rate": 5e-06, "loss": 1.9251, "step": 15260 }, { "epoch": 0.8070611241774794, "grad_norm": 73.10269165039062, "learning_rate": 5e-06, "loss": 1.8913, "step": 15270 }, { "epoch": 0.8075896514362728, "grad_norm": 53.11415481567383, "learning_rate": 5e-06, "loss": 1.7971, "step": 15280 }, { "epoch": 0.8081181786950662, "grad_norm": 102.80229187011719, "learning_rate": 5e-06, "loss": 1.7314, "step": 15290 }, { "epoch": 0.8086467059538596, "grad_norm": 58.69233703613281, "learning_rate": 5e-06, "loss": 2.124, "step": 15300 }, { "epoch": 0.809175233212653, "grad_norm": 72.30756378173828, "learning_rate": 5e-06, "loss": 2.2308, "step": 15310 }, { "epoch": 0.8097037604714463, "grad_norm": 80.77075958251953, "learning_rate": 5e-06, "loss": 2.226, "step": 15320 }, { "epoch": 0.8102322877302397, "grad_norm": 76.4244155883789, "learning_rate": 5e-06, "loss": 1.8701, "step": 15330 }, { "epoch": 0.8107608149890331, "grad_norm": 73.40149688720703, "learning_rate": 5e-06, "loss": 1.8641, "step": 15340 }, { "epoch": 0.8112893422478265, "grad_norm": 64.20404052734375, "learning_rate": 5e-06, "loss": 1.719, "step": 15350 }, { "epoch": 0.8118178695066198, "grad_norm": 87.33538055419922, "learning_rate": 5e-06, "loss": 2.0002, "step": 15360 }, { "epoch": 0.8123463967654132, "grad_norm": 136.60867309570312, "learning_rate": 5e-06, "loss": 2.2023, "step": 15370 }, { "epoch": 0.8128749240242066, "grad_norm": 78.46726989746094, "learning_rate": 5e-06, "loss": 2.0849, "step": 15380 }, { "epoch": 0.813403451283, "grad_norm": 61.7611083984375, "learning_rate": 5e-06, "loss": 2.0228, "step": 15390 }, { "epoch": 0.8139319785417933, "grad_norm": 70.33870697021484, "learning_rate": 5e-06, "loss": 1.8715, "step": 15400 }, { "epoch": 0.8144605058005867, "grad_norm": 94.92715454101562, "learning_rate": 5e-06, "loss": 1.9315, "step": 15410 }, { "epoch": 0.8149890330593801, "grad_norm": 65.75609588623047, "learning_rate": 5e-06, "loss": 1.6358, "step": 15420 }, { "epoch": 0.8155175603181735, "grad_norm": 75.76026153564453, "learning_rate": 5e-06, "loss": 1.9982, "step": 15430 }, { "epoch": 0.8160460875769667, "grad_norm": 78.74514770507812, "learning_rate": 5e-06, "loss": 1.4689, "step": 15440 }, { "epoch": 0.8165746148357601, "grad_norm": 99.40294647216797, "learning_rate": 5e-06, "loss": 1.6202, "step": 15450 }, { "epoch": 0.8171031420945535, "grad_norm": 77.35928344726562, "learning_rate": 5e-06, "loss": 1.79, "step": 15460 }, { "epoch": 0.8176316693533469, "grad_norm": 74.98629760742188, "learning_rate": 5e-06, "loss": 1.6029, "step": 15470 }, { "epoch": 0.8181601966121402, "grad_norm": 77.57484436035156, "learning_rate": 5e-06, "loss": 1.5504, "step": 15480 }, { "epoch": 0.8186887238709336, "grad_norm": 86.13865661621094, "learning_rate": 5e-06, "loss": 1.5575, "step": 15490 }, { "epoch": 0.819217251129727, "grad_norm": 75.45899963378906, "learning_rate": 5e-06, "loss": 1.6412, "step": 15500 }, { "epoch": 0.8197457783885204, "grad_norm": 87.57365417480469, "learning_rate": 5e-06, "loss": 2.2212, "step": 15510 }, { "epoch": 0.8202743056473137, "grad_norm": 69.81256866455078, "learning_rate": 5e-06, "loss": 1.9557, "step": 15520 }, { "epoch": 0.8208028329061071, "grad_norm": 94.55403900146484, "learning_rate": 5e-06, "loss": 1.7794, "step": 15530 }, { "epoch": 0.8213313601649005, "grad_norm": 94.55152893066406, "learning_rate": 5e-06, "loss": 2.204, "step": 15540 }, { "epoch": 0.8218598874236939, "grad_norm": 31.40751838684082, "learning_rate": 5e-06, "loss": 1.7541, "step": 15550 }, { "epoch": 0.8223884146824872, "grad_norm": 96.78617095947266, "learning_rate": 5e-06, "loss": 1.7305, "step": 15560 }, { "epoch": 0.8229169419412806, "grad_norm": 63.54246139526367, "learning_rate": 5e-06, "loss": 1.7843, "step": 15570 }, { "epoch": 0.823445469200074, "grad_norm": 53.791893005371094, "learning_rate": 5e-06, "loss": 2.2315, "step": 15580 }, { "epoch": 0.8239739964588674, "grad_norm": 91.70677947998047, "learning_rate": 5e-06, "loss": 2.1169, "step": 15590 }, { "epoch": 0.8245025237176608, "grad_norm": 60.97389221191406, "learning_rate": 5e-06, "loss": 1.7413, "step": 15600 }, { "epoch": 0.8250310509764541, "grad_norm": 51.49700927734375, "learning_rate": 5e-06, "loss": 1.669, "step": 15610 }, { "epoch": 0.8255595782352475, "grad_norm": 97.26958465576172, "learning_rate": 5e-06, "loss": 1.8512, "step": 15620 }, { "epoch": 0.8260881054940409, "grad_norm": 75.7226333618164, "learning_rate": 5e-06, "loss": 2.1501, "step": 15630 }, { "epoch": 0.8266166327528343, "grad_norm": 96.92597198486328, "learning_rate": 5e-06, "loss": 2.0347, "step": 15640 }, { "epoch": 0.8271451600116276, "grad_norm": 63.97439193725586, "learning_rate": 5e-06, "loss": 1.7874, "step": 15650 }, { "epoch": 0.827673687270421, "grad_norm": 66.61109924316406, "learning_rate": 5e-06, "loss": 2.2077, "step": 15660 }, { "epoch": 0.8282022145292144, "grad_norm": 52.561431884765625, "learning_rate": 5e-06, "loss": 1.7809, "step": 15670 }, { "epoch": 0.8287307417880078, "grad_norm": 55.5440673828125, "learning_rate": 5e-06, "loss": 1.7826, "step": 15680 }, { "epoch": 0.8292592690468011, "grad_norm": 64.21073913574219, "learning_rate": 5e-06, "loss": 1.7034, "step": 15690 }, { "epoch": 0.8297877963055945, "grad_norm": 61.674434661865234, "learning_rate": 5e-06, "loss": 1.8718, "step": 15700 }, { "epoch": 0.8303163235643878, "grad_norm": 91.69981384277344, "learning_rate": 5e-06, "loss": 2.302, "step": 15710 }, { "epoch": 0.8308448508231812, "grad_norm": 59.0601921081543, "learning_rate": 5e-06, "loss": 1.8248, "step": 15720 }, { "epoch": 0.8313733780819745, "grad_norm": 71.42572784423828, "learning_rate": 5e-06, "loss": 2.0155, "step": 15730 }, { "epoch": 0.8319019053407679, "grad_norm": 62.17811584472656, "learning_rate": 5e-06, "loss": 1.9881, "step": 15740 }, { "epoch": 0.8324304325995613, "grad_norm": 85.58234405517578, "learning_rate": 5e-06, "loss": 2.0001, "step": 15750 }, { "epoch": 0.8329589598583547, "grad_norm": 65.08682250976562, "learning_rate": 5e-06, "loss": 2.0252, "step": 15760 }, { "epoch": 0.833487487117148, "grad_norm": 78.23741912841797, "learning_rate": 5e-06, "loss": 1.3172, "step": 15770 }, { "epoch": 0.8340160143759414, "grad_norm": 63.195682525634766, "learning_rate": 5e-06, "loss": 1.9285, "step": 15780 }, { "epoch": 0.8345445416347348, "grad_norm": 63.793827056884766, "learning_rate": 5e-06, "loss": 1.9097, "step": 15790 }, { "epoch": 0.8350730688935282, "grad_norm": 105.22562408447266, "learning_rate": 5e-06, "loss": 1.8052, "step": 15800 }, { "epoch": 0.8356015961523215, "grad_norm": 75.73572540283203, "learning_rate": 5e-06, "loss": 1.7123, "step": 15810 }, { "epoch": 0.8361301234111149, "grad_norm": 76.26692962646484, "learning_rate": 5e-06, "loss": 2.0562, "step": 15820 }, { "epoch": 0.8366586506699083, "grad_norm": 45.39213180541992, "learning_rate": 5e-06, "loss": 1.5375, "step": 15830 }, { "epoch": 0.8371871779287017, "grad_norm": 57.22304916381836, "learning_rate": 5e-06, "loss": 1.8422, "step": 15840 }, { "epoch": 0.837715705187495, "grad_norm": 61.52183532714844, "learning_rate": 5e-06, "loss": 1.68, "step": 15850 }, { "epoch": 0.8382442324462884, "grad_norm": 61.058921813964844, "learning_rate": 5e-06, "loss": 1.7195, "step": 15860 }, { "epoch": 0.8387727597050818, "grad_norm": 44.133056640625, "learning_rate": 5e-06, "loss": 1.7364, "step": 15870 }, { "epoch": 0.8393012869638752, "grad_norm": 77.5013427734375, "learning_rate": 5e-06, "loss": 1.4744, "step": 15880 }, { "epoch": 0.8398298142226686, "grad_norm": 57.69696044921875, "learning_rate": 5e-06, "loss": 1.8041, "step": 15890 }, { "epoch": 0.8403583414814619, "grad_norm": 112.70054626464844, "learning_rate": 5e-06, "loss": 1.3925, "step": 15900 }, { "epoch": 0.8408868687402553, "grad_norm": 104.39891052246094, "learning_rate": 5e-06, "loss": 2.1898, "step": 15910 }, { "epoch": 0.8414153959990487, "grad_norm": 76.90241241455078, "learning_rate": 5e-06, "loss": 1.9226, "step": 15920 }, { "epoch": 0.8419439232578421, "grad_norm": 56.3410758972168, "learning_rate": 5e-06, "loss": 1.7578, "step": 15930 }, { "epoch": 0.8424724505166354, "grad_norm": 53.4869270324707, "learning_rate": 5e-06, "loss": 1.7141, "step": 15940 }, { "epoch": 0.8430009777754288, "grad_norm": 66.99232482910156, "learning_rate": 5e-06, "loss": 1.7091, "step": 15950 }, { "epoch": 0.8435295050342222, "grad_norm": 75.88782501220703, "learning_rate": 5e-06, "loss": 1.7745, "step": 15960 }, { "epoch": 0.8440580322930155, "grad_norm": 45.25175094604492, "learning_rate": 5e-06, "loss": 1.715, "step": 15970 }, { "epoch": 0.8445865595518088, "grad_norm": 64.1935806274414, "learning_rate": 5e-06, "loss": 1.4926, "step": 15980 }, { "epoch": 0.8451150868106022, "grad_norm": 80.57048034667969, "learning_rate": 5e-06, "loss": 1.7833, "step": 15990 }, { "epoch": 0.8456436140693956, "grad_norm": 76.42806243896484, "learning_rate": 5e-06, "loss": 1.6752, "step": 16000 }, { "epoch": 0.8456436140693956, "eval_loss": 1.7300459146499634, "eval_runtime": 45.0618, "eval_samples_per_second": 271.449, "eval_steps_per_second": 8.499, "eval_sts-dev_pearson_cosine": 0.8478486834337826, "eval_sts-dev_pearson_dot": 0.8272854022417035, "eval_sts-dev_pearson_euclidean": 0.8517669190936299, "eval_sts-dev_pearson_manhattan": 0.851710366712456, "eval_sts-dev_pearson_max": 0.8517669190936299, "eval_sts-dev_spearman_cosine": 0.85057446948072, "eval_sts-dev_spearman_dot": 0.8240429902674024, "eval_sts-dev_spearman_euclidean": 0.8520664104663148, "eval_sts-dev_spearman_manhattan": 0.8516383402021719, "eval_sts-dev_spearman_max": 0.8520664104663148, "step": 16000 }, { "epoch": 0.846172141328189, "grad_norm": 70.12432098388672, "learning_rate": 5e-06, "loss": 1.9896, "step": 16010 }, { "epoch": 0.8467006685869823, "grad_norm": 57.79811096191406, "learning_rate": 5e-06, "loss": 1.7665, "step": 16020 }, { "epoch": 0.8472291958457757, "grad_norm": 80.64972686767578, "learning_rate": 5e-06, "loss": 1.7884, "step": 16030 }, { "epoch": 0.8477577231045691, "grad_norm": 53.106361389160156, "learning_rate": 5e-06, "loss": 2.1921, "step": 16040 }, { "epoch": 0.8482862503633625, "grad_norm": 64.3389663696289, "learning_rate": 5e-06, "loss": 1.7347, "step": 16050 }, { "epoch": 0.8488147776221558, "grad_norm": 61.87338638305664, "learning_rate": 5e-06, "loss": 1.668, "step": 16060 }, { "epoch": 0.8493433048809492, "grad_norm": 73.58171844482422, "learning_rate": 5e-06, "loss": 1.931, "step": 16070 }, { "epoch": 0.8498718321397426, "grad_norm": 63.14702606201172, "learning_rate": 5e-06, "loss": 1.698, "step": 16080 }, { "epoch": 0.850400359398536, "grad_norm": 77.0079345703125, "learning_rate": 5e-06, "loss": 1.8721, "step": 16090 }, { "epoch": 0.8509288866573294, "grad_norm": 86.19035339355469, "learning_rate": 5e-06, "loss": 1.8785, "step": 16100 }, { "epoch": 0.8514574139161227, "grad_norm": 77.01704406738281, "learning_rate": 5e-06, "loss": 2.0045, "step": 16110 }, { "epoch": 0.8519859411749161, "grad_norm": 95.22618865966797, "learning_rate": 5e-06, "loss": 1.6429, "step": 16120 }, { "epoch": 0.8525144684337095, "grad_norm": 76.56039428710938, "learning_rate": 5e-06, "loss": 1.9085, "step": 16130 }, { "epoch": 0.8530429956925029, "grad_norm": 50.36111831665039, "learning_rate": 5e-06, "loss": 1.6109, "step": 16140 }, { "epoch": 0.8535715229512962, "grad_norm": 61.932044982910156, "learning_rate": 5e-06, "loss": 1.5936, "step": 16150 }, { "epoch": 0.8541000502100896, "grad_norm": 65.52845764160156, "learning_rate": 5e-06, "loss": 2.0421, "step": 16160 }, { "epoch": 0.854628577468883, "grad_norm": 92.12174224853516, "learning_rate": 5e-06, "loss": 2.2305, "step": 16170 }, { "epoch": 0.8551571047276764, "grad_norm": 61.09035110473633, "learning_rate": 5e-06, "loss": 1.6916, "step": 16180 }, { "epoch": 0.8556856319864697, "grad_norm": 54.358985900878906, "learning_rate": 5e-06, "loss": 1.8242, "step": 16190 }, { "epoch": 0.8562141592452631, "grad_norm": 91.9179458618164, "learning_rate": 5e-06, "loss": 2.0866, "step": 16200 }, { "epoch": 0.8567426865040565, "grad_norm": 111.12257385253906, "learning_rate": 5e-06, "loss": 1.7995, "step": 16210 }, { "epoch": 0.8572712137628499, "grad_norm": 95.23286437988281, "learning_rate": 5e-06, "loss": 2.1889, "step": 16220 }, { "epoch": 0.8577997410216431, "grad_norm": 59.36803436279297, "learning_rate": 5e-06, "loss": 1.6782, "step": 16230 }, { "epoch": 0.8583282682804365, "grad_norm": 74.90691375732422, "learning_rate": 5e-06, "loss": 1.8106, "step": 16240 }, { "epoch": 0.8588567955392299, "grad_norm": 85.9646224975586, "learning_rate": 5e-06, "loss": 2.0599, "step": 16250 }, { "epoch": 0.8593853227980233, "grad_norm": 65.61123657226562, "learning_rate": 5e-06, "loss": 2.0237, "step": 16260 }, { "epoch": 0.8599138500568166, "grad_norm": 59.03030776977539, "learning_rate": 5e-06, "loss": 1.7058, "step": 16270 }, { "epoch": 0.86044237731561, "grad_norm": 79.39157104492188, "learning_rate": 5e-06, "loss": 1.5354, "step": 16280 }, { "epoch": 0.8609709045744034, "grad_norm": 69.6932144165039, "learning_rate": 5e-06, "loss": 1.9212, "step": 16290 }, { "epoch": 0.8614994318331968, "grad_norm": 54.254329681396484, "learning_rate": 5e-06, "loss": 1.4723, "step": 16300 }, { "epoch": 0.8620279590919901, "grad_norm": 101.0817642211914, "learning_rate": 5e-06, "loss": 1.8753, "step": 16310 }, { "epoch": 0.8625564863507835, "grad_norm": 95.39096069335938, "learning_rate": 5e-06, "loss": 2.2743, "step": 16320 }, { "epoch": 0.8630850136095769, "grad_norm": 62.89730453491211, "learning_rate": 5e-06, "loss": 1.5959, "step": 16330 }, { "epoch": 0.8636135408683703, "grad_norm": 85.42479705810547, "learning_rate": 5e-06, "loss": 1.8223, "step": 16340 }, { "epoch": 0.8641420681271637, "grad_norm": 98.60250854492188, "learning_rate": 5e-06, "loss": 1.79, "step": 16350 }, { "epoch": 0.864670595385957, "grad_norm": 172.56124877929688, "learning_rate": 5e-06, "loss": 1.953, "step": 16360 }, { "epoch": 0.8651991226447504, "grad_norm": 66.81640625, "learning_rate": 5e-06, "loss": 1.8832, "step": 16370 }, { "epoch": 0.8657276499035438, "grad_norm": 78.17627716064453, "learning_rate": 5e-06, "loss": 1.8519, "step": 16380 }, { "epoch": 0.8662561771623372, "grad_norm": 66.23819732666016, "learning_rate": 5e-06, "loss": 1.986, "step": 16390 }, { "epoch": 0.8667847044211305, "grad_norm": 101.04570770263672, "learning_rate": 5e-06, "loss": 2.3631, "step": 16400 }, { "epoch": 0.8673132316799239, "grad_norm": 92.04788970947266, "learning_rate": 5e-06, "loss": 1.8011, "step": 16410 }, { "epoch": 0.8678417589387173, "grad_norm": 142.7364044189453, "learning_rate": 5e-06, "loss": 2.0299, "step": 16420 }, { "epoch": 0.8683702861975107, "grad_norm": 94.37661743164062, "learning_rate": 5e-06, "loss": 1.7676, "step": 16430 }, { "epoch": 0.868898813456304, "grad_norm": 95.45365142822266, "learning_rate": 5e-06, "loss": 1.8484, "step": 16440 }, { "epoch": 0.8694273407150974, "grad_norm": 83.99198150634766, "learning_rate": 5e-06, "loss": 1.7024, "step": 16450 }, { "epoch": 0.8699558679738908, "grad_norm": 78.85735321044922, "learning_rate": 5e-06, "loss": 1.7343, "step": 16460 }, { "epoch": 0.8704843952326842, "grad_norm": 89.36212158203125, "learning_rate": 5e-06, "loss": 1.6952, "step": 16470 }, { "epoch": 0.8710129224914775, "grad_norm": 46.621952056884766, "learning_rate": 5e-06, "loss": 1.6862, "step": 16480 }, { "epoch": 0.8715414497502709, "grad_norm": 66.6725845336914, "learning_rate": 5e-06, "loss": 1.8444, "step": 16490 }, { "epoch": 0.8720699770090642, "grad_norm": 58.09086227416992, "learning_rate": 5e-06, "loss": 1.9506, "step": 16500 }, { "epoch": 0.8725985042678576, "grad_norm": 64.69146728515625, "learning_rate": 5e-06, "loss": 1.5542, "step": 16510 }, { "epoch": 0.8731270315266509, "grad_norm": 65.49239349365234, "learning_rate": 5e-06, "loss": 1.7351, "step": 16520 }, { "epoch": 0.8736555587854443, "grad_norm": 64.09361267089844, "learning_rate": 5e-06, "loss": 1.7692, "step": 16530 }, { "epoch": 0.8741840860442377, "grad_norm": 76.89824676513672, "learning_rate": 5e-06, "loss": 1.8384, "step": 16540 }, { "epoch": 0.8747126133030311, "grad_norm": 37.78486251831055, "learning_rate": 5e-06, "loss": 1.6962, "step": 16550 }, { "epoch": 0.8752411405618244, "grad_norm": 65.12787628173828, "learning_rate": 5e-06, "loss": 1.9556, "step": 16560 }, { "epoch": 0.8757696678206178, "grad_norm": 92.01142883300781, "learning_rate": 5e-06, "loss": 1.4659, "step": 16570 }, { "epoch": 0.8762981950794112, "grad_norm": 74.14949035644531, "learning_rate": 5e-06, "loss": 2.1089, "step": 16580 }, { "epoch": 0.8768267223382046, "grad_norm": 76.88835906982422, "learning_rate": 5e-06, "loss": 1.9262, "step": 16590 }, { "epoch": 0.877355249596998, "grad_norm": 58.240196228027344, "learning_rate": 5e-06, "loss": 1.8139, "step": 16600 }, { "epoch": 0.8778837768557913, "grad_norm": 48.16357421875, "learning_rate": 5e-06, "loss": 1.6581, "step": 16610 }, { "epoch": 0.8784123041145847, "grad_norm": 65.86573791503906, "learning_rate": 5e-06, "loss": 1.5369, "step": 16620 }, { "epoch": 0.8789408313733781, "grad_norm": 72.5399169921875, "learning_rate": 5e-06, "loss": 1.8774, "step": 16630 }, { "epoch": 0.8794693586321715, "grad_norm": 387.50775146484375, "learning_rate": 5e-06, "loss": 1.6136, "step": 16640 }, { "epoch": 0.8799978858909648, "grad_norm": 65.29890441894531, "learning_rate": 5e-06, "loss": 1.7394, "step": 16650 }, { "epoch": 0.8805264131497582, "grad_norm": 75.46720123291016, "learning_rate": 5e-06, "loss": 2.1092, "step": 16660 }, { "epoch": 0.8810549404085516, "grad_norm": 90.58956909179688, "learning_rate": 5e-06, "loss": 1.7937, "step": 16670 }, { "epoch": 0.881583467667345, "grad_norm": 84.27156066894531, "learning_rate": 5e-06, "loss": 1.8088, "step": 16680 }, { "epoch": 0.8821119949261383, "grad_norm": 57.76335906982422, "learning_rate": 5e-06, "loss": 1.9776, "step": 16690 }, { "epoch": 0.8826405221849317, "grad_norm": 61.775875091552734, "learning_rate": 5e-06, "loss": 1.7753, "step": 16700 }, { "epoch": 0.8831690494437251, "grad_norm": 57.04155731201172, "learning_rate": 5e-06, "loss": 1.9534, "step": 16710 }, { "epoch": 0.8836975767025185, "grad_norm": 167.1499481201172, "learning_rate": 5e-06, "loss": 1.6512, "step": 16720 }, { "epoch": 0.8842261039613118, "grad_norm": 76.0776138305664, "learning_rate": 5e-06, "loss": 2.1021, "step": 16730 }, { "epoch": 0.8847546312201052, "grad_norm": 67.69180297851562, "learning_rate": 5e-06, "loss": 1.8289, "step": 16740 }, { "epoch": 0.8852831584788986, "grad_norm": 80.60964965820312, "learning_rate": 5e-06, "loss": 2.1929, "step": 16750 }, { "epoch": 0.8858116857376919, "grad_norm": 52.6340217590332, "learning_rate": 5e-06, "loss": 1.6479, "step": 16760 }, { "epoch": 0.8863402129964852, "grad_norm": 76.2700424194336, "learning_rate": 5e-06, "loss": 1.8956, "step": 16770 }, { "epoch": 0.8868687402552786, "grad_norm": 66.95301818847656, "learning_rate": 5e-06, "loss": 1.7106, "step": 16780 }, { "epoch": 0.887397267514072, "grad_norm": 270.90802001953125, "learning_rate": 5e-06, "loss": 1.9599, "step": 16790 }, { "epoch": 0.8879257947728654, "grad_norm": 65.9742660522461, "learning_rate": 5e-06, "loss": 1.7402, "step": 16800 }, { "epoch": 0.8884543220316587, "grad_norm": 159.32168579101562, "learning_rate": 5e-06, "loss": 1.9039, "step": 16810 }, { "epoch": 0.8889828492904521, "grad_norm": 64.12284851074219, "learning_rate": 5e-06, "loss": 1.4687, "step": 16820 }, { "epoch": 0.8895113765492455, "grad_norm": 78.39161682128906, "learning_rate": 5e-06, "loss": 2.1928, "step": 16830 }, { "epoch": 0.8900399038080389, "grad_norm": 177.34732055664062, "learning_rate": 5e-06, "loss": 1.957, "step": 16840 }, { "epoch": 0.8905684310668323, "grad_norm": 101.92220306396484, "learning_rate": 5e-06, "loss": 1.6519, "step": 16850 }, { "epoch": 0.8910969583256256, "grad_norm": 101.2462387084961, "learning_rate": 5e-06, "loss": 1.646, "step": 16860 }, { "epoch": 0.891625485584419, "grad_norm": 88.10142517089844, "learning_rate": 5e-06, "loss": 1.8819, "step": 16870 }, { "epoch": 0.8921540128432124, "grad_norm": 82.32184600830078, "learning_rate": 5e-06, "loss": 2.0152, "step": 16880 }, { "epoch": 0.8926825401020058, "grad_norm": 80.14704895019531, "learning_rate": 5e-06, "loss": 1.517, "step": 16890 }, { "epoch": 0.8932110673607991, "grad_norm": 82.62104034423828, "learning_rate": 5e-06, "loss": 1.7952, "step": 16900 }, { "epoch": 0.8937395946195925, "grad_norm": 67.10014343261719, "learning_rate": 5e-06, "loss": 1.9297, "step": 16910 }, { "epoch": 0.8942681218783859, "grad_norm": 83.10028839111328, "learning_rate": 5e-06, "loss": 1.9213, "step": 16920 }, { "epoch": 0.8947966491371793, "grad_norm": 64.5474853515625, "learning_rate": 5e-06, "loss": 2.184, "step": 16930 }, { "epoch": 0.8953251763959726, "grad_norm": 58.12422180175781, "learning_rate": 5e-06, "loss": 1.8979, "step": 16940 }, { "epoch": 0.895853703654766, "grad_norm": 98.54670715332031, "learning_rate": 5e-06, "loss": 1.8254, "step": 16950 }, { "epoch": 0.8963822309135594, "grad_norm": 179.27955627441406, "learning_rate": 5e-06, "loss": 1.8079, "step": 16960 }, { "epoch": 0.8969107581723528, "grad_norm": 86.08062744140625, "learning_rate": 5e-06, "loss": 1.7765, "step": 16970 }, { "epoch": 0.8974392854311461, "grad_norm": 52.80547332763672, "learning_rate": 5e-06, "loss": 1.8243, "step": 16980 }, { "epoch": 0.8979678126899395, "grad_norm": 45.18359375, "learning_rate": 5e-06, "loss": 1.674, "step": 16990 }, { "epoch": 0.8984963399487329, "grad_norm": 94.52594757080078, "learning_rate": 5e-06, "loss": 1.6624, "step": 17000 }, { "epoch": 0.8984963399487329, "eval_loss": 1.6669728755950928, "eval_runtime": 41.61, "eval_samples_per_second": 293.968, "eval_steps_per_second": 9.205, "eval_sts-dev_pearson_cosine": 0.8434160300002733, "eval_sts-dev_pearson_dot": 0.8230713952784022, "eval_sts-dev_pearson_euclidean": 0.8497472619228652, "eval_sts-dev_pearson_manhattan": 0.8493619146536072, "eval_sts-dev_pearson_max": 0.8497472619228652, "eval_sts-dev_spearman_cosine": 0.8494504867908752, "eval_sts-dev_spearman_dot": 0.8229632221339157, "eval_sts-dev_spearman_euclidean": 0.8506219901595088, "eval_sts-dev_spearman_manhattan": 0.8498397828045, "eval_sts-dev_spearman_max": 0.8506219901595088, "step": 17000 }, { "epoch": 0.8990248672075263, "grad_norm": 90.83659362792969, "learning_rate": 5e-06, "loss": 2.0458, "step": 17010 }, { "epoch": 0.8995533944663197, "grad_norm": 66.3012924194336, "learning_rate": 5e-06, "loss": 1.9219, "step": 17020 }, { "epoch": 0.9000819217251129, "grad_norm": 43.12640380859375, "learning_rate": 5e-06, "loss": 1.7224, "step": 17030 }, { "epoch": 0.9006104489839063, "grad_norm": 50.0443229675293, "learning_rate": 5e-06, "loss": 1.9282, "step": 17040 }, { "epoch": 0.9011389762426997, "grad_norm": 67.43492889404297, "learning_rate": 5e-06, "loss": 1.9669, "step": 17050 }, { "epoch": 0.901667503501493, "grad_norm": 66.76605987548828, "learning_rate": 5e-06, "loss": 2.3502, "step": 17060 }, { "epoch": 0.9021960307602864, "grad_norm": 67.80857849121094, "learning_rate": 5e-06, "loss": 1.5516, "step": 17070 }, { "epoch": 0.9027245580190798, "grad_norm": 85.19833374023438, "learning_rate": 5e-06, "loss": 2.013, "step": 17080 }, { "epoch": 0.9032530852778732, "grad_norm": 51.70748519897461, "learning_rate": 5e-06, "loss": 1.8253, "step": 17090 }, { "epoch": 0.9037816125366666, "grad_norm": 80.751220703125, "learning_rate": 5e-06, "loss": 1.67, "step": 17100 }, { "epoch": 0.9043101397954599, "grad_norm": 95.32965850830078, "learning_rate": 5e-06, "loss": 2.0611, "step": 17110 }, { "epoch": 0.9048386670542533, "grad_norm": 49.937461853027344, "learning_rate": 5e-06, "loss": 1.4963, "step": 17120 }, { "epoch": 0.9053671943130467, "grad_norm": 67.01627349853516, "learning_rate": 5e-06, "loss": 2.0389, "step": 17130 }, { "epoch": 0.9058957215718401, "grad_norm": 37.57774353027344, "learning_rate": 5e-06, "loss": 1.4437, "step": 17140 }, { "epoch": 0.9064242488306334, "grad_norm": 45.253746032714844, "learning_rate": 5e-06, "loss": 2.047, "step": 17150 }, { "epoch": 0.9069527760894268, "grad_norm": 66.05631256103516, "learning_rate": 5e-06, "loss": 1.976, "step": 17160 }, { "epoch": 0.9074813033482202, "grad_norm": 82.93451690673828, "learning_rate": 5e-06, "loss": 2.1239, "step": 17170 }, { "epoch": 0.9080098306070136, "grad_norm": 59.283939361572266, "learning_rate": 5e-06, "loss": 1.7206, "step": 17180 }, { "epoch": 0.908538357865807, "grad_norm": 64.6095199584961, "learning_rate": 5e-06, "loss": 1.5855, "step": 17190 }, { "epoch": 0.9090668851246003, "grad_norm": 66.44391632080078, "learning_rate": 5e-06, "loss": 1.5535, "step": 17200 }, { "epoch": 0.9095954123833937, "grad_norm": 73.57811737060547, "learning_rate": 5e-06, "loss": 1.6994, "step": 17210 }, { "epoch": 0.9101239396421871, "grad_norm": 81.08476257324219, "learning_rate": 5e-06, "loss": 1.7339, "step": 17220 }, { "epoch": 0.9106524669009805, "grad_norm": 54.57515335083008, "learning_rate": 5e-06, "loss": 1.6611, "step": 17230 }, { "epoch": 0.9111809941597738, "grad_norm": 78.49447631835938, "learning_rate": 5e-06, "loss": 1.8321, "step": 17240 }, { "epoch": 0.9117095214185672, "grad_norm": 75.7291259765625, "learning_rate": 5e-06, "loss": 1.535, "step": 17250 }, { "epoch": 0.9122380486773606, "grad_norm": 83.4405517578125, "learning_rate": 5e-06, "loss": 1.7412, "step": 17260 }, { "epoch": 0.912766575936154, "grad_norm": 71.10909271240234, "learning_rate": 5e-06, "loss": 1.9772, "step": 17270 }, { "epoch": 0.9132951031949473, "grad_norm": 49.475494384765625, "learning_rate": 5e-06, "loss": 1.5202, "step": 17280 }, { "epoch": 0.9138236304537406, "grad_norm": 67.26223754882812, "learning_rate": 5e-06, "loss": 1.8415, "step": 17290 }, { "epoch": 0.914352157712534, "grad_norm": 62.96744155883789, "learning_rate": 5e-06, "loss": 1.5936, "step": 17300 }, { "epoch": 0.9148806849713274, "grad_norm": 66.93885803222656, "learning_rate": 5e-06, "loss": 1.8015, "step": 17310 }, { "epoch": 0.9154092122301207, "grad_norm": 63.4937744140625, "learning_rate": 5e-06, "loss": 1.5186, "step": 17320 }, { "epoch": 0.9159377394889141, "grad_norm": 71.05780792236328, "learning_rate": 5e-06, "loss": 1.6216, "step": 17330 }, { "epoch": 0.9164662667477075, "grad_norm": 71.04875946044922, "learning_rate": 5e-06, "loss": 1.5983, "step": 17340 }, { "epoch": 0.9169947940065009, "grad_norm": 60.3332405090332, "learning_rate": 5e-06, "loss": 1.8516, "step": 17350 }, { "epoch": 0.9175233212652942, "grad_norm": 149.77052307128906, "learning_rate": 5e-06, "loss": 1.6204, "step": 17360 }, { "epoch": 0.9180518485240876, "grad_norm": 75.41010284423828, "learning_rate": 5e-06, "loss": 2.0564, "step": 17370 }, { "epoch": 0.918580375782881, "grad_norm": 45.09780502319336, "learning_rate": 5e-06, "loss": 1.4948, "step": 17380 }, { "epoch": 0.9191089030416744, "grad_norm": 82.64436340332031, "learning_rate": 5e-06, "loss": 1.7337, "step": 17390 }, { "epoch": 0.9196374303004677, "grad_norm": 55.80376434326172, "learning_rate": 5e-06, "loss": 1.6533, "step": 17400 }, { "epoch": 0.9201659575592611, "grad_norm": 56.8024787902832, "learning_rate": 5e-06, "loss": 1.8498, "step": 17410 }, { "epoch": 0.9206944848180545, "grad_norm": 70.37359619140625, "learning_rate": 5e-06, "loss": 1.7923, "step": 17420 }, { "epoch": 0.9212230120768479, "grad_norm": 57.683441162109375, "learning_rate": 5e-06, "loss": 1.528, "step": 17430 }, { "epoch": 0.9217515393356412, "grad_norm": 55.94240951538086, "learning_rate": 5e-06, "loss": 1.7433, "step": 17440 }, { "epoch": 0.9222800665944346, "grad_norm": 73.87308502197266, "learning_rate": 5e-06, "loss": 2.0462, "step": 17450 }, { "epoch": 0.922808593853228, "grad_norm": 83.56721496582031, "learning_rate": 5e-06, "loss": 1.7303, "step": 17460 }, { "epoch": 0.9233371211120214, "grad_norm": 62.429481506347656, "learning_rate": 5e-06, "loss": 1.8441, "step": 17470 }, { "epoch": 0.9238656483708148, "grad_norm": 53.99994659423828, "learning_rate": 5e-06, "loss": 1.5884, "step": 17480 }, { "epoch": 0.9243941756296081, "grad_norm": 81.44470977783203, "learning_rate": 5e-06, "loss": 1.6468, "step": 17490 }, { "epoch": 0.9249227028884015, "grad_norm": 71.9089126586914, "learning_rate": 5e-06, "loss": 1.8132, "step": 17500 }, { "epoch": 0.9254512301471949, "grad_norm": 60.08146286010742, "learning_rate": 5e-06, "loss": 2.2762, "step": 17510 }, { "epoch": 0.9259797574059883, "grad_norm": 77.41810607910156, "learning_rate": 5e-06, "loss": 1.8767, "step": 17520 }, { "epoch": 0.9265082846647816, "grad_norm": 66.6099853515625, "learning_rate": 5e-06, "loss": 1.4086, "step": 17530 }, { "epoch": 0.927036811923575, "grad_norm": 94.67138671875, "learning_rate": 5e-06, "loss": 2.2694, "step": 17540 }, { "epoch": 0.9275653391823683, "grad_norm": 84.14366149902344, "learning_rate": 5e-06, "loss": 1.4646, "step": 17550 }, { "epoch": 0.9280938664411617, "grad_norm": 59.49115753173828, "learning_rate": 5e-06, "loss": 1.6916, "step": 17560 }, { "epoch": 0.928622393699955, "grad_norm": 83.949951171875, "learning_rate": 5e-06, "loss": 1.8351, "step": 17570 }, { "epoch": 0.9291509209587484, "grad_norm": 76.14859771728516, "learning_rate": 5e-06, "loss": 2.3031, "step": 17580 }, { "epoch": 0.9296794482175418, "grad_norm": 96.92898559570312, "learning_rate": 5e-06, "loss": 1.8203, "step": 17590 }, { "epoch": 0.9302079754763352, "grad_norm": 80.29483795166016, "learning_rate": 5e-06, "loss": 2.1435, "step": 17600 }, { "epoch": 0.9307365027351285, "grad_norm": 55.4921875, "learning_rate": 5e-06, "loss": 1.8912, "step": 17610 }, { "epoch": 0.9312650299939219, "grad_norm": 62.67246627807617, "learning_rate": 5e-06, "loss": 2.1057, "step": 17620 }, { "epoch": 0.9317935572527153, "grad_norm": 61.913612365722656, "learning_rate": 5e-06, "loss": 1.7199, "step": 17630 }, { "epoch": 0.9323220845115087, "grad_norm": 74.82087707519531, "learning_rate": 5e-06, "loss": 1.6977, "step": 17640 }, { "epoch": 0.932850611770302, "grad_norm": 69.85176086425781, "learning_rate": 5e-06, "loss": 1.6496, "step": 17650 }, { "epoch": 0.9333791390290954, "grad_norm": 89.89632415771484, "learning_rate": 5e-06, "loss": 1.5583, "step": 17660 }, { "epoch": 0.9339076662878888, "grad_norm": 77.19249725341797, "learning_rate": 5e-06, "loss": 2.0085, "step": 17670 }, { "epoch": 0.9344361935466822, "grad_norm": 97.4102554321289, "learning_rate": 5e-06, "loss": 1.8752, "step": 17680 }, { "epoch": 0.9349647208054755, "grad_norm": 61.0529670715332, "learning_rate": 5e-06, "loss": 2.1227, "step": 17690 }, { "epoch": 0.9354932480642689, "grad_norm": 62.590545654296875, "learning_rate": 5e-06, "loss": 1.8676, "step": 17700 }, { "epoch": 0.9360217753230623, "grad_norm": 115.74051666259766, "learning_rate": 5e-06, "loss": 1.7257, "step": 17710 }, { "epoch": 0.9365503025818557, "grad_norm": 58.103885650634766, "learning_rate": 5e-06, "loss": 1.6401, "step": 17720 }, { "epoch": 0.937078829840649, "grad_norm": 56.089508056640625, "learning_rate": 5e-06, "loss": 1.6838, "step": 17730 }, { "epoch": 0.9376073570994424, "grad_norm": 63.57783126831055, "learning_rate": 5e-06, "loss": 1.5782, "step": 17740 }, { "epoch": 0.9381358843582358, "grad_norm": 77.26726531982422, "learning_rate": 5e-06, "loss": 1.8293, "step": 17750 }, { "epoch": 0.9386644116170292, "grad_norm": 63.147789001464844, "learning_rate": 5e-06, "loss": 1.6872, "step": 17760 }, { "epoch": 0.9391929388758226, "grad_norm": 64.12127685546875, "learning_rate": 5e-06, "loss": 1.883, "step": 17770 }, { "epoch": 0.9397214661346159, "grad_norm": 61.20237731933594, "learning_rate": 5e-06, "loss": 1.4219, "step": 17780 }, { "epoch": 0.9402499933934093, "grad_norm": 69.53561401367188, "learning_rate": 5e-06, "loss": 1.7884, "step": 17790 }, { "epoch": 0.9407785206522027, "grad_norm": 69.81160736083984, "learning_rate": 5e-06, "loss": 1.7514, "step": 17800 }, { "epoch": 0.9413070479109961, "grad_norm": 63.41395950317383, "learning_rate": 5e-06, "loss": 1.7383, "step": 17810 }, { "epoch": 0.9418355751697893, "grad_norm": 95.82567596435547, "learning_rate": 5e-06, "loss": 2.3517, "step": 17820 }, { "epoch": 0.9423641024285827, "grad_norm": 94.43267822265625, "learning_rate": 5e-06, "loss": 1.5621, "step": 17830 }, { "epoch": 0.9428926296873761, "grad_norm": 70.29798889160156, "learning_rate": 5e-06, "loss": 1.6863, "step": 17840 }, { "epoch": 0.9434211569461695, "grad_norm": 60.46529769897461, "learning_rate": 5e-06, "loss": 1.6157, "step": 17850 }, { "epoch": 0.9439496842049628, "grad_norm": 78.42765045166016, "learning_rate": 5e-06, "loss": 1.9705, "step": 17860 }, { "epoch": 0.9444782114637562, "grad_norm": 56.958248138427734, "learning_rate": 5e-06, "loss": 1.6345, "step": 17870 }, { "epoch": 0.9450067387225496, "grad_norm": 93.89167785644531, "learning_rate": 5e-06, "loss": 2.0832, "step": 17880 }, { "epoch": 0.945535265981343, "grad_norm": 67.82406616210938, "learning_rate": 5e-06, "loss": 1.8825, "step": 17890 }, { "epoch": 0.9460637932401363, "grad_norm": 100.53248596191406, "learning_rate": 5e-06, "loss": 1.7609, "step": 17900 }, { "epoch": 0.9465923204989297, "grad_norm": 53.5087776184082, "learning_rate": 5e-06, "loss": 2.0583, "step": 17910 }, { "epoch": 0.9471208477577231, "grad_norm": 65.57185363769531, "learning_rate": 5e-06, "loss": 1.9529, "step": 17920 }, { "epoch": 0.9476493750165165, "grad_norm": 66.77257537841797, "learning_rate": 5e-06, "loss": 1.8214, "step": 17930 }, { "epoch": 0.9481779022753098, "grad_norm": 51.23651123046875, "learning_rate": 5e-06, "loss": 1.5139, "step": 17940 }, { "epoch": 0.9487064295341032, "grad_norm": 88.1583023071289, "learning_rate": 5e-06, "loss": 1.9689, "step": 17950 }, { "epoch": 0.9492349567928966, "grad_norm": 70.31460571289062, "learning_rate": 5e-06, "loss": 1.5847, "step": 17960 }, { "epoch": 0.94976348405169, "grad_norm": 75.4488754272461, "learning_rate": 5e-06, "loss": 1.8031, "step": 17970 }, { "epoch": 0.9502920113104834, "grad_norm": 64.54553985595703, "learning_rate": 5e-06, "loss": 1.7591, "step": 17980 }, { "epoch": 0.9508205385692767, "grad_norm": 71.30224609375, "learning_rate": 5e-06, "loss": 1.6462, "step": 17990 }, { "epoch": 0.9513490658280701, "grad_norm": 81.52975463867188, "learning_rate": 5e-06, "loss": 1.7433, "step": 18000 }, { "epoch": 0.9513490658280701, "eval_loss": 1.6538974046707153, "eval_runtime": 36.1422, "eval_samples_per_second": 338.441, "eval_steps_per_second": 10.597, "eval_sts-dev_pearson_cosine": 0.8461158205580119, "eval_sts-dev_pearson_dot": 0.8184690479756196, "eval_sts-dev_pearson_euclidean": 0.8510691681770899, "eval_sts-dev_pearson_manhattan": 0.8503573372986275, "eval_sts-dev_pearson_max": 0.8510691681770899, "eval_sts-dev_spearman_cosine": 0.849381736664853, "eval_sts-dev_spearman_dot": 0.8136962656843764, "eval_sts-dev_spearman_euclidean": 0.8508960086370587, "eval_sts-dev_spearman_manhattan": 0.850014046748593, "eval_sts-dev_spearman_max": 0.8508960086370587, "step": 18000 }, { "epoch": 0.9518775930868635, "grad_norm": 62.933494567871094, "learning_rate": 5e-06, "loss": 1.7114, "step": 18010 }, { "epoch": 0.9524061203456569, "grad_norm": 81.93172454833984, "learning_rate": 5e-06, "loss": 1.6427, "step": 18020 }, { "epoch": 0.9529346476044502, "grad_norm": 84.15913391113281, "learning_rate": 5e-06, "loss": 1.8636, "step": 18030 }, { "epoch": 0.9534631748632436, "grad_norm": 72.96295928955078, "learning_rate": 5e-06, "loss": 2.0009, "step": 18040 }, { "epoch": 0.953991702122037, "grad_norm": 72.5213623046875, "learning_rate": 5e-06, "loss": 1.6258, "step": 18050 }, { "epoch": 0.9545202293808304, "grad_norm": 67.77568817138672, "learning_rate": 5e-06, "loss": 1.8953, "step": 18060 }, { "epoch": 0.9550487566396237, "grad_norm": 62.517784118652344, "learning_rate": 5e-06, "loss": 1.7324, "step": 18070 }, { "epoch": 0.955577283898417, "grad_norm": 67.72402954101562, "learning_rate": 5e-06, "loss": 1.8251, "step": 18080 }, { "epoch": 0.9561058111572104, "grad_norm": 64.83651733398438, "learning_rate": 5e-06, "loss": 1.5711, "step": 18090 }, { "epoch": 0.9566343384160038, "grad_norm": 81.25251007080078, "learning_rate": 5e-06, "loss": 1.6824, "step": 18100 }, { "epoch": 0.9571628656747971, "grad_norm": 47.9729118347168, "learning_rate": 5e-06, "loss": 1.6728, "step": 18110 }, { "epoch": 0.9576913929335905, "grad_norm": 57.89502716064453, "learning_rate": 5e-06, "loss": 1.2186, "step": 18120 }, { "epoch": 0.9582199201923839, "grad_norm": 61.322235107421875, "learning_rate": 5e-06, "loss": 1.9268, "step": 18130 }, { "epoch": 0.9587484474511773, "grad_norm": 65.123291015625, "learning_rate": 5e-06, "loss": 1.675, "step": 18140 }, { "epoch": 0.9592769747099706, "grad_norm": 72.3015365600586, "learning_rate": 5e-06, "loss": 1.8224, "step": 18150 }, { "epoch": 0.959805501968764, "grad_norm": 72.74051666259766, "learning_rate": 5e-06, "loss": 1.7558, "step": 18160 }, { "epoch": 0.9603340292275574, "grad_norm": 41.249298095703125, "learning_rate": 5e-06, "loss": 2.1751, "step": 18170 }, { "epoch": 0.9608625564863508, "grad_norm": 68.53234100341797, "learning_rate": 5e-06, "loss": 1.9822, "step": 18180 }, { "epoch": 0.9613910837451441, "grad_norm": 161.0165557861328, "learning_rate": 5e-06, "loss": 1.8495, "step": 18190 }, { "epoch": 0.9619196110039375, "grad_norm": 83.95246887207031, "learning_rate": 5e-06, "loss": 1.4417, "step": 18200 }, { "epoch": 0.9624481382627309, "grad_norm": 74.80828094482422, "learning_rate": 5e-06, "loss": 1.6955, "step": 18210 }, { "epoch": 0.9629766655215243, "grad_norm": 108.39993286132812, "learning_rate": 5e-06, "loss": 1.6708, "step": 18220 }, { "epoch": 0.9635051927803177, "grad_norm": 55.57883071899414, "learning_rate": 5e-06, "loss": 1.4396, "step": 18230 }, { "epoch": 0.964033720039111, "grad_norm": 101.95221710205078, "learning_rate": 5e-06, "loss": 1.5905, "step": 18240 }, { "epoch": 0.9645622472979044, "grad_norm": 55.03987503051758, "learning_rate": 5e-06, "loss": 1.9242, "step": 18250 }, { "epoch": 0.9650907745566978, "grad_norm": 94.01529693603516, "learning_rate": 5e-06, "loss": 1.7667, "step": 18260 }, { "epoch": 0.9656193018154912, "grad_norm": 40.03981399536133, "learning_rate": 5e-06, "loss": 1.8721, "step": 18270 }, { "epoch": 0.9661478290742845, "grad_norm": 86.55077362060547, "learning_rate": 5e-06, "loss": 1.5524, "step": 18280 }, { "epoch": 0.9666763563330779, "grad_norm": 50.98965072631836, "learning_rate": 5e-06, "loss": 1.5219, "step": 18290 }, { "epoch": 0.9672048835918713, "grad_norm": 61.3620491027832, "learning_rate": 5e-06, "loss": 1.9536, "step": 18300 }, { "epoch": 0.9677334108506647, "grad_norm": 82.0411148071289, "learning_rate": 5e-06, "loss": 2.1984, "step": 18310 }, { "epoch": 0.968261938109458, "grad_norm": 55.050071716308594, "learning_rate": 5e-06, "loss": 1.7033, "step": 18320 }, { "epoch": 0.9687904653682514, "grad_norm": 54.41343688964844, "learning_rate": 5e-06, "loss": 1.7183, "step": 18330 }, { "epoch": 0.9693189926270447, "grad_norm": 84.22246551513672, "learning_rate": 5e-06, "loss": 1.6733, "step": 18340 }, { "epoch": 0.9698475198858381, "grad_norm": 65.87133026123047, "learning_rate": 5e-06, "loss": 1.9187, "step": 18350 }, { "epoch": 0.9703760471446314, "grad_norm": 78.80633544921875, "learning_rate": 5e-06, "loss": 1.4939, "step": 18360 }, { "epoch": 0.9709045744034248, "grad_norm": 98.34077453613281, "learning_rate": 5e-06, "loss": 1.7776, "step": 18370 }, { "epoch": 0.9714331016622182, "grad_norm": 53.582027435302734, "learning_rate": 5e-06, "loss": 1.9275, "step": 18380 }, { "epoch": 0.9719616289210116, "grad_norm": 58.044410705566406, "learning_rate": 5e-06, "loss": 1.7542, "step": 18390 }, { "epoch": 0.9724901561798049, "grad_norm": 95.86946105957031, "learning_rate": 5e-06, "loss": 2.0857, "step": 18400 }, { "epoch": 0.9730186834385983, "grad_norm": 82.10986328125, "learning_rate": 5e-06, "loss": 1.8242, "step": 18410 }, { "epoch": 0.9735472106973917, "grad_norm": 73.49609375, "learning_rate": 5e-06, "loss": 1.7158, "step": 18420 }, { "epoch": 0.9740757379561851, "grad_norm": 30.76506233215332, "learning_rate": 5e-06, "loss": 1.5183, "step": 18430 }, { "epoch": 0.9746042652149784, "grad_norm": 82.57400512695312, "learning_rate": 5e-06, "loss": 1.7601, "step": 18440 }, { "epoch": 0.9751327924737718, "grad_norm": 59.98511505126953, "learning_rate": 5e-06, "loss": 1.6468, "step": 18450 }, { "epoch": 0.9756613197325652, "grad_norm": 65.49267578125, "learning_rate": 5e-06, "loss": 2.037, "step": 18460 }, { "epoch": 0.9761898469913586, "grad_norm": 86.62586212158203, "learning_rate": 5e-06, "loss": 1.8979, "step": 18470 }, { "epoch": 0.976718374250152, "grad_norm": 68.37364959716797, "learning_rate": 5e-06, "loss": 2.0807, "step": 18480 }, { "epoch": 0.9772469015089453, "grad_norm": 61.795509338378906, "learning_rate": 5e-06, "loss": 1.8125, "step": 18490 }, { "epoch": 0.9777754287677387, "grad_norm": 69.328369140625, "learning_rate": 5e-06, "loss": 1.7991, "step": 18500 }, { "epoch": 0.9783039560265321, "grad_norm": 109.5992202758789, "learning_rate": 5e-06, "loss": 2.0928, "step": 18510 }, { "epoch": 0.9788324832853255, "grad_norm": 60.21198654174805, "learning_rate": 5e-06, "loss": 1.7954, "step": 18520 }, { "epoch": 0.9793610105441188, "grad_norm": 59.982547760009766, "learning_rate": 5e-06, "loss": 1.544, "step": 18530 }, { "epoch": 0.9798895378029122, "grad_norm": 74.56318664550781, "learning_rate": 5e-06, "loss": 2.2206, "step": 18540 }, { "epoch": 0.9804180650617056, "grad_norm": 67.88633728027344, "learning_rate": 5e-06, "loss": 1.8799, "step": 18550 }, { "epoch": 0.980946592320499, "grad_norm": 50.281654357910156, "learning_rate": 5e-06, "loss": 1.3814, "step": 18560 }, { "epoch": 0.9814751195792923, "grad_norm": 138.64016723632812, "learning_rate": 5e-06, "loss": 1.4716, "step": 18570 }, { "epoch": 0.9820036468380857, "grad_norm": 66.08733367919922, "learning_rate": 5e-06, "loss": 1.5503, "step": 18580 }, { "epoch": 0.9825321740968791, "grad_norm": 100.16180419921875, "learning_rate": 5e-06, "loss": 1.8401, "step": 18590 }, { "epoch": 0.9830607013556725, "grad_norm": 75.07698822021484, "learning_rate": 5e-06, "loss": 1.7956, "step": 18600 }, { "epoch": 0.9835892286144657, "grad_norm": 41.83605194091797, "learning_rate": 5e-06, "loss": 1.5936, "step": 18610 }, { "epoch": 0.9841177558732591, "grad_norm": 84.19473266601562, "learning_rate": 5e-06, "loss": 1.9196, "step": 18620 }, { "epoch": 0.9846462831320525, "grad_norm": 65.51405334472656, "learning_rate": 5e-06, "loss": 1.7958, "step": 18630 }, { "epoch": 0.9851748103908459, "grad_norm": 56.14737319946289, "learning_rate": 5e-06, "loss": 1.7918, "step": 18640 }, { "epoch": 0.9857033376496392, "grad_norm": 144.53517150878906, "learning_rate": 5e-06, "loss": 1.6181, "step": 18650 }, { "epoch": 0.9862318649084326, "grad_norm": 47.99948501586914, "learning_rate": 5e-06, "loss": 1.7274, "step": 18660 }, { "epoch": 0.986760392167226, "grad_norm": 62.67123794555664, "learning_rate": 5e-06, "loss": 1.9447, "step": 18670 }, { "epoch": 0.9872889194260194, "grad_norm": 68.61297607421875, "learning_rate": 5e-06, "loss": 1.5821, "step": 18680 }, { "epoch": 0.9878174466848127, "grad_norm": 74.71617889404297, "learning_rate": 5e-06, "loss": 1.7727, "step": 18690 }, { "epoch": 0.9883459739436061, "grad_norm": 74.86211395263672, "learning_rate": 5e-06, "loss": 1.6737, "step": 18700 }, { "epoch": 0.9888745012023995, "grad_norm": 70.09609985351562, "learning_rate": 5e-06, "loss": 1.5682, "step": 18710 }, { "epoch": 0.9894030284611929, "grad_norm": 91.23912811279297, "learning_rate": 5e-06, "loss": 1.6721, "step": 18720 }, { "epoch": 0.9899315557199863, "grad_norm": 54.89178466796875, "learning_rate": 5e-06, "loss": 1.8396, "step": 18730 }, { "epoch": 0.9904600829787796, "grad_norm": 98.38113403320312, "learning_rate": 5e-06, "loss": 1.7965, "step": 18740 }, { "epoch": 0.990988610237573, "grad_norm": 61.69520950317383, "learning_rate": 5e-06, "loss": 1.7493, "step": 18750 }, { "epoch": 0.9915171374963664, "grad_norm": 67.14696502685547, "learning_rate": 5e-06, "loss": 1.7401, "step": 18760 }, { "epoch": 0.9920456647551598, "grad_norm": 55.91098403930664, "learning_rate": 5e-06, "loss": 1.6424, "step": 18770 }, { "epoch": 0.9925741920139531, "grad_norm": 66.02193450927734, "learning_rate": 5e-06, "loss": 1.8336, "step": 18780 }, { "epoch": 0.9931027192727465, "grad_norm": 65.38835906982422, "learning_rate": 5e-06, "loss": 2.1893, "step": 18790 }, { "epoch": 0.9936312465315399, "grad_norm": 60.45787048339844, "learning_rate": 5e-06, "loss": 1.5288, "step": 18800 }, { "epoch": 0.9941597737903333, "grad_norm": 47.93410873413086, "learning_rate": 5e-06, "loss": 1.6763, "step": 18810 }, { "epoch": 0.9946883010491266, "grad_norm": 120.63724517822266, "learning_rate": 5e-06, "loss": 1.7569, "step": 18820 }, { "epoch": 0.99521682830792, "grad_norm": 64.14657592773438, "learning_rate": 5e-06, "loss": 1.9731, "step": 18830 }, { "epoch": 0.9957453555667134, "grad_norm": 113.67466735839844, "learning_rate": 5e-06, "loss": 1.7607, "step": 18840 }, { "epoch": 0.9962738828255068, "grad_norm": 59.32378005981445, "learning_rate": 5e-06, "loss": 1.8642, "step": 18850 }, { "epoch": 0.9968024100843002, "grad_norm": 90.84257507324219, "learning_rate": 5e-06, "loss": 1.6514, "step": 18860 }, { "epoch": 0.9973309373430934, "grad_norm": 37.51726150512695, "learning_rate": 5e-06, "loss": 1.6382, "step": 18870 }, { "epoch": 0.9978594646018868, "grad_norm": 63.12926483154297, "learning_rate": 5e-06, "loss": 1.5428, "step": 18880 }, { "epoch": 0.9983879918606802, "grad_norm": 73.41537475585938, "learning_rate": 5e-06, "loss": 2.0089, "step": 18890 }, { "epoch": 0.9989165191194735, "grad_norm": 59.94205856323242, "learning_rate": 5e-06, "loss": 1.7, "step": 18900 }, { "epoch": 0.9994450463782669, "grad_norm": 82.19730377197266, "learning_rate": 5e-06, "loss": 2.1237, "step": 18910 }, { "epoch": 0.9999735736370603, "grad_norm": 88.88309478759766, "learning_rate": 5e-06, "loss": 1.7385, "step": 18920 }, { "epoch": 1.0005021008958537, "grad_norm": 85.11519622802734, "learning_rate": 5e-06, "loss": 1.5609, "step": 18930 }, { "epoch": 1.001030628154647, "grad_norm": 76.29631042480469, "learning_rate": 5e-06, "loss": 1.9145, "step": 18940 }, { "epoch": 1.0015591554134404, "grad_norm": 80.19965362548828, "learning_rate": 5e-06, "loss": 1.7598, "step": 18950 }, { "epoch": 1.0020876826722338, "grad_norm": 61.30357360839844, "learning_rate": 5e-06, "loss": 1.7214, "step": 18960 }, { "epoch": 1.0026162099310272, "grad_norm": 93.9766616821289, "learning_rate": 5e-06, "loss": 1.9426, "step": 18970 }, { "epoch": 1.0031447371898206, "grad_norm": 56.4688835144043, "learning_rate": 5e-06, "loss": 1.7236, "step": 18980 }, { "epoch": 1.003673264448614, "grad_norm": 72.55043029785156, "learning_rate": 5e-06, "loss": 1.5977, "step": 18990 }, { "epoch": 1.0042017917074073, "grad_norm": 60.67715835571289, "learning_rate": 5e-06, "loss": 1.9468, "step": 19000 }, { "epoch": 1.0042017917074073, "eval_loss": 1.6154643297195435, "eval_runtime": 37.2729, "eval_samples_per_second": 328.174, "eval_steps_per_second": 10.276, "eval_sts-dev_pearson_cosine": 0.8461628507794242, "eval_sts-dev_pearson_dot": 0.8257593181877219, "eval_sts-dev_pearson_euclidean": 0.8526556765350106, "eval_sts-dev_pearson_manhattan": 0.8521136995562998, "eval_sts-dev_pearson_max": 0.8526556765350106, "eval_sts-dev_spearman_cosine": 0.8510855390926232, "eval_sts-dev_spearman_dot": 0.8246456704203745, "eval_sts-dev_spearman_euclidean": 0.8535089427738403, "eval_sts-dev_spearman_manhattan": 0.8526087731405663, "eval_sts-dev_spearman_max": 0.8535089427738403, "step": 19000 }, { "epoch": 1.0047303189662007, "grad_norm": 74.05320739746094, "learning_rate": 5e-06, "loss": 1.7943, "step": 19010 }, { "epoch": 1.005258846224994, "grad_norm": 66.32478332519531, "learning_rate": 5e-06, "loss": 1.8036, "step": 19020 }, { "epoch": 1.0057873734837874, "grad_norm": 58.31193923950195, "learning_rate": 5e-06, "loss": 1.5286, "step": 19030 }, { "epoch": 1.0063159007425808, "grad_norm": 70.20801544189453, "learning_rate": 5e-06, "loss": 1.635, "step": 19040 }, { "epoch": 1.0068444280013742, "grad_norm": 48.55732345581055, "learning_rate": 5e-06, "loss": 1.9177, "step": 19050 }, { "epoch": 1.0073729552601676, "grad_norm": 53.32954788208008, "learning_rate": 5e-06, "loss": 1.477, "step": 19060 }, { "epoch": 1.007901482518961, "grad_norm": 59.331214904785156, "learning_rate": 5e-06, "loss": 1.8264, "step": 19070 }, { "epoch": 1.0084300097777543, "grad_norm": 79.7580337524414, "learning_rate": 5e-06, "loss": 1.8044, "step": 19080 }, { "epoch": 1.0089585370365477, "grad_norm": 65.61492919921875, "learning_rate": 5e-06, "loss": 1.8672, "step": 19090 }, { "epoch": 1.009487064295341, "grad_norm": 73.52339935302734, "learning_rate": 5e-06, "loss": 1.6947, "step": 19100 }, { "epoch": 1.0100155915541345, "grad_norm": 59.63999557495117, "learning_rate": 5e-06, "loss": 1.5617, "step": 19110 }, { "epoch": 1.0105441188129278, "grad_norm": 69.53165435791016, "learning_rate": 5e-06, "loss": 1.3444, "step": 19120 }, { "epoch": 1.0110726460717212, "grad_norm": 52.5617561340332, "learning_rate": 5e-06, "loss": 1.946, "step": 19130 }, { "epoch": 1.0116011733305146, "grad_norm": 50.58839797973633, "learning_rate": 5e-06, "loss": 1.625, "step": 19140 }, { "epoch": 1.012129700589308, "grad_norm": 69.39927673339844, "learning_rate": 5e-06, "loss": 1.7934, "step": 19150 }, { "epoch": 1.0126582278481013, "grad_norm": 84.1786880493164, "learning_rate": 5e-06, "loss": 2.289, "step": 19160 }, { "epoch": 1.0131867551068947, "grad_norm": 68.51372528076172, "learning_rate": 5e-06, "loss": 1.6586, "step": 19170 }, { "epoch": 1.013715282365688, "grad_norm": 62.74948501586914, "learning_rate": 5e-06, "loss": 1.625, "step": 19180 }, { "epoch": 1.0142438096244815, "grad_norm": 61.09156036376953, "learning_rate": 5e-06, "loss": 1.4939, "step": 19190 }, { "epoch": 1.0147723368832748, "grad_norm": 56.789878845214844, "learning_rate": 5e-06, "loss": 1.803, "step": 19200 }, { "epoch": 1.0153008641420682, "grad_norm": 69.90040588378906, "learning_rate": 5e-06, "loss": 1.595, "step": 19210 }, { "epoch": 1.0158293914008616, "grad_norm": 115.00761413574219, "learning_rate": 5e-06, "loss": 1.7796, "step": 19220 }, { "epoch": 1.016357918659655, "grad_norm": 64.43244934082031, "learning_rate": 5e-06, "loss": 1.855, "step": 19230 }, { "epoch": 1.0168864459184483, "grad_norm": 84.69046783447266, "learning_rate": 5e-06, "loss": 1.5564, "step": 19240 }, { "epoch": 1.0174149731772417, "grad_norm": 93.10693359375, "learning_rate": 5e-06, "loss": 1.7549, "step": 19250 }, { "epoch": 1.017943500436035, "grad_norm": 64.62483215332031, "learning_rate": 5e-06, "loss": 1.5979, "step": 19260 }, { "epoch": 1.0184720276948283, "grad_norm": 62.441795349121094, "learning_rate": 5e-06, "loss": 1.7476, "step": 19270 }, { "epoch": 1.0190005549536216, "grad_norm": 76.35992431640625, "learning_rate": 5e-06, "loss": 1.7281, "step": 19280 }, { "epoch": 1.019529082212415, "grad_norm": 66.0901870727539, "learning_rate": 5e-06, "loss": 1.7503, "step": 19290 }, { "epoch": 1.0200576094712084, "grad_norm": 54.53622817993164, "learning_rate": 5e-06, "loss": 1.7807, "step": 19300 }, { "epoch": 1.0205861367300018, "grad_norm": 70.22701263427734, "learning_rate": 5e-06, "loss": 1.6487, "step": 19310 }, { "epoch": 1.0211146639887951, "grad_norm": 51.557945251464844, "learning_rate": 5e-06, "loss": 1.5473, "step": 19320 }, { "epoch": 1.0216431912475885, "grad_norm": 118.04430389404297, "learning_rate": 5e-06, "loss": 1.7449, "step": 19330 }, { "epoch": 1.0221717185063819, "grad_norm": 49.40364456176758, "learning_rate": 5e-06, "loss": 1.5605, "step": 19340 }, { "epoch": 1.0227002457651753, "grad_norm": 64.87403106689453, "learning_rate": 5e-06, "loss": 2.0373, "step": 19350 }, { "epoch": 1.0232287730239686, "grad_norm": 75.12230682373047, "learning_rate": 5e-06, "loss": 1.5413, "step": 19360 }, { "epoch": 1.023757300282762, "grad_norm": 73.3877182006836, "learning_rate": 5e-06, "loss": 1.771, "step": 19370 }, { "epoch": 1.0242858275415554, "grad_norm": 61.60504150390625, "learning_rate": 5e-06, "loss": 1.6791, "step": 19380 }, { "epoch": 1.0248143548003488, "grad_norm": 78.63151550292969, "learning_rate": 5e-06, "loss": 1.7076, "step": 19390 }, { "epoch": 1.0253428820591421, "grad_norm": 71.5390625, "learning_rate": 5e-06, "loss": 1.5153, "step": 19400 }, { "epoch": 1.0258714093179355, "grad_norm": 65.05306243896484, "learning_rate": 5e-06, "loss": 1.7627, "step": 19410 }, { "epoch": 1.026399936576729, "grad_norm": 65.92372131347656, "learning_rate": 5e-06, "loss": 1.4709, "step": 19420 }, { "epoch": 1.0269284638355223, "grad_norm": 75.8219223022461, "learning_rate": 5e-06, "loss": 1.897, "step": 19430 }, { "epoch": 1.0274569910943157, "grad_norm": 68.1899185180664, "learning_rate": 5e-06, "loss": 1.4776, "step": 19440 }, { "epoch": 1.027985518353109, "grad_norm": 65.82808685302734, "learning_rate": 5e-06, "loss": 1.9244, "step": 19450 }, { "epoch": 1.0285140456119024, "grad_norm": 106.91645812988281, "learning_rate": 5e-06, "loss": 1.9142, "step": 19460 }, { "epoch": 1.0290425728706958, "grad_norm": 54.751487731933594, "learning_rate": 5e-06, "loss": 1.8086, "step": 19470 }, { "epoch": 1.0295711001294892, "grad_norm": 41.86207580566406, "learning_rate": 5e-06, "loss": 1.5908, "step": 19480 }, { "epoch": 1.0300996273882825, "grad_norm": 49.00918960571289, "learning_rate": 5e-06, "loss": 1.7014, "step": 19490 }, { "epoch": 1.030628154647076, "grad_norm": 78.99932098388672, "learning_rate": 5e-06, "loss": 1.6627, "step": 19500 }, { "epoch": 1.0311566819058693, "grad_norm": 63.12388229370117, "learning_rate": 5e-06, "loss": 1.8041, "step": 19510 }, { "epoch": 1.0316852091646627, "grad_norm": 76.511962890625, "learning_rate": 5e-06, "loss": 1.7912, "step": 19520 }, { "epoch": 1.032213736423456, "grad_norm": 55.3642692565918, "learning_rate": 5e-06, "loss": 1.9787, "step": 19530 }, { "epoch": 1.0327422636822494, "grad_norm": 65.67875671386719, "learning_rate": 5e-06, "loss": 1.6526, "step": 19540 }, { "epoch": 1.0332707909410428, "grad_norm": 128.41070556640625, "learning_rate": 5e-06, "loss": 1.583, "step": 19550 }, { "epoch": 1.0337993181998362, "grad_norm": 100.89557647705078, "learning_rate": 5e-06, "loss": 1.7301, "step": 19560 }, { "epoch": 1.0343278454586295, "grad_norm": 63.652679443359375, "learning_rate": 5e-06, "loss": 1.6326, "step": 19570 }, { "epoch": 1.034856372717423, "grad_norm": 65.28987884521484, "learning_rate": 5e-06, "loss": 1.8505, "step": 19580 }, { "epoch": 1.0353848999762163, "grad_norm": 105.275634765625, "learning_rate": 5e-06, "loss": 1.7066, "step": 19590 }, { "epoch": 1.0359134272350097, "grad_norm": 49.41904830932617, "learning_rate": 5e-06, "loss": 1.967, "step": 19600 }, { "epoch": 1.036441954493803, "grad_norm": 133.61781311035156, "learning_rate": 5e-06, "loss": 1.508, "step": 19610 }, { "epoch": 1.0369704817525964, "grad_norm": 74.15248107910156, "learning_rate": 5e-06, "loss": 1.5571, "step": 19620 }, { "epoch": 1.0374990090113898, "grad_norm": 52.67314147949219, "learning_rate": 5e-06, "loss": 1.6825, "step": 19630 }, { "epoch": 1.0380275362701832, "grad_norm": 49.3936767578125, "learning_rate": 5e-06, "loss": 1.7614, "step": 19640 }, { "epoch": 1.0385560635289766, "grad_norm": 55.71810531616211, "learning_rate": 5e-06, "loss": 1.8779, "step": 19650 }, { "epoch": 1.03908459078777, "grad_norm": 52.97697830200195, "learning_rate": 5e-06, "loss": 1.8772, "step": 19660 }, { "epoch": 1.0396131180465633, "grad_norm": 54.6786994934082, "learning_rate": 5e-06, "loss": 1.9542, "step": 19670 }, { "epoch": 1.0401416453053567, "grad_norm": 71.99957275390625, "learning_rate": 5e-06, "loss": 1.6511, "step": 19680 }, { "epoch": 1.04067017256415, "grad_norm": 49.89113998413086, "learning_rate": 5e-06, "loss": 1.3013, "step": 19690 }, { "epoch": 1.0411986998229434, "grad_norm": 60.815608978271484, "learning_rate": 5e-06, "loss": 1.7783, "step": 19700 }, { "epoch": 1.0417272270817368, "grad_norm": 68.30535125732422, "learning_rate": 5e-06, "loss": 1.8283, "step": 19710 }, { "epoch": 1.0422557543405302, "grad_norm": 51.34492111206055, "learning_rate": 5e-06, "loss": 1.5248, "step": 19720 }, { "epoch": 1.0427842815993236, "grad_norm": 113.33541870117188, "learning_rate": 5e-06, "loss": 1.631, "step": 19730 }, { "epoch": 1.043312808858117, "grad_norm": 63.291255950927734, "learning_rate": 5e-06, "loss": 1.7507, "step": 19740 }, { "epoch": 1.0438413361169103, "grad_norm": 80.29730224609375, "learning_rate": 5e-06, "loss": 1.3758, "step": 19750 }, { "epoch": 1.0443698633757037, "grad_norm": 75.53379821777344, "learning_rate": 5e-06, "loss": 1.364, "step": 19760 }, { "epoch": 1.044898390634497, "grad_norm": 103.18360900878906, "learning_rate": 5e-06, "loss": 1.8746, "step": 19770 }, { "epoch": 1.0454269178932905, "grad_norm": 63.51171875, "learning_rate": 5e-06, "loss": 1.8179, "step": 19780 }, { "epoch": 1.0459554451520838, "grad_norm": 79.95075988769531, "learning_rate": 5e-06, "loss": 1.7646, "step": 19790 }, { "epoch": 1.046483972410877, "grad_norm": 90.43829345703125, "learning_rate": 5e-06, "loss": 1.6245, "step": 19800 }, { "epoch": 1.0470124996696704, "grad_norm": 52.376304626464844, "learning_rate": 5e-06, "loss": 1.4596, "step": 19810 }, { "epoch": 1.0475410269284637, "grad_norm": 67.99281311035156, "learning_rate": 5e-06, "loss": 1.6083, "step": 19820 }, { "epoch": 1.0480695541872571, "grad_norm": 53.0586051940918, "learning_rate": 5e-06, "loss": 1.6018, "step": 19830 }, { "epoch": 1.0485980814460505, "grad_norm": 52.978485107421875, "learning_rate": 5e-06, "loss": 1.6879, "step": 19840 }, { "epoch": 1.0491266087048439, "grad_norm": 81.49010467529297, "learning_rate": 5e-06, "loss": 1.6813, "step": 19850 }, { "epoch": 1.0496551359636372, "grad_norm": 46.0621337890625, "learning_rate": 5e-06, "loss": 1.7314, "step": 19860 }, { "epoch": 1.0501836632224306, "grad_norm": 43.59175491333008, "learning_rate": 5e-06, "loss": 1.88, "step": 19870 }, { "epoch": 1.050712190481224, "grad_norm": 65.60623931884766, "learning_rate": 5e-06, "loss": 1.6504, "step": 19880 }, { "epoch": 1.0512407177400174, "grad_norm": 62.72813034057617, "learning_rate": 5e-06, "loss": 1.8158, "step": 19890 }, { "epoch": 1.0517692449988107, "grad_norm": 73.03489685058594, "learning_rate": 5e-06, "loss": 1.6179, "step": 19900 }, { "epoch": 1.0522977722576041, "grad_norm": 55.530029296875, "learning_rate": 5e-06, "loss": 1.6955, "step": 19910 }, { "epoch": 1.0528262995163975, "grad_norm": 72.14270782470703, "learning_rate": 5e-06, "loss": 1.7172, "step": 19920 }, { "epoch": 1.0533548267751909, "grad_norm": 96.40103912353516, "learning_rate": 5e-06, "loss": 1.9189, "step": 19930 }, { "epoch": 1.0538833540339843, "grad_norm": 75.95075988769531, "learning_rate": 5e-06, "loss": 1.4892, "step": 19940 }, { "epoch": 1.0544118812927776, "grad_norm": 56.67641067504883, "learning_rate": 5e-06, "loss": 1.5106, "step": 19950 }, { "epoch": 1.054940408551571, "grad_norm": 82.33507537841797, "learning_rate": 5e-06, "loss": 2.1007, "step": 19960 }, { "epoch": 1.0554689358103644, "grad_norm": 55.12810516357422, "learning_rate": 5e-06, "loss": 1.5462, "step": 19970 }, { "epoch": 1.0559974630691578, "grad_norm": 92.07797241210938, "learning_rate": 5e-06, "loss": 1.7133, "step": 19980 }, { "epoch": 1.0565259903279511, "grad_norm": 79.42606353759766, "learning_rate": 5e-06, "loss": 1.8632, "step": 19990 }, { "epoch": 1.0570545175867445, "grad_norm": 53.19036102294922, "learning_rate": 5e-06, "loss": 1.5161, "step": 20000 }, { "epoch": 1.0570545175867445, "eval_loss": 1.5687263011932373, "eval_runtime": 43.5749, "eval_samples_per_second": 280.712, "eval_steps_per_second": 8.789, "eval_sts-dev_pearson_cosine": 0.8480089541224924, "eval_sts-dev_pearson_dot": 0.8256693866667564, "eval_sts-dev_pearson_euclidean": 0.8527400049014211, "eval_sts-dev_pearson_manhattan": 0.8520790558615827, "eval_sts-dev_pearson_max": 0.8527400049014211, "eval_sts-dev_spearman_cosine": 0.8513277805300408, "eval_sts-dev_spearman_dot": 0.8231807291150073, "eval_sts-dev_spearman_euclidean": 0.8544083155296077, "eval_sts-dev_spearman_manhattan": 0.8533705661204449, "eval_sts-dev_spearman_max": 0.8544083155296077, "step": 20000 }, { "epoch": 1.057583044845538, "grad_norm": 61.550804138183594, "learning_rate": 5e-06, "loss": 1.3555, "step": 20010 }, { "epoch": 1.0581115721043313, "grad_norm": 63.16141128540039, "learning_rate": 5e-06, "loss": 1.5628, "step": 20020 }, { "epoch": 1.0586400993631246, "grad_norm": 93.58099365234375, "learning_rate": 5e-06, "loss": 1.709, "step": 20030 }, { "epoch": 1.059168626621918, "grad_norm": 47.25423049926758, "learning_rate": 5e-06, "loss": 1.3653, "step": 20040 }, { "epoch": 1.0596971538807114, "grad_norm": 50.487667083740234, "learning_rate": 5e-06, "loss": 1.5944, "step": 20050 }, { "epoch": 1.0602256811395048, "grad_norm": 119.19654846191406, "learning_rate": 5e-06, "loss": 1.7035, "step": 20060 }, { "epoch": 1.0607542083982981, "grad_norm": 59.170501708984375, "learning_rate": 5e-06, "loss": 1.7059, "step": 20070 }, { "epoch": 1.0612827356570915, "grad_norm": 75.49610900878906, "learning_rate": 5e-06, "loss": 1.6339, "step": 20080 }, { "epoch": 1.061811262915885, "grad_norm": 58.37449645996094, "learning_rate": 5e-06, "loss": 1.8403, "step": 20090 }, { "epoch": 1.0623397901746783, "grad_norm": 64.86131286621094, "learning_rate": 5e-06, "loss": 1.6215, "step": 20100 }, { "epoch": 1.0628683174334717, "grad_norm": 71.8331298828125, "learning_rate": 5e-06, "loss": 1.6984, "step": 20110 }, { "epoch": 1.063396844692265, "grad_norm": 52.03859329223633, "learning_rate": 5e-06, "loss": 2.1066, "step": 20120 }, { "epoch": 1.0639253719510584, "grad_norm": 85.51924133300781, "learning_rate": 5e-06, "loss": 1.6201, "step": 20130 }, { "epoch": 1.0644538992098518, "grad_norm": 67.3583755493164, "learning_rate": 5e-06, "loss": 1.6358, "step": 20140 }, { "epoch": 1.0649824264686452, "grad_norm": 46.52227783203125, "learning_rate": 5e-06, "loss": 1.6993, "step": 20150 }, { "epoch": 1.0655109537274385, "grad_norm": 73.10303497314453, "learning_rate": 5e-06, "loss": 1.6758, "step": 20160 }, { "epoch": 1.066039480986232, "grad_norm": 78.10368347167969, "learning_rate": 5e-06, "loss": 1.8271, "step": 20170 }, { "epoch": 1.0665680082450253, "grad_norm": 86.16529846191406, "learning_rate": 5e-06, "loss": 1.5273, "step": 20180 }, { "epoch": 1.0670965355038187, "grad_norm": 59.41505432128906, "learning_rate": 5e-06, "loss": 1.6082, "step": 20190 }, { "epoch": 1.067625062762612, "grad_norm": 66.69190216064453, "learning_rate": 5e-06, "loss": 1.9762, "step": 20200 }, { "epoch": 1.0681535900214054, "grad_norm": 109.7550277709961, "learning_rate": 5e-06, "loss": 1.6515, "step": 20210 }, { "epoch": 1.0686821172801988, "grad_norm": 78.79650115966797, "learning_rate": 5e-06, "loss": 1.525, "step": 20220 }, { "epoch": 1.0692106445389922, "grad_norm": 68.97999572753906, "learning_rate": 5e-06, "loss": 1.9859, "step": 20230 }, { "epoch": 1.0697391717977855, "grad_norm": 55.76803970336914, "learning_rate": 5e-06, "loss": 1.4932, "step": 20240 }, { "epoch": 1.070267699056579, "grad_norm": 95.22157287597656, "learning_rate": 5e-06, "loss": 1.8433, "step": 20250 }, { "epoch": 1.0707962263153723, "grad_norm": 47.664024353027344, "learning_rate": 5e-06, "loss": 1.6397, "step": 20260 }, { "epoch": 1.0713247535741657, "grad_norm": 64.46760559082031, "learning_rate": 5e-06, "loss": 1.9088, "step": 20270 }, { "epoch": 1.071853280832959, "grad_norm": 88.94802856445312, "learning_rate": 5e-06, "loss": 1.4865, "step": 20280 }, { "epoch": 1.0723818080917524, "grad_norm": 94.11003875732422, "learning_rate": 5e-06, "loss": 1.9197, "step": 20290 }, { "epoch": 1.0729103353505458, "grad_norm": 77.8308334350586, "learning_rate": 5e-06, "loss": 1.5259, "step": 20300 }, { "epoch": 1.073438862609339, "grad_norm": 53.82429122924805, "learning_rate": 5e-06, "loss": 2.1206, "step": 20310 }, { "epoch": 1.0739673898681326, "grad_norm": 59.74922180175781, "learning_rate": 5e-06, "loss": 1.6994, "step": 20320 }, { "epoch": 1.0744959171269257, "grad_norm": 62.82179260253906, "learning_rate": 5e-06, "loss": 1.9788, "step": 20330 }, { "epoch": 1.075024444385719, "grad_norm": 66.15186309814453, "learning_rate": 5e-06, "loss": 1.7489, "step": 20340 }, { "epoch": 1.0755529716445125, "grad_norm": 79.7841567993164, "learning_rate": 5e-06, "loss": 1.6585, "step": 20350 }, { "epoch": 1.0760814989033058, "grad_norm": 59.70399475097656, "learning_rate": 5e-06, "loss": 1.517, "step": 20360 }, { "epoch": 1.0766100261620992, "grad_norm": 62.04471969604492, "learning_rate": 5e-06, "loss": 1.7564, "step": 20370 }, { "epoch": 1.0771385534208926, "grad_norm": 64.9795150756836, "learning_rate": 5e-06, "loss": 1.5394, "step": 20380 }, { "epoch": 1.077667080679686, "grad_norm": 66.83293151855469, "learning_rate": 5e-06, "loss": 1.6789, "step": 20390 }, { "epoch": 1.0781956079384794, "grad_norm": 89.89036560058594, "learning_rate": 5e-06, "loss": 1.522, "step": 20400 }, { "epoch": 1.0787241351972727, "grad_norm": 73.50037384033203, "learning_rate": 5e-06, "loss": 1.7498, "step": 20410 }, { "epoch": 1.079252662456066, "grad_norm": 57.84537124633789, "learning_rate": 5e-06, "loss": 1.5933, "step": 20420 }, { "epoch": 1.0797811897148595, "grad_norm": 49.22832107543945, "learning_rate": 5e-06, "loss": 1.628, "step": 20430 }, { "epoch": 1.0803097169736529, "grad_norm": 50.937076568603516, "learning_rate": 5e-06, "loss": 1.8561, "step": 20440 }, { "epoch": 1.0808382442324462, "grad_norm": 51.98323440551758, "learning_rate": 5e-06, "loss": 1.6372, "step": 20450 }, { "epoch": 1.0813667714912396, "grad_norm": 90.21749114990234, "learning_rate": 5e-06, "loss": 1.9213, "step": 20460 }, { "epoch": 1.081895298750033, "grad_norm": 67.33564758300781, "learning_rate": 5e-06, "loss": 1.7589, "step": 20470 }, { "epoch": 1.0824238260088264, "grad_norm": 69.19122314453125, "learning_rate": 5e-06, "loss": 1.8318, "step": 20480 }, { "epoch": 1.0829523532676197, "grad_norm": 68.80204010009766, "learning_rate": 5e-06, "loss": 1.569, "step": 20490 }, { "epoch": 1.0834808805264131, "grad_norm": 76.30938720703125, "learning_rate": 5e-06, "loss": 1.7554, "step": 20500 }, { "epoch": 1.0840094077852065, "grad_norm": 75.3794174194336, "learning_rate": 5e-06, "loss": 1.6868, "step": 20510 }, { "epoch": 1.0845379350439999, "grad_norm": 44.30617141723633, "learning_rate": 5e-06, "loss": 1.5334, "step": 20520 }, { "epoch": 1.0850664623027932, "grad_norm": 65.1667251586914, "learning_rate": 5e-06, "loss": 1.6283, "step": 20530 }, { "epoch": 1.0855949895615866, "grad_norm": 90.13369750976562, "learning_rate": 5e-06, "loss": 1.7586, "step": 20540 }, { "epoch": 1.08612351682038, "grad_norm": 61.139404296875, "learning_rate": 5e-06, "loss": 1.6192, "step": 20550 }, { "epoch": 1.0866520440791734, "grad_norm": 58.249813079833984, "learning_rate": 5e-06, "loss": 1.2788, "step": 20560 }, { "epoch": 1.0871805713379668, "grad_norm": 117.83811950683594, "learning_rate": 5e-06, "loss": 1.3663, "step": 20570 }, { "epoch": 1.0877090985967601, "grad_norm": 53.250587463378906, "learning_rate": 5e-06, "loss": 1.6627, "step": 20580 }, { "epoch": 1.0882376258555535, "grad_norm": 61.68207931518555, "learning_rate": 5e-06, "loss": 1.5994, "step": 20590 }, { "epoch": 1.0887661531143469, "grad_norm": 85.24976348876953, "learning_rate": 5e-06, "loss": 1.8066, "step": 20600 }, { "epoch": 1.0892946803731403, "grad_norm": 67.10281372070312, "learning_rate": 5e-06, "loss": 1.7111, "step": 20610 }, { "epoch": 1.0898232076319336, "grad_norm": 44.361236572265625, "learning_rate": 5e-06, "loss": 1.7881, "step": 20620 }, { "epoch": 1.090351734890727, "grad_norm": 50.220401763916016, "learning_rate": 5e-06, "loss": 1.9963, "step": 20630 }, { "epoch": 1.0908802621495204, "grad_norm": 45.83828353881836, "learning_rate": 5e-06, "loss": 1.6344, "step": 20640 }, { "epoch": 1.0914087894083138, "grad_norm": 74.087158203125, "learning_rate": 5e-06, "loss": 1.6309, "step": 20650 }, { "epoch": 1.0919373166671071, "grad_norm": 77.12816619873047, "learning_rate": 5e-06, "loss": 1.8473, "step": 20660 }, { "epoch": 1.0924658439259005, "grad_norm": 58.92168045043945, "learning_rate": 5e-06, "loss": 1.5337, "step": 20670 }, { "epoch": 1.092994371184694, "grad_norm": 36.93387222290039, "learning_rate": 5e-06, "loss": 1.7403, "step": 20680 }, { "epoch": 1.0935228984434873, "grad_norm": 82.83270263671875, "learning_rate": 5e-06, "loss": 1.7306, "step": 20690 }, { "epoch": 1.0940514257022806, "grad_norm": 86.31376647949219, "learning_rate": 5e-06, "loss": 1.4644, "step": 20700 }, { "epoch": 1.094579952961074, "grad_norm": 65.71885681152344, "learning_rate": 5e-06, "loss": 1.4313, "step": 20710 }, { "epoch": 1.0951084802198674, "grad_norm": 73.45608520507812, "learning_rate": 5e-06, "loss": 1.3893, "step": 20720 }, { "epoch": 1.0956370074786608, "grad_norm": 106.78173065185547, "learning_rate": 5e-06, "loss": 1.8368, "step": 20730 }, { "epoch": 1.0961655347374542, "grad_norm": 69.12389373779297, "learning_rate": 5e-06, "loss": 1.8007, "step": 20740 }, { "epoch": 1.0966940619962475, "grad_norm": 69.0080337524414, "learning_rate": 5e-06, "loss": 1.6002, "step": 20750 }, { "epoch": 1.097222589255041, "grad_norm": 55.62864685058594, "learning_rate": 5e-06, "loss": 1.5816, "step": 20760 }, { "epoch": 1.0977511165138343, "grad_norm": 51.71942138671875, "learning_rate": 5e-06, "loss": 1.5099, "step": 20770 }, { "epoch": 1.0982796437726277, "grad_norm": 68.67630767822266, "learning_rate": 5e-06, "loss": 1.4142, "step": 20780 }, { "epoch": 1.098808171031421, "grad_norm": 53.53938293457031, "learning_rate": 5e-06, "loss": 1.7426, "step": 20790 }, { "epoch": 1.0993366982902144, "grad_norm": 69.35404205322266, "learning_rate": 5e-06, "loss": 1.7569, "step": 20800 }, { "epoch": 1.0998652255490078, "grad_norm": 81.33806610107422, "learning_rate": 5e-06, "loss": 1.5055, "step": 20810 }, { "epoch": 1.1003937528078012, "grad_norm": 65.60527801513672, "learning_rate": 5e-06, "loss": 1.6373, "step": 20820 }, { "epoch": 1.1009222800665945, "grad_norm": 66.5328598022461, "learning_rate": 5e-06, "loss": 1.485, "step": 20830 }, { "epoch": 1.1014508073253877, "grad_norm": 53.0748291015625, "learning_rate": 5e-06, "loss": 1.5492, "step": 20840 }, { "epoch": 1.1019793345841813, "grad_norm": 43.80344009399414, "learning_rate": 5e-06, "loss": 1.6076, "step": 20850 }, { "epoch": 1.1025078618429744, "grad_norm": 44.482547760009766, "learning_rate": 5e-06, "loss": 1.6379, "step": 20860 }, { "epoch": 1.1030363891017678, "grad_norm": 53.69398498535156, "learning_rate": 5e-06, "loss": 1.5603, "step": 20870 }, { "epoch": 1.1035649163605612, "grad_norm": 83.14002990722656, "learning_rate": 5e-06, "loss": 1.8746, "step": 20880 }, { "epoch": 1.1040934436193546, "grad_norm": 89.85289001464844, "learning_rate": 5e-06, "loss": 1.779, "step": 20890 }, { "epoch": 1.104621970878148, "grad_norm": 66.47588348388672, "learning_rate": 5e-06, "loss": 2.0438, "step": 20900 }, { "epoch": 1.1051504981369413, "grad_norm": 73.47222900390625, "learning_rate": 5e-06, "loss": 1.8393, "step": 20910 }, { "epoch": 1.1056790253957347, "grad_norm": 41.566646575927734, "learning_rate": 5e-06, "loss": 1.7395, "step": 20920 }, { "epoch": 1.106207552654528, "grad_norm": 64.18836212158203, "learning_rate": 5e-06, "loss": 1.8659, "step": 20930 }, { "epoch": 1.1067360799133215, "grad_norm": 58.12794876098633, "learning_rate": 5e-06, "loss": 1.6783, "step": 20940 }, { "epoch": 1.1072646071721148, "grad_norm": 52.35326385498047, "learning_rate": 5e-06, "loss": 1.4718, "step": 20950 }, { "epoch": 1.1077931344309082, "grad_norm": 47.03421401977539, "learning_rate": 5e-06, "loss": 1.6194, "step": 20960 }, { "epoch": 1.1083216616897016, "grad_norm": 41.19540786743164, "learning_rate": 5e-06, "loss": 1.5235, "step": 20970 }, { "epoch": 1.108850188948495, "grad_norm": 66.82785034179688, "learning_rate": 5e-06, "loss": 1.6527, "step": 20980 }, { "epoch": 1.1093787162072883, "grad_norm": 47.09027862548828, "learning_rate": 5e-06, "loss": 1.6115, "step": 20990 }, { "epoch": 1.1099072434660817, "grad_norm": 76.03390502929688, "learning_rate": 5e-06, "loss": 1.6226, "step": 21000 }, { "epoch": 1.1099072434660817, "eval_loss": 1.5360661745071411, "eval_runtime": 35.095, "eval_samples_per_second": 348.539, "eval_steps_per_second": 10.913, "eval_sts-dev_pearson_cosine": 0.8498528696363916, "eval_sts-dev_pearson_dot": 0.8255460366091665, "eval_sts-dev_pearson_euclidean": 0.8536752547960116, "eval_sts-dev_pearson_manhattan": 0.8532790928412783, "eval_sts-dev_pearson_max": 0.8536752547960116, "eval_sts-dev_spearman_cosine": 0.8526523870367271, "eval_sts-dev_spearman_dot": 0.8214235098567841, "eval_sts-dev_spearman_euclidean": 0.8551335701152877, "eval_sts-dev_spearman_manhattan": 0.8544152816034618, "eval_sts-dev_spearman_max": 0.8551335701152877, "step": 21000 }, { "epoch": 1.110435770724875, "grad_norm": 94.06511688232422, "learning_rate": 5e-06, "loss": 2.275, "step": 21010 }, { "epoch": 1.1109642979836685, "grad_norm": 76.14960479736328, "learning_rate": 5e-06, "loss": 1.9243, "step": 21020 }, { "epoch": 1.1114928252424618, "grad_norm": 68.49175262451172, "learning_rate": 5e-06, "loss": 1.5736, "step": 21030 }, { "epoch": 1.1120213525012552, "grad_norm": 60.52069091796875, "learning_rate": 5e-06, "loss": 1.4052, "step": 21040 }, { "epoch": 1.1125498797600486, "grad_norm": 52.27741241455078, "learning_rate": 5e-06, "loss": 1.6145, "step": 21050 }, { "epoch": 1.113078407018842, "grad_norm": 86.65371704101562, "learning_rate": 5e-06, "loss": 1.5643, "step": 21060 }, { "epoch": 1.1136069342776354, "grad_norm": 55.34230422973633, "learning_rate": 5e-06, "loss": 1.4715, "step": 21070 }, { "epoch": 1.1141354615364287, "grad_norm": 53.79825973510742, "learning_rate": 5e-06, "loss": 1.6852, "step": 21080 }, { "epoch": 1.114663988795222, "grad_norm": 54.73614501953125, "learning_rate": 5e-06, "loss": 1.8567, "step": 21090 }, { "epoch": 1.1151925160540155, "grad_norm": 57.84883499145508, "learning_rate": 5e-06, "loss": 1.8092, "step": 21100 }, { "epoch": 1.1157210433128089, "grad_norm": 62.82527542114258, "learning_rate": 5e-06, "loss": 1.3626, "step": 21110 }, { "epoch": 1.1162495705716022, "grad_norm": 68.06983947753906, "learning_rate": 5e-06, "loss": 1.4882, "step": 21120 }, { "epoch": 1.1167780978303956, "grad_norm": 64.51686096191406, "learning_rate": 5e-06, "loss": 1.5195, "step": 21130 }, { "epoch": 1.117306625089189, "grad_norm": 63.372161865234375, "learning_rate": 5e-06, "loss": 1.4166, "step": 21140 }, { "epoch": 1.1178351523479824, "grad_norm": 57.0896110534668, "learning_rate": 5e-06, "loss": 1.8308, "step": 21150 }, { "epoch": 1.1183636796067757, "grad_norm": 41.49800109863281, "learning_rate": 5e-06, "loss": 1.4529, "step": 21160 }, { "epoch": 1.1188922068655691, "grad_norm": 82.11111450195312, "learning_rate": 5e-06, "loss": 1.2849, "step": 21170 }, { "epoch": 1.1194207341243625, "grad_norm": 86.98870086669922, "learning_rate": 5e-06, "loss": 1.3687, "step": 21180 }, { "epoch": 1.1199492613831559, "grad_norm": 80.7470703125, "learning_rate": 5e-06, "loss": 1.5232, "step": 21190 }, { "epoch": 1.1204777886419492, "grad_norm": 54.3660774230957, "learning_rate": 5e-06, "loss": 1.759, "step": 21200 }, { "epoch": 1.1210063159007426, "grad_norm": 51.42577362060547, "learning_rate": 5e-06, "loss": 1.7228, "step": 21210 }, { "epoch": 1.121534843159536, "grad_norm": 74.4181137084961, "learning_rate": 5e-06, "loss": 1.5599, "step": 21220 }, { "epoch": 1.1220633704183294, "grad_norm": 49.194969177246094, "learning_rate": 5e-06, "loss": 1.407, "step": 21230 }, { "epoch": 1.1225918976771228, "grad_norm": 77.23960876464844, "learning_rate": 5e-06, "loss": 1.4676, "step": 21240 }, { "epoch": 1.1231204249359161, "grad_norm": 53.30349349975586, "learning_rate": 5e-06, "loss": 1.7872, "step": 21250 }, { "epoch": 1.1236489521947095, "grad_norm": 42.16461944580078, "learning_rate": 5e-06, "loss": 1.4734, "step": 21260 }, { "epoch": 1.1241774794535029, "grad_norm": 56.48383331298828, "learning_rate": 5e-06, "loss": 1.5221, "step": 21270 }, { "epoch": 1.1247060067122963, "grad_norm": 67.64414978027344, "learning_rate": 5e-06, "loss": 1.4075, "step": 21280 }, { "epoch": 1.1252345339710896, "grad_norm": 39.436954498291016, "learning_rate": 5e-06, "loss": 1.5024, "step": 21290 }, { "epoch": 1.125763061229883, "grad_norm": 72.107421875, "learning_rate": 5e-06, "loss": 1.7838, "step": 21300 }, { "epoch": 1.1262915884886764, "grad_norm": 55.23761749267578, "learning_rate": 5e-06, "loss": 1.7072, "step": 21310 }, { "epoch": 1.1268201157474698, "grad_norm": 61.949588775634766, "learning_rate": 5e-06, "loss": 1.4958, "step": 21320 }, { "epoch": 1.1273486430062631, "grad_norm": 60.870121002197266, "learning_rate": 5e-06, "loss": 1.3397, "step": 21330 }, { "epoch": 1.1278771702650565, "grad_norm": 73.87908172607422, "learning_rate": 5e-06, "loss": 1.4759, "step": 21340 }, { "epoch": 1.1284056975238497, "grad_norm": 65.42110443115234, "learning_rate": 5e-06, "loss": 1.4404, "step": 21350 }, { "epoch": 1.1289342247826433, "grad_norm": 55.472808837890625, "learning_rate": 5e-06, "loss": 1.7607, "step": 21360 }, { "epoch": 1.1294627520414364, "grad_norm": 48.190128326416016, "learning_rate": 5e-06, "loss": 1.4275, "step": 21370 }, { "epoch": 1.12999127930023, "grad_norm": 69.57451629638672, "learning_rate": 5e-06, "loss": 1.8529, "step": 21380 }, { "epoch": 1.1305198065590232, "grad_norm": 58.39519119262695, "learning_rate": 5e-06, "loss": 1.7842, "step": 21390 }, { "epoch": 1.1310483338178168, "grad_norm": 59.446266174316406, "learning_rate": 5e-06, "loss": 1.5802, "step": 21400 }, { "epoch": 1.13157686107661, "grad_norm": 81.92648315429688, "learning_rate": 5e-06, "loss": 1.5022, "step": 21410 }, { "epoch": 1.1321053883354033, "grad_norm": 60.63704299926758, "learning_rate": 5e-06, "loss": 1.5145, "step": 21420 }, { "epoch": 1.1326339155941967, "grad_norm": 56.11823654174805, "learning_rate": 5e-06, "loss": 1.7288, "step": 21430 }, { "epoch": 1.13316244285299, "grad_norm": 64.43216705322266, "learning_rate": 5e-06, "loss": 1.3174, "step": 21440 }, { "epoch": 1.1336909701117834, "grad_norm": 65.62098693847656, "learning_rate": 5e-06, "loss": 1.5402, "step": 21450 }, { "epoch": 1.1342194973705768, "grad_norm": 51.24263000488281, "learning_rate": 5e-06, "loss": 1.6394, "step": 21460 }, { "epoch": 1.1347480246293702, "grad_norm": 66.3845443725586, "learning_rate": 5e-06, "loss": 1.6072, "step": 21470 }, { "epoch": 1.1352765518881636, "grad_norm": 66.9349594116211, "learning_rate": 5e-06, "loss": 1.606, "step": 21480 }, { "epoch": 1.135805079146957, "grad_norm": 82.14518737792969, "learning_rate": 5e-06, "loss": 1.5787, "step": 21490 }, { "epoch": 1.1363336064057503, "grad_norm": 52.55522918701172, "learning_rate": 5e-06, "loss": 1.4997, "step": 21500 }, { "epoch": 1.1368621336645437, "grad_norm": 46.850502014160156, "learning_rate": 5e-06, "loss": 1.5384, "step": 21510 }, { "epoch": 1.137390660923337, "grad_norm": 67.95945739746094, "learning_rate": 5e-06, "loss": 1.2971, "step": 21520 }, { "epoch": 1.1379191881821304, "grad_norm": 84.88119506835938, "learning_rate": 5e-06, "loss": 1.6044, "step": 21530 }, { "epoch": 1.1384477154409238, "grad_norm": 59.69775390625, "learning_rate": 5e-06, "loss": 1.4903, "step": 21540 }, { "epoch": 1.1389762426997172, "grad_norm": 68.40158081054688, "learning_rate": 5e-06, "loss": 1.3285, "step": 21550 }, { "epoch": 1.1395047699585106, "grad_norm": 63.35823059082031, "learning_rate": 5e-06, "loss": 1.2393, "step": 21560 }, { "epoch": 1.140033297217304, "grad_norm": 65.6205062866211, "learning_rate": 5e-06, "loss": 1.7798, "step": 21570 }, { "epoch": 1.1405618244760973, "grad_norm": 56.19124221801758, "learning_rate": 5e-06, "loss": 1.5892, "step": 21580 }, { "epoch": 1.1410903517348907, "grad_norm": 42.88508605957031, "learning_rate": 5e-06, "loss": 1.3808, "step": 21590 }, { "epoch": 1.141618878993684, "grad_norm": 69.1708755493164, "learning_rate": 5e-06, "loss": 1.7878, "step": 21600 }, { "epoch": 1.1421474062524775, "grad_norm": 62.3067741394043, "learning_rate": 5e-06, "loss": 1.2497, "step": 21610 }, { "epoch": 1.1426759335112708, "grad_norm": 74.388916015625, "learning_rate": 5e-06, "loss": 1.827, "step": 21620 }, { "epoch": 1.1432044607700642, "grad_norm": 30.883779525756836, "learning_rate": 5e-06, "loss": 1.0514, "step": 21630 }, { "epoch": 1.1437329880288576, "grad_norm": 70.72525024414062, "learning_rate": 5e-06, "loss": 1.5599, "step": 21640 }, { "epoch": 1.144261515287651, "grad_norm": 51.78416442871094, "learning_rate": 5e-06, "loss": 1.2353, "step": 21650 }, { "epoch": 1.1447900425464443, "grad_norm": 51.21686553955078, "learning_rate": 5e-06, "loss": 1.21, "step": 21660 }, { "epoch": 1.1453185698052377, "grad_norm": 66.83330535888672, "learning_rate": 5e-06, "loss": 1.4579, "step": 21670 }, { "epoch": 1.145847097064031, "grad_norm": 45.98843002319336, "learning_rate": 5e-06, "loss": 1.3144, "step": 21680 }, { "epoch": 1.1463756243228245, "grad_norm": 73.24613189697266, "learning_rate": 5e-06, "loss": 1.3531, "step": 21690 }, { "epoch": 1.1469041515816178, "grad_norm": 69.94184112548828, "learning_rate": 5e-06, "loss": 1.467, "step": 21700 }, { "epoch": 1.1474326788404112, "grad_norm": 53.1980094909668, "learning_rate": 5e-06, "loss": 1.2986, "step": 21710 }, { "epoch": 1.1479612060992046, "grad_norm": 118.69573974609375, "learning_rate": 5e-06, "loss": 1.6754, "step": 21720 }, { "epoch": 1.148489733357998, "grad_norm": 63.42473220825195, "learning_rate": 5e-06, "loss": 1.4085, "step": 21730 }, { "epoch": 1.1490182606167914, "grad_norm": 50.930755615234375, "learning_rate": 5e-06, "loss": 1.3764, "step": 21740 }, { "epoch": 1.1495467878755847, "grad_norm": 42.18370819091797, "learning_rate": 5e-06, "loss": 1.6012, "step": 21750 }, { "epoch": 1.150075315134378, "grad_norm": 69.86210632324219, "learning_rate": 5e-06, "loss": 1.4372, "step": 21760 }, { "epoch": 1.1506038423931715, "grad_norm": 74.73133850097656, "learning_rate": 5e-06, "loss": 1.8417, "step": 21770 }, { "epoch": 1.1511323696519649, "grad_norm": 60.144779205322266, "learning_rate": 5e-06, "loss": 1.7074, "step": 21780 }, { "epoch": 1.1516608969107582, "grad_norm": 128.23577880859375, "learning_rate": 5e-06, "loss": 1.2727, "step": 21790 }, { "epoch": 1.1521894241695516, "grad_norm": 55.58311462402344, "learning_rate": 5e-06, "loss": 1.4441, "step": 21800 }, { "epoch": 1.152717951428345, "grad_norm": 92.6034164428711, "learning_rate": 5e-06, "loss": 1.7808, "step": 21810 }, { "epoch": 1.1532464786871384, "grad_norm": 56.298343658447266, "learning_rate": 5e-06, "loss": 1.7845, "step": 21820 }, { "epoch": 1.1537750059459317, "grad_norm": 57.905330657958984, "learning_rate": 5e-06, "loss": 1.3054, "step": 21830 }, { "epoch": 1.1543035332047251, "grad_norm": 88.90727996826172, "learning_rate": 5e-06, "loss": 1.693, "step": 21840 }, { "epoch": 1.1548320604635185, "grad_norm": 75.53192901611328, "learning_rate": 5e-06, "loss": 1.4493, "step": 21850 }, { "epoch": 1.1553605877223119, "grad_norm": 71.71808624267578, "learning_rate": 5e-06, "loss": 1.6362, "step": 21860 }, { "epoch": 1.1558891149811052, "grad_norm": 92.89088439941406, "learning_rate": 5e-06, "loss": 1.7614, "step": 21870 }, { "epoch": 1.1564176422398984, "grad_norm": 47.9373893737793, "learning_rate": 5e-06, "loss": 1.4422, "step": 21880 }, { "epoch": 1.156946169498692, "grad_norm": 70.05331420898438, "learning_rate": 5e-06, "loss": 1.7722, "step": 21890 }, { "epoch": 1.1574746967574852, "grad_norm": 77.17134094238281, "learning_rate": 5e-06, "loss": 1.7183, "step": 21900 }, { "epoch": 1.1580032240162788, "grad_norm": 57.18001937866211, "learning_rate": 5e-06, "loss": 1.7415, "step": 21910 }, { "epoch": 1.158531751275072, "grad_norm": 50.55309295654297, "learning_rate": 5e-06, "loss": 1.3152, "step": 21920 }, { "epoch": 1.1590602785338655, "grad_norm": 79.53939819335938, "learning_rate": 5e-06, "loss": 1.524, "step": 21930 }, { "epoch": 1.1595888057926587, "grad_norm": 69.69464874267578, "learning_rate": 5e-06, "loss": 1.4696, "step": 21940 }, { "epoch": 1.160117333051452, "grad_norm": 64.01190185546875, "learning_rate": 5e-06, "loss": 1.6045, "step": 21950 }, { "epoch": 1.1606458603102454, "grad_norm": 46.077842712402344, "learning_rate": 5e-06, "loss": 1.878, "step": 21960 }, { "epoch": 1.1611743875690388, "grad_norm": 72.71512603759766, "learning_rate": 5e-06, "loss": 1.382, "step": 21970 }, { "epoch": 1.1617029148278322, "grad_norm": 53.14492416381836, "learning_rate": 5e-06, "loss": 1.5358, "step": 21980 }, { "epoch": 1.1622314420866255, "grad_norm": 53.65512466430664, "learning_rate": 5e-06, "loss": 1.6478, "step": 21990 }, { "epoch": 1.162759969345419, "grad_norm": 55.812564849853516, "learning_rate": 5e-06, "loss": 1.1564, "step": 22000 }, { "epoch": 1.162759969345419, "eval_loss": 1.5222498178482056, "eval_runtime": 35.6301, "eval_samples_per_second": 343.305, "eval_steps_per_second": 10.749, "eval_sts-dev_pearson_cosine": 0.8509132680942801, "eval_sts-dev_pearson_dot": 0.8268333115091842, "eval_sts-dev_pearson_euclidean": 0.8549220742708739, "eval_sts-dev_pearson_manhattan": 0.8546759580395711, "eval_sts-dev_pearson_max": 0.8549220742708739, "eval_sts-dev_spearman_cosine": 0.8535966387933239, "eval_sts-dev_spearman_dot": 0.8228150573911904, "eval_sts-dev_spearman_euclidean": 0.8566844654819736, "eval_sts-dev_spearman_manhattan": 0.8562209858095549, "eval_sts-dev_spearman_max": 0.8566844654819736, "step": 22000 }, { "epoch": 1.1632884966042123, "grad_norm": 45.435543060302734, "learning_rate": 5e-06, "loss": 1.3046, "step": 22010 }, { "epoch": 1.1638170238630057, "grad_norm": 81.640869140625, "learning_rate": 5e-06, "loss": 1.3894, "step": 22020 }, { "epoch": 1.164345551121799, "grad_norm": 50.61136245727539, "learning_rate": 5e-06, "loss": 1.7519, "step": 22030 }, { "epoch": 1.1648740783805924, "grad_norm": 68.0390853881836, "learning_rate": 5e-06, "loss": 1.7389, "step": 22040 }, { "epoch": 1.1654026056393858, "grad_norm": 56.92253494262695, "learning_rate": 5e-06, "loss": 1.4793, "step": 22050 }, { "epoch": 1.1659311328981792, "grad_norm": 86.96500396728516, "learning_rate": 5e-06, "loss": 1.5564, "step": 22060 }, { "epoch": 1.1664596601569726, "grad_norm": 58.99906539916992, "learning_rate": 5e-06, "loss": 1.7952, "step": 22070 }, { "epoch": 1.166988187415766, "grad_norm": 74.33082580566406, "learning_rate": 5e-06, "loss": 1.6641, "step": 22080 }, { "epoch": 1.1675167146745593, "grad_norm": 57.745567321777344, "learning_rate": 5e-06, "loss": 1.6533, "step": 22090 }, { "epoch": 1.1680452419333527, "grad_norm": 44.80954360961914, "learning_rate": 5e-06, "loss": 1.3946, "step": 22100 }, { "epoch": 1.168573769192146, "grad_norm": 63.69842529296875, "learning_rate": 5e-06, "loss": 1.8869, "step": 22110 }, { "epoch": 1.1691022964509394, "grad_norm": 50.066078186035156, "learning_rate": 5e-06, "loss": 1.4963, "step": 22120 }, { "epoch": 1.1696308237097328, "grad_norm": 55.55636215209961, "learning_rate": 5e-06, "loss": 1.3594, "step": 22130 }, { "epoch": 1.1701593509685262, "grad_norm": 72.43209838867188, "learning_rate": 5e-06, "loss": 1.4071, "step": 22140 }, { "epoch": 1.1706878782273196, "grad_norm": 87.29956817626953, "learning_rate": 5e-06, "loss": 1.6883, "step": 22150 }, { "epoch": 1.171216405486113, "grad_norm": 89.79297637939453, "learning_rate": 5e-06, "loss": 1.4981, "step": 22160 }, { "epoch": 1.1717449327449063, "grad_norm": 112.26488494873047, "learning_rate": 5e-06, "loss": 1.9694, "step": 22170 }, { "epoch": 1.1722734600036997, "grad_norm": 70.07933807373047, "learning_rate": 5e-06, "loss": 1.8654, "step": 22180 }, { "epoch": 1.172801987262493, "grad_norm": 66.26919555664062, "learning_rate": 5e-06, "loss": 1.6728, "step": 22190 }, { "epoch": 1.1733305145212864, "grad_norm": 32.76993179321289, "learning_rate": 5e-06, "loss": 1.474, "step": 22200 }, { "epoch": 1.1738590417800798, "grad_norm": 51.89322280883789, "learning_rate": 5e-06, "loss": 1.6415, "step": 22210 }, { "epoch": 1.1743875690388732, "grad_norm": 86.6252670288086, "learning_rate": 5e-06, "loss": 1.8854, "step": 22220 }, { "epoch": 1.1749160962976666, "grad_norm": 56.12294387817383, "learning_rate": 5e-06, "loss": 1.5991, "step": 22230 }, { "epoch": 1.17544462355646, "grad_norm": 72.75732421875, "learning_rate": 5e-06, "loss": 1.4544, "step": 22240 }, { "epoch": 1.1759731508152533, "grad_norm": 83.40947723388672, "learning_rate": 5e-06, "loss": 1.6255, "step": 22250 }, { "epoch": 1.1765016780740467, "grad_norm": 78.4654312133789, "learning_rate": 5e-06, "loss": 1.2173, "step": 22260 }, { "epoch": 1.17703020533284, "grad_norm": 67.68689727783203, "learning_rate": 5e-06, "loss": 1.2648, "step": 22270 }, { "epoch": 1.1775587325916335, "grad_norm": 76.70039367675781, "learning_rate": 5e-06, "loss": 1.6016, "step": 22280 }, { "epoch": 1.1780872598504268, "grad_norm": 83.28260803222656, "learning_rate": 5e-06, "loss": 1.6287, "step": 22290 }, { "epoch": 1.1786157871092202, "grad_norm": 59.364593505859375, "learning_rate": 5e-06, "loss": 1.6206, "step": 22300 }, { "epoch": 1.1791443143680136, "grad_norm": 78.19251251220703, "learning_rate": 5e-06, "loss": 1.5625, "step": 22310 }, { "epoch": 1.179672841626807, "grad_norm": 68.34616088867188, "learning_rate": 5e-06, "loss": 1.6526, "step": 22320 }, { "epoch": 1.1802013688856003, "grad_norm": 93.49855041503906, "learning_rate": 5e-06, "loss": 1.543, "step": 22330 }, { "epoch": 1.1807298961443937, "grad_norm": 47.65638732910156, "learning_rate": 5e-06, "loss": 1.4574, "step": 22340 }, { "epoch": 1.181258423403187, "grad_norm": 47.642086029052734, "learning_rate": 5e-06, "loss": 1.8072, "step": 22350 }, { "epoch": 1.1817869506619805, "grad_norm": 66.62432098388672, "learning_rate": 5e-06, "loss": 1.648, "step": 22360 }, { "epoch": 1.1823154779207738, "grad_norm": 69.02597045898438, "learning_rate": 5e-06, "loss": 1.1758, "step": 22370 }, { "epoch": 1.1828440051795672, "grad_norm": 67.67928314208984, "learning_rate": 5e-06, "loss": 1.5644, "step": 22380 }, { "epoch": 1.1833725324383606, "grad_norm": 83.78124237060547, "learning_rate": 5e-06, "loss": 1.4037, "step": 22390 }, { "epoch": 1.183901059697154, "grad_norm": 60.452083587646484, "learning_rate": 5e-06, "loss": 1.6132, "step": 22400 }, { "epoch": 1.1844295869559471, "grad_norm": 65.6485595703125, "learning_rate": 5e-06, "loss": 1.5764, "step": 22410 }, { "epoch": 1.1849581142147407, "grad_norm": 84.18342590332031, "learning_rate": 5e-06, "loss": 1.9385, "step": 22420 }, { "epoch": 1.1854866414735339, "grad_norm": 51.445823669433594, "learning_rate": 5e-06, "loss": 1.3529, "step": 22430 }, { "epoch": 1.1860151687323275, "grad_norm": 60.69414138793945, "learning_rate": 5e-06, "loss": 1.6216, "step": 22440 }, { "epoch": 1.1865436959911206, "grad_norm": 62.84150314331055, "learning_rate": 5e-06, "loss": 1.4364, "step": 22450 }, { "epoch": 1.1870722232499142, "grad_norm": 46.89738082885742, "learning_rate": 5e-06, "loss": 1.5889, "step": 22460 }, { "epoch": 1.1876007505087074, "grad_norm": 60.836524963378906, "learning_rate": 5e-06, "loss": 1.4876, "step": 22470 }, { "epoch": 1.1881292777675008, "grad_norm": 57.73883056640625, "learning_rate": 5e-06, "loss": 1.5899, "step": 22480 }, { "epoch": 1.1886578050262941, "grad_norm": 52.75685501098633, "learning_rate": 5e-06, "loss": 1.6541, "step": 22490 }, { "epoch": 1.1891863322850875, "grad_norm": 67.66931915283203, "learning_rate": 5e-06, "loss": 1.8179, "step": 22500 }, { "epoch": 1.189714859543881, "grad_norm": 87.0078125, "learning_rate": 5e-06, "loss": 1.533, "step": 22510 }, { "epoch": 1.1902433868026743, "grad_norm": 76.17481231689453, "learning_rate": 5e-06, "loss": 1.5734, "step": 22520 }, { "epoch": 1.1907719140614677, "grad_norm": 64.62889099121094, "learning_rate": 5e-06, "loss": 1.6702, "step": 22530 }, { "epoch": 1.191300441320261, "grad_norm": 57.880653381347656, "learning_rate": 5e-06, "loss": 1.3706, "step": 22540 }, { "epoch": 1.1918289685790544, "grad_norm": 57.78266143798828, "learning_rate": 5e-06, "loss": 1.6624, "step": 22550 }, { "epoch": 1.1923574958378478, "grad_norm": 55.3774299621582, "learning_rate": 5e-06, "loss": 1.5412, "step": 22560 }, { "epoch": 1.1928860230966412, "grad_norm": 78.17341613769531, "learning_rate": 5e-06, "loss": 2.1131, "step": 22570 }, { "epoch": 1.1934145503554345, "grad_norm": 67.53116607666016, "learning_rate": 5e-06, "loss": 1.6169, "step": 22580 }, { "epoch": 1.193943077614228, "grad_norm": 97.38333129882812, "learning_rate": 5e-06, "loss": 1.3916, "step": 22590 }, { "epoch": 1.1944716048730213, "grad_norm": 62.4686393737793, "learning_rate": 5e-06, "loss": 1.4504, "step": 22600 }, { "epoch": 1.1950001321318147, "grad_norm": 45.44637680053711, "learning_rate": 5e-06, "loss": 1.5856, "step": 22610 }, { "epoch": 1.195528659390608, "grad_norm": 46.38928985595703, "learning_rate": 5e-06, "loss": 1.5399, "step": 22620 }, { "epoch": 1.1960571866494014, "grad_norm": 60.739341735839844, "learning_rate": 5e-06, "loss": 1.3562, "step": 22630 }, { "epoch": 1.1965857139081948, "grad_norm": 32.802276611328125, "learning_rate": 5e-06, "loss": 1.7046, "step": 22640 }, { "epoch": 1.1971142411669882, "grad_norm": 48.92223358154297, "learning_rate": 5e-06, "loss": 1.48, "step": 22650 }, { "epoch": 1.1976427684257815, "grad_norm": 83.73273468017578, "learning_rate": 5e-06, "loss": 1.2325, "step": 22660 }, { "epoch": 1.198171295684575, "grad_norm": 72.83482360839844, "learning_rate": 5e-06, "loss": 1.648, "step": 22670 }, { "epoch": 1.1986998229433683, "grad_norm": 46.470821380615234, "learning_rate": 5e-06, "loss": 1.1755, "step": 22680 }, { "epoch": 1.1992283502021617, "grad_norm": 43.598201751708984, "learning_rate": 5e-06, "loss": 1.4568, "step": 22690 }, { "epoch": 1.199756877460955, "grad_norm": 50.79483413696289, "learning_rate": 5e-06, "loss": 1.4289, "step": 22700 }, { "epoch": 1.2002854047197484, "grad_norm": 48.5257453918457, "learning_rate": 5e-06, "loss": 1.2826, "step": 22710 }, { "epoch": 1.2008139319785418, "grad_norm": 61.4022331237793, "learning_rate": 5e-06, "loss": 1.2282, "step": 22720 }, { "epoch": 1.2013424592373352, "grad_norm": 53.977317810058594, "learning_rate": 5e-06, "loss": 1.5748, "step": 22730 }, { "epoch": 1.2018709864961286, "grad_norm": 73.68207550048828, "learning_rate": 5e-06, "loss": 1.8343, "step": 22740 }, { "epoch": 1.202399513754922, "grad_norm": 50.798194885253906, "learning_rate": 5e-06, "loss": 1.2641, "step": 22750 }, { "epoch": 1.2029280410137153, "grad_norm": 49.6366081237793, "learning_rate": 5e-06, "loss": 1.3894, "step": 22760 }, { "epoch": 1.2034565682725087, "grad_norm": 61.10736846923828, "learning_rate": 5e-06, "loss": 1.7441, "step": 22770 }, { "epoch": 1.203985095531302, "grad_norm": 70.46121978759766, "learning_rate": 5e-06, "loss": 1.7159, "step": 22780 }, { "epoch": 1.2045136227900954, "grad_norm": 71.16852569580078, "learning_rate": 5e-06, "loss": 1.4067, "step": 22790 }, { "epoch": 1.2050421500488888, "grad_norm": 48.64091491699219, "learning_rate": 5e-06, "loss": 1.5815, "step": 22800 }, { "epoch": 1.2055706773076822, "grad_norm": 72.15678405761719, "learning_rate": 5e-06, "loss": 1.4729, "step": 22810 }, { "epoch": 1.2060992045664756, "grad_norm": 71.39237213134766, "learning_rate": 5e-06, "loss": 1.6041, "step": 22820 }, { "epoch": 1.206627731825269, "grad_norm": 58.31349563598633, "learning_rate": 5e-06, "loss": 1.3576, "step": 22830 }, { "epoch": 1.2071562590840623, "grad_norm": 84.56454467773438, "learning_rate": 5e-06, "loss": 1.4949, "step": 22840 }, { "epoch": 1.2076847863428557, "grad_norm": 82.9985122680664, "learning_rate": 5e-06, "loss": 1.8763, "step": 22850 }, { "epoch": 1.208213313601649, "grad_norm": 43.62775802612305, "learning_rate": 5e-06, "loss": 1.4443, "step": 22860 }, { "epoch": 1.2087418408604425, "grad_norm": 57.49103927612305, "learning_rate": 5e-06, "loss": 1.7527, "step": 22870 }, { "epoch": 1.2092703681192358, "grad_norm": 57.195579528808594, "learning_rate": 5e-06, "loss": 1.4757, "step": 22880 }, { "epoch": 1.2097988953780292, "grad_norm": 62.610774993896484, "learning_rate": 5e-06, "loss": 1.3766, "step": 22890 }, { "epoch": 1.2103274226368226, "grad_norm": 58.87638854980469, "learning_rate": 5e-06, "loss": 1.7621, "step": 22900 }, { "epoch": 1.210855949895616, "grad_norm": 66.76309204101562, "learning_rate": 5e-06, "loss": 1.4316, "step": 22910 }, { "epoch": 1.2113844771544093, "grad_norm": 71.41557312011719, "learning_rate": 5e-06, "loss": 1.4228, "step": 22920 }, { "epoch": 1.2119130044132027, "grad_norm": 63.16840362548828, "learning_rate": 5e-06, "loss": 1.7315, "step": 22930 }, { "epoch": 1.2124415316719959, "grad_norm": 58.57756805419922, "learning_rate": 5e-06, "loss": 1.6549, "step": 22940 }, { "epoch": 1.2129700589307895, "grad_norm": 55.429901123046875, "learning_rate": 5e-06, "loss": 1.3834, "step": 22950 }, { "epoch": 1.2134985861895826, "grad_norm": 61.803226470947266, "learning_rate": 5e-06, "loss": 1.9132, "step": 22960 }, { "epoch": 1.2140271134483762, "grad_norm": 49.9038200378418, "learning_rate": 5e-06, "loss": 1.3324, "step": 22970 }, { "epoch": 1.2145556407071694, "grad_norm": 66.08222198486328, "learning_rate": 5e-06, "loss": 1.4243, "step": 22980 }, { "epoch": 1.215084167965963, "grad_norm": 43.13277816772461, "learning_rate": 5e-06, "loss": 1.7628, "step": 22990 }, { "epoch": 1.2156126952247561, "grad_norm": 65.73213958740234, "learning_rate": 5e-06, "loss": 1.4594, "step": 23000 }, { "epoch": 1.2156126952247561, "eval_loss": 1.5045441389083862, "eval_runtime": 37.7531, "eval_samples_per_second": 324.0, "eval_steps_per_second": 10.145, "eval_sts-dev_pearson_cosine": 0.8427595860929125, "eval_sts-dev_pearson_dot": 0.8218036948936729, "eval_sts-dev_pearson_euclidean": 0.847606772384459, "eval_sts-dev_pearson_manhattan": 0.8475466330728196, "eval_sts-dev_pearson_max": 0.847606772384459, "eval_sts-dev_spearman_cosine": 0.8469592159348995, "eval_sts-dev_spearman_dot": 0.8207647025434359, "eval_sts-dev_spearman_euclidean": 0.8494667499944444, "eval_sts-dev_spearman_manhattan": 0.8490422091624402, "eval_sts-dev_spearman_max": 0.8494667499944444, "step": 23000 }, { "epoch": 1.2161412224835495, "grad_norm": 54.13270950317383, "learning_rate": 5e-06, "loss": 2.0221, "step": 23010 }, { "epoch": 1.2166697497423429, "grad_norm": 90.87173461914062, "learning_rate": 5e-06, "loss": 1.5786, "step": 23020 }, { "epoch": 1.2171982770011363, "grad_norm": 68.83836364746094, "learning_rate": 5e-06, "loss": 1.4147, "step": 23030 }, { "epoch": 1.2177268042599296, "grad_norm": 54.77288818359375, "learning_rate": 5e-06, "loss": 1.5746, "step": 23040 }, { "epoch": 1.218255331518723, "grad_norm": 81.71990203857422, "learning_rate": 5e-06, "loss": 1.6128, "step": 23050 }, { "epoch": 1.2187838587775164, "grad_norm": 53.83390426635742, "learning_rate": 5e-06, "loss": 1.1849, "step": 23060 }, { "epoch": 1.2193123860363098, "grad_norm": 35.060691833496094, "learning_rate": 5e-06, "loss": 1.548, "step": 23070 }, { "epoch": 1.2198409132951031, "grad_norm": 66.82648468017578, "learning_rate": 5e-06, "loss": 1.8365, "step": 23080 }, { "epoch": 1.2203694405538965, "grad_norm": 81.28726196289062, "learning_rate": 5e-06, "loss": 1.6589, "step": 23090 }, { "epoch": 1.2208979678126899, "grad_norm": 46.77308654785156, "learning_rate": 5e-06, "loss": 1.3788, "step": 23100 }, { "epoch": 1.2214264950714833, "grad_norm": 99.41949462890625, "learning_rate": 5e-06, "loss": 1.6532, "step": 23110 }, { "epoch": 1.2219550223302766, "grad_norm": 52.1894416809082, "learning_rate": 5e-06, "loss": 1.6037, "step": 23120 }, { "epoch": 1.22248354958907, "grad_norm": 48.1386833190918, "learning_rate": 5e-06, "loss": 1.3462, "step": 23130 }, { "epoch": 1.2230120768478634, "grad_norm": 43.09556579589844, "learning_rate": 5e-06, "loss": 1.4658, "step": 23140 }, { "epoch": 1.2235406041066568, "grad_norm": 63.94451904296875, "learning_rate": 5e-06, "loss": 1.7648, "step": 23150 }, { "epoch": 1.2240691313654501, "grad_norm": 56.423091888427734, "learning_rate": 5e-06, "loss": 1.6902, "step": 23160 }, { "epoch": 1.2245976586242435, "grad_norm": 88.77214813232422, "learning_rate": 5e-06, "loss": 1.54, "step": 23170 }, { "epoch": 1.225126185883037, "grad_norm": 94.4981689453125, "learning_rate": 5e-06, "loss": 1.5158, "step": 23180 }, { "epoch": 1.2256547131418303, "grad_norm": 49.624412536621094, "learning_rate": 5e-06, "loss": 1.7565, "step": 23190 }, { "epoch": 1.2261832404006237, "grad_norm": 87.0193862915039, "learning_rate": 5e-06, "loss": 1.3128, "step": 23200 }, { "epoch": 1.226711767659417, "grad_norm": 57.52167510986328, "learning_rate": 5e-06, "loss": 1.3797, "step": 23210 }, { "epoch": 1.2272402949182104, "grad_norm": 51.576751708984375, "learning_rate": 5e-06, "loss": 1.4834, "step": 23220 }, { "epoch": 1.2277688221770038, "grad_norm": 82.79314422607422, "learning_rate": 5e-06, "loss": 1.5852, "step": 23230 }, { "epoch": 1.2282973494357972, "grad_norm": 109.26705932617188, "learning_rate": 5e-06, "loss": 1.6496, "step": 23240 }, { "epoch": 1.2288258766945905, "grad_norm": 86.03628540039062, "learning_rate": 5e-06, "loss": 1.66, "step": 23250 }, { "epoch": 1.229354403953384, "grad_norm": 65.98100280761719, "learning_rate": 5e-06, "loss": 1.3799, "step": 23260 }, { "epoch": 1.2298829312121773, "grad_norm": 42.543174743652344, "learning_rate": 5e-06, "loss": 1.69, "step": 23270 }, { "epoch": 1.2304114584709707, "grad_norm": 80.81344604492188, "learning_rate": 5e-06, "loss": 1.7967, "step": 23280 }, { "epoch": 1.230939985729764, "grad_norm": 69.99148559570312, "learning_rate": 5e-06, "loss": 1.5631, "step": 23290 }, { "epoch": 1.2314685129885574, "grad_norm": 66.17514038085938, "learning_rate": 5e-06, "loss": 1.0637, "step": 23300 }, { "epoch": 1.2319970402473508, "grad_norm": 94.65254974365234, "learning_rate": 5e-06, "loss": 1.574, "step": 23310 }, { "epoch": 1.2325255675061442, "grad_norm": 75.22698211669922, "learning_rate": 5e-06, "loss": 1.2539, "step": 23320 }, { "epoch": 1.2330540947649375, "grad_norm": 44.77048873901367, "learning_rate": 5e-06, "loss": 1.3339, "step": 23330 }, { "epoch": 1.233582622023731, "grad_norm": 47.622440338134766, "learning_rate": 5e-06, "loss": 1.3181, "step": 23340 }, { "epoch": 1.2341111492825243, "grad_norm": 80.90107727050781, "learning_rate": 5e-06, "loss": 1.1472, "step": 23350 }, { "epoch": 1.2346396765413177, "grad_norm": 54.77308654785156, "learning_rate": 5e-06, "loss": 1.3426, "step": 23360 }, { "epoch": 1.235168203800111, "grad_norm": 91.4499740600586, "learning_rate": 5e-06, "loss": 1.3508, "step": 23370 }, { "epoch": 1.2356967310589044, "grad_norm": 58.157989501953125, "learning_rate": 5e-06, "loss": 1.6891, "step": 23380 }, { "epoch": 1.2362252583176978, "grad_norm": 70.58709716796875, "learning_rate": 5e-06, "loss": 1.3816, "step": 23390 }, { "epoch": 1.2367537855764912, "grad_norm": 68.98792266845703, "learning_rate": 5e-06, "loss": 1.5664, "step": 23400 }, { "epoch": 1.2372823128352846, "grad_norm": 68.83324432373047, "learning_rate": 5e-06, "loss": 1.4303, "step": 23410 }, { "epoch": 1.237810840094078, "grad_norm": 91.1779556274414, "learning_rate": 5e-06, "loss": 1.5342, "step": 23420 }, { "epoch": 1.2383393673528713, "grad_norm": 45.76639175415039, "learning_rate": 5e-06, "loss": 1.4571, "step": 23430 }, { "epoch": 1.2388678946116647, "grad_norm": 84.74471282958984, "learning_rate": 5e-06, "loss": 1.7536, "step": 23440 }, { "epoch": 1.239396421870458, "grad_norm": 79.35636138916016, "learning_rate": 5e-06, "loss": 1.1547, "step": 23450 }, { "epoch": 1.2399249491292514, "grad_norm": 56.0201301574707, "learning_rate": 5e-06, "loss": 1.7291, "step": 23460 }, { "epoch": 1.2404534763880446, "grad_norm": 56.090789794921875, "learning_rate": 5e-06, "loss": 1.6937, "step": 23470 }, { "epoch": 1.2409820036468382, "grad_norm": 44.399723052978516, "learning_rate": 5e-06, "loss": 1.3225, "step": 23480 }, { "epoch": 1.2415105309056313, "grad_norm": 64.66970825195312, "learning_rate": 5e-06, "loss": 1.4829, "step": 23490 }, { "epoch": 1.242039058164425, "grad_norm": 59.83877182006836, "learning_rate": 5e-06, "loss": 1.6112, "step": 23500 }, { "epoch": 1.242567585423218, "grad_norm": 57.39141082763672, "learning_rate": 5e-06, "loss": 1.3965, "step": 23510 }, { "epoch": 1.2430961126820115, "grad_norm": 50.26253890991211, "learning_rate": 5e-06, "loss": 1.4363, "step": 23520 }, { "epoch": 1.2436246399408049, "grad_norm": 84.82707977294922, "learning_rate": 5e-06, "loss": 1.3577, "step": 23530 }, { "epoch": 1.2441531671995982, "grad_norm": 77.51036071777344, "learning_rate": 5e-06, "loss": 1.4372, "step": 23540 }, { "epoch": 1.2446816944583916, "grad_norm": 64.34200286865234, "learning_rate": 5e-06, "loss": 1.6489, "step": 23550 }, { "epoch": 1.245210221717185, "grad_norm": 111.94435119628906, "learning_rate": 5e-06, "loss": 1.8099, "step": 23560 }, { "epoch": 1.2457387489759784, "grad_norm": 69.25333404541016, "learning_rate": 5e-06, "loss": 1.6027, "step": 23570 }, { "epoch": 1.2462672762347717, "grad_norm": 45.85417175292969, "learning_rate": 5e-06, "loss": 1.3124, "step": 23580 }, { "epoch": 1.2467958034935651, "grad_norm": 72.10799407958984, "learning_rate": 5e-06, "loss": 1.397, "step": 23590 }, { "epoch": 1.2473243307523585, "grad_norm": 48.26641845703125, "learning_rate": 5e-06, "loss": 1.5818, "step": 23600 }, { "epoch": 1.2478528580111519, "grad_norm": 55.39786911010742, "learning_rate": 5e-06, "loss": 1.5991, "step": 23610 }, { "epoch": 1.2483813852699452, "grad_norm": 56.691619873046875, "learning_rate": 5e-06, "loss": 1.5081, "step": 23620 }, { "epoch": 1.2489099125287386, "grad_norm": 49.10329055786133, "learning_rate": 5e-06, "loss": 1.5829, "step": 23630 }, { "epoch": 1.249438439787532, "grad_norm": 46.05482482910156, "learning_rate": 5e-06, "loss": 1.3827, "step": 23640 }, { "epoch": 1.2499669670463254, "grad_norm": 82.81712341308594, "learning_rate": 5e-06, "loss": 1.2787, "step": 23650 }, { "epoch": 1.2504954943051187, "grad_norm": 70.972412109375, "learning_rate": 5e-06, "loss": 1.3651, "step": 23660 }, { "epoch": 1.2510240215639121, "grad_norm": 49.77275085449219, "learning_rate": 5e-06, "loss": 1.4384, "step": 23670 }, { "epoch": 1.2515525488227055, "grad_norm": 52.70986557006836, "learning_rate": 5e-06, "loss": 1.6335, "step": 23680 }, { "epoch": 1.2520810760814989, "grad_norm": 71.83987426757812, "learning_rate": 5e-06, "loss": 1.4691, "step": 23690 }, { "epoch": 1.2526096033402923, "grad_norm": 48.44734191894531, "learning_rate": 5e-06, "loss": 1.3903, "step": 23700 }, { "epoch": 1.2531381305990856, "grad_norm": 64.8322982788086, "learning_rate": 5e-06, "loss": 1.6536, "step": 23710 }, { "epoch": 1.253666657857879, "grad_norm": 81.36043548583984, "learning_rate": 5e-06, "loss": 1.9183, "step": 23720 }, { "epoch": 1.2541951851166724, "grad_norm": 61.59522247314453, "learning_rate": 5e-06, "loss": 1.482, "step": 23730 }, { "epoch": 1.2547237123754658, "grad_norm": 68.02114868164062, "learning_rate": 5e-06, "loss": 1.5305, "step": 23740 }, { "epoch": 1.2552522396342591, "grad_norm": 46.31548309326172, "learning_rate": 5e-06, "loss": 1.5465, "step": 23750 }, { "epoch": 1.2557807668930525, "grad_norm": 55.55765914916992, "learning_rate": 5e-06, "loss": 1.2089, "step": 23760 }, { "epoch": 1.256309294151846, "grad_norm": 80.1974868774414, "learning_rate": 5e-06, "loss": 1.4456, "step": 23770 }, { "epoch": 1.2568378214106393, "grad_norm": 44.33974838256836, "learning_rate": 5e-06, "loss": 1.3315, "step": 23780 }, { "epoch": 1.2573663486694326, "grad_norm": 114.98141479492188, "learning_rate": 5e-06, "loss": 1.6925, "step": 23790 }, { "epoch": 1.257894875928226, "grad_norm": 48.792293548583984, "learning_rate": 5e-06, "loss": 1.7401, "step": 23800 }, { "epoch": 1.2584234031870194, "grad_norm": 75.30497741699219, "learning_rate": 5e-06, "loss": 1.4995, "step": 23810 }, { "epoch": 1.2589519304458128, "grad_norm": 91.4151840209961, "learning_rate": 5e-06, "loss": 2.0269, "step": 23820 }, { "epoch": 1.2594804577046061, "grad_norm": 61.48943328857422, "learning_rate": 5e-06, "loss": 1.4116, "step": 23830 }, { "epoch": 1.2600089849633995, "grad_norm": 77.19662475585938, "learning_rate": 5e-06, "loss": 1.4924, "step": 23840 }, { "epoch": 1.260537512222193, "grad_norm": 67.03270721435547, "learning_rate": 5e-06, "loss": 1.6459, "step": 23850 }, { "epoch": 1.2610660394809863, "grad_norm": 55.951351165771484, "learning_rate": 5e-06, "loss": 1.669, "step": 23860 }, { "epoch": 1.2615945667397797, "grad_norm": 78.4507064819336, "learning_rate": 5e-06, "loss": 1.6199, "step": 23870 }, { "epoch": 1.262123093998573, "grad_norm": 75.57986450195312, "learning_rate": 5e-06, "loss": 1.6155, "step": 23880 }, { "epoch": 1.2626516212573664, "grad_norm": 76.7987060546875, "learning_rate": 5e-06, "loss": 1.4747, "step": 23890 }, { "epoch": 1.2631801485161598, "grad_norm": 57.26061248779297, "learning_rate": 5e-06, "loss": 1.5306, "step": 23900 }, { "epoch": 1.2637086757749532, "grad_norm": 75.8906021118164, "learning_rate": 5e-06, "loss": 1.4404, "step": 23910 }, { "epoch": 1.2642372030337465, "grad_norm": 59.73893356323242, "learning_rate": 5e-06, "loss": 1.3568, "step": 23920 }, { "epoch": 1.26476573029254, "grad_norm": 59.92659378051758, "learning_rate": 5e-06, "loss": 1.4682, "step": 23930 }, { "epoch": 1.2652942575513333, "grad_norm": 84.04439544677734, "learning_rate": 5e-06, "loss": 1.3834, "step": 23940 }, { "epoch": 1.2658227848101267, "grad_norm": 44.4686279296875, "learning_rate": 5e-06, "loss": 1.1565, "step": 23950 }, { "epoch": 1.2663513120689198, "grad_norm": 43.95203399658203, "learning_rate": 5e-06, "loss": 1.6335, "step": 23960 }, { "epoch": 1.2668798393277134, "grad_norm": 65.3744888305664, "learning_rate": 5e-06, "loss": 1.6412, "step": 23970 }, { "epoch": 1.2674083665865066, "grad_norm": 83.34220123291016, "learning_rate": 5e-06, "loss": 1.1935, "step": 23980 }, { "epoch": 1.2679368938453002, "grad_norm": 55.29001235961914, "learning_rate": 5e-06, "loss": 1.6408, "step": 23990 }, { "epoch": 1.2684654211040933, "grad_norm": 51.52555465698242, "learning_rate": 5e-06, "loss": 1.8679, "step": 24000 }, { "epoch": 1.2684654211040933, "eval_loss": 1.4838783740997314, "eval_runtime": 40.7318, "eval_samples_per_second": 300.306, "eval_steps_per_second": 9.403, "eval_sts-dev_pearson_cosine": 0.84420911535689, "eval_sts-dev_pearson_dot": 0.8219374326313151, "eval_sts-dev_pearson_euclidean": 0.8453872411038212, "eval_sts-dev_pearson_manhattan": 0.8452065645452951, "eval_sts-dev_pearson_max": 0.8453872411038212, "eval_sts-dev_spearman_cosine": 0.8455070288548043, "eval_sts-dev_spearman_dot": 0.8186335287839567, "eval_sts-dev_spearman_euclidean": 0.8472682072648682, "eval_sts-dev_spearman_manhattan": 0.8468707531614159, "eval_sts-dev_spearman_max": 0.8472682072648682, "step": 24000 }, { "epoch": 1.268993948362887, "grad_norm": 51.32797622680664, "learning_rate": 5e-06, "loss": 1.2345, "step": 24010 }, { "epoch": 1.26952247562168, "grad_norm": 69.45034790039062, "learning_rate": 5e-06, "loss": 1.5423, "step": 24020 }, { "epoch": 1.2700510028804737, "grad_norm": 65.94514465332031, "learning_rate": 5e-06, "loss": 1.4329, "step": 24030 }, { "epoch": 1.2705795301392668, "grad_norm": 40.59288024902344, "learning_rate": 5e-06, "loss": 1.4964, "step": 24040 }, { "epoch": 1.2711080573980604, "grad_norm": 61.404903411865234, "learning_rate": 5e-06, "loss": 1.2816, "step": 24050 }, { "epoch": 1.2716365846568536, "grad_norm": 55.11246871948242, "learning_rate": 5e-06, "loss": 1.754, "step": 24060 }, { "epoch": 1.2721651119156472, "grad_norm": 91.05042266845703, "learning_rate": 5e-06, "loss": 1.4131, "step": 24070 }, { "epoch": 1.2726936391744403, "grad_norm": 54.2865104675293, "learning_rate": 5e-06, "loss": 1.4338, "step": 24080 }, { "epoch": 1.2732221664332337, "grad_norm": 67.10845947265625, "learning_rate": 5e-06, "loss": 1.5281, "step": 24090 }, { "epoch": 1.273750693692027, "grad_norm": 58.678157806396484, "learning_rate": 5e-06, "loss": 1.5777, "step": 24100 }, { "epoch": 1.2742792209508205, "grad_norm": 88.20071411132812, "learning_rate": 5e-06, "loss": 1.5843, "step": 24110 }, { "epoch": 1.2748077482096138, "grad_norm": 50.56340408325195, "learning_rate": 5e-06, "loss": 1.4727, "step": 24120 }, { "epoch": 1.2753362754684072, "grad_norm": 79.74798583984375, "learning_rate": 5e-06, "loss": 1.831, "step": 24130 }, { "epoch": 1.2758648027272006, "grad_norm": 65.79623413085938, "learning_rate": 5e-06, "loss": 1.4905, "step": 24140 }, { "epoch": 1.276393329985994, "grad_norm": 47.02473449707031, "learning_rate": 5e-06, "loss": 1.1664, "step": 24150 }, { "epoch": 1.2769218572447874, "grad_norm": 40.60451126098633, "learning_rate": 5e-06, "loss": 1.1604, "step": 24160 }, { "epoch": 1.2774503845035807, "grad_norm": 131.04690551757812, "learning_rate": 5e-06, "loss": 1.4499, "step": 24170 }, { "epoch": 1.277978911762374, "grad_norm": 57.667667388916016, "learning_rate": 5e-06, "loss": 1.6462, "step": 24180 }, { "epoch": 1.2785074390211675, "grad_norm": 67.74263000488281, "learning_rate": 5e-06, "loss": 1.2424, "step": 24190 }, { "epoch": 1.2790359662799609, "grad_norm": 81.2696762084961, "learning_rate": 5e-06, "loss": 1.4765, "step": 24200 }, { "epoch": 1.2795644935387542, "grad_norm": 75.9720458984375, "learning_rate": 5e-06, "loss": 1.4381, "step": 24210 }, { "epoch": 1.2800930207975476, "grad_norm": 69.16962432861328, "learning_rate": 5e-06, "loss": 1.1718, "step": 24220 }, { "epoch": 1.280621548056341, "grad_norm": 66.76806640625, "learning_rate": 5e-06, "loss": 1.2451, "step": 24230 }, { "epoch": 1.2811500753151344, "grad_norm": 66.08653259277344, "learning_rate": 5e-06, "loss": 1.2855, "step": 24240 }, { "epoch": 1.2816786025739277, "grad_norm": 68.12284851074219, "learning_rate": 5e-06, "loss": 1.7129, "step": 24250 }, { "epoch": 1.2822071298327211, "grad_norm": 41.21940612792969, "learning_rate": 5e-06, "loss": 1.427, "step": 24260 }, { "epoch": 1.2827356570915145, "grad_norm": 57.304412841796875, "learning_rate": 5e-06, "loss": 1.3829, "step": 24270 }, { "epoch": 1.2832641843503079, "grad_norm": 51.532108306884766, "learning_rate": 5e-06, "loss": 1.484, "step": 24280 }, { "epoch": 1.2837927116091012, "grad_norm": 46.56349182128906, "learning_rate": 5e-06, "loss": 1.4946, "step": 24290 }, { "epoch": 1.2843212388678946, "grad_norm": 65.61511993408203, "learning_rate": 5e-06, "loss": 1.3242, "step": 24300 }, { "epoch": 1.284849766126688, "grad_norm": 46.743629455566406, "learning_rate": 5e-06, "loss": 1.2939, "step": 24310 }, { "epoch": 1.2853782933854814, "grad_norm": 76.52010345458984, "learning_rate": 5e-06, "loss": 1.4601, "step": 24320 }, { "epoch": 1.2859068206442748, "grad_norm": 49.890628814697266, "learning_rate": 5e-06, "loss": 1.3317, "step": 24330 }, { "epoch": 1.2864353479030681, "grad_norm": 70.73870086669922, "learning_rate": 5e-06, "loss": 1.4661, "step": 24340 }, { "epoch": 1.2869638751618615, "grad_norm": 100.16010284423828, "learning_rate": 5e-06, "loss": 1.4598, "step": 24350 }, { "epoch": 1.2874924024206549, "grad_norm": 51.192298889160156, "learning_rate": 5e-06, "loss": 1.334, "step": 24360 }, { "epoch": 1.2880209296794483, "grad_norm": 78.57093048095703, "learning_rate": 5e-06, "loss": 1.9361, "step": 24370 }, { "epoch": 1.2885494569382416, "grad_norm": 42.41552734375, "learning_rate": 5e-06, "loss": 1.3312, "step": 24380 }, { "epoch": 1.289077984197035, "grad_norm": 48.376583099365234, "learning_rate": 5e-06, "loss": 1.2395, "step": 24390 }, { "epoch": 1.2896065114558284, "grad_norm": 49.0567512512207, "learning_rate": 5e-06, "loss": 1.3484, "step": 24400 }, { "epoch": 1.2901350387146218, "grad_norm": 38.125892639160156, "learning_rate": 5e-06, "loss": 1.4992, "step": 24410 }, { "epoch": 1.2906635659734151, "grad_norm": 70.17932891845703, "learning_rate": 5e-06, "loss": 1.7151, "step": 24420 }, { "epoch": 1.2911920932322085, "grad_norm": 71.09930419921875, "learning_rate": 5e-06, "loss": 1.5678, "step": 24430 }, { "epoch": 1.291720620491002, "grad_norm": 42.619407653808594, "learning_rate": 5e-06, "loss": 1.4111, "step": 24440 }, { "epoch": 1.2922491477497953, "grad_norm": 54.41280746459961, "learning_rate": 5e-06, "loss": 1.7984, "step": 24450 }, { "epoch": 1.2927776750085886, "grad_norm": 69.99049377441406, "learning_rate": 5e-06, "loss": 1.534, "step": 24460 }, { "epoch": 1.293306202267382, "grad_norm": 50.6290168762207, "learning_rate": 5e-06, "loss": 1.3395, "step": 24470 }, { "epoch": 1.2938347295261754, "grad_norm": 56.694068908691406, "learning_rate": 5e-06, "loss": 1.7618, "step": 24480 }, { "epoch": 1.2943632567849686, "grad_norm": 59.233604431152344, "learning_rate": 5e-06, "loss": 1.7568, "step": 24490 }, { "epoch": 1.2948917840437622, "grad_norm": 66.48570251464844, "learning_rate": 5e-06, "loss": 1.3838, "step": 24500 }, { "epoch": 1.2954203113025553, "grad_norm": 60.72438430786133, "learning_rate": 5e-06, "loss": 1.5302, "step": 24510 }, { "epoch": 1.295948838561349, "grad_norm": 92.30548858642578, "learning_rate": 5e-06, "loss": 1.1729, "step": 24520 }, { "epoch": 1.296477365820142, "grad_norm": 40.84986877441406, "learning_rate": 5e-06, "loss": 1.2757, "step": 24530 }, { "epoch": 1.2970058930789357, "grad_norm": 67.60579681396484, "learning_rate": 5e-06, "loss": 1.6456, "step": 24540 }, { "epoch": 1.2975344203377288, "grad_norm": 57.70671844482422, "learning_rate": 5e-06, "loss": 1.3255, "step": 24550 }, { "epoch": 1.2980629475965224, "grad_norm": 97.07897186279297, "learning_rate": 5e-06, "loss": 1.6877, "step": 24560 }, { "epoch": 1.2985914748553156, "grad_norm": 67.52340698242188, "learning_rate": 5e-06, "loss": 1.2505, "step": 24570 }, { "epoch": 1.2991200021141092, "grad_norm": 66.02325439453125, "learning_rate": 5e-06, "loss": 1.9114, "step": 24580 }, { "epoch": 1.2996485293729023, "grad_norm": 76.71765899658203, "learning_rate": 5e-06, "loss": 1.5337, "step": 24590 }, { "epoch": 1.300177056631696, "grad_norm": 39.19451904296875, "learning_rate": 5e-06, "loss": 1.3042, "step": 24600 }, { "epoch": 1.300705583890489, "grad_norm": 55.134830474853516, "learning_rate": 5e-06, "loss": 1.3593, "step": 24610 }, { "epoch": 1.3012341111492824, "grad_norm": 51.307010650634766, "learning_rate": 5e-06, "loss": 1.6391, "step": 24620 }, { "epoch": 1.3017626384080758, "grad_norm": 47.21104049682617, "learning_rate": 5e-06, "loss": 1.7355, "step": 24630 }, { "epoch": 1.3022911656668692, "grad_norm": 59.905792236328125, "learning_rate": 5e-06, "loss": 1.4151, "step": 24640 }, { "epoch": 1.3028196929256626, "grad_norm": 58.648677825927734, "learning_rate": 5e-06, "loss": 1.5191, "step": 24650 }, { "epoch": 1.303348220184456, "grad_norm": 79.67315673828125, "learning_rate": 5e-06, "loss": 1.3479, "step": 24660 }, { "epoch": 1.3038767474432493, "grad_norm": 50.91302490234375, "learning_rate": 5e-06, "loss": 1.0733, "step": 24670 }, { "epoch": 1.3044052747020427, "grad_norm": 58.91108322143555, "learning_rate": 5e-06, "loss": 1.3179, "step": 24680 }, { "epoch": 1.304933801960836, "grad_norm": 65.13612365722656, "learning_rate": 5e-06, "loss": 1.2871, "step": 24690 }, { "epoch": 1.3054623292196295, "grad_norm": 66.4959945678711, "learning_rate": 5e-06, "loss": 1.6102, "step": 24700 }, { "epoch": 1.3059908564784228, "grad_norm": 56.64645004272461, "learning_rate": 5e-06, "loss": 1.3411, "step": 24710 }, { "epoch": 1.3065193837372162, "grad_norm": 76.11676788330078, "learning_rate": 5e-06, "loss": 1.9813, "step": 24720 }, { "epoch": 1.3070479109960096, "grad_norm": 68.68077850341797, "learning_rate": 5e-06, "loss": 1.4611, "step": 24730 }, { "epoch": 1.307576438254803, "grad_norm": 41.17323303222656, "learning_rate": 5e-06, "loss": 1.2729, "step": 24740 }, { "epoch": 1.3081049655135963, "grad_norm": 53.5901985168457, "learning_rate": 5e-06, "loss": 1.3011, "step": 24750 }, { "epoch": 1.3086334927723897, "grad_norm": 60.68828201293945, "learning_rate": 5e-06, "loss": 1.3226, "step": 24760 }, { "epoch": 1.309162020031183, "grad_norm": 64.06889343261719, "learning_rate": 5e-06, "loss": 1.231, "step": 24770 }, { "epoch": 1.3096905472899765, "grad_norm": 74.11122131347656, "learning_rate": 5e-06, "loss": 1.6548, "step": 24780 }, { "epoch": 1.3102190745487698, "grad_norm": 41.492462158203125, "learning_rate": 5e-06, "loss": 1.3681, "step": 24790 }, { "epoch": 1.3107476018075632, "grad_norm": 77.52572631835938, "learning_rate": 5e-06, "loss": 1.5209, "step": 24800 }, { "epoch": 1.3112761290663566, "grad_norm": 49.02819061279297, "learning_rate": 5e-06, "loss": 1.3897, "step": 24810 }, { "epoch": 1.31180465632515, "grad_norm": 47.476558685302734, "learning_rate": 5e-06, "loss": 1.2927, "step": 24820 }, { "epoch": 1.3123331835839434, "grad_norm": 45.144447326660156, "learning_rate": 5e-06, "loss": 1.438, "step": 24830 }, { "epoch": 1.3128617108427367, "grad_norm": 55.618141174316406, "learning_rate": 5e-06, "loss": 1.4782, "step": 24840 }, { "epoch": 1.31339023810153, "grad_norm": 72.65625762939453, "learning_rate": 5e-06, "loss": 1.2344, "step": 24850 }, { "epoch": 1.3139187653603235, "grad_norm": 64.18146514892578, "learning_rate": 5e-06, "loss": 1.2544, "step": 24860 }, { "epoch": 1.3144472926191169, "grad_norm": 91.0538558959961, "learning_rate": 5e-06, "loss": 1.6832, "step": 24870 }, { "epoch": 1.3149758198779102, "grad_norm": 56.63141632080078, "learning_rate": 5e-06, "loss": 1.4857, "step": 24880 }, { "epoch": 1.3155043471367036, "grad_norm": 75.94349670410156, "learning_rate": 5e-06, "loss": 1.8513, "step": 24890 }, { "epoch": 1.316032874395497, "grad_norm": 59.09703826904297, "learning_rate": 5e-06, "loss": 1.8892, "step": 24900 }, { "epoch": 1.3165614016542904, "grad_norm": 46.61307907104492, "learning_rate": 5e-06, "loss": 1.4113, "step": 24910 }, { "epoch": 1.3170899289130837, "grad_norm": 76.98562622070312, "learning_rate": 5e-06, "loss": 1.5046, "step": 24920 }, { "epoch": 1.3176184561718771, "grad_norm": 55.50628662109375, "learning_rate": 5e-06, "loss": 1.4845, "step": 24930 }, { "epoch": 1.3181469834306705, "grad_norm": 72.85601043701172, "learning_rate": 5e-06, "loss": 1.5415, "step": 24940 }, { "epoch": 1.3186755106894639, "grad_norm": 58.68897247314453, "learning_rate": 5e-06, "loss": 1.2223, "step": 24950 }, { "epoch": 1.3192040379482572, "grad_norm": 56.7653923034668, "learning_rate": 5e-06, "loss": 1.4067, "step": 24960 }, { "epoch": 1.3197325652070506, "grad_norm": 58.75193786621094, "learning_rate": 5e-06, "loss": 1.5571, "step": 24970 }, { "epoch": 1.320261092465844, "grad_norm": 58.4222297668457, "learning_rate": 5e-06, "loss": 1.6593, "step": 24980 }, { "epoch": 1.3207896197246374, "grad_norm": 28.297916412353516, "learning_rate": 5e-06, "loss": 1.3667, "step": 24990 }, { "epoch": 1.3213181469834308, "grad_norm": 48.949440002441406, "learning_rate": 5e-06, "loss": 1.5286, "step": 25000 }, { "epoch": 1.3213181469834308, "eval_loss": 1.4606750011444092, "eval_runtime": 43.6553, "eval_samples_per_second": 280.195, "eval_steps_per_second": 8.773, "eval_sts-dev_pearson_cosine": 0.8477219702021799, "eval_sts-dev_pearson_dot": 0.8231717959561617, "eval_sts-dev_pearson_euclidean": 0.8498216063754267, "eval_sts-dev_pearson_manhattan": 0.8495382375952092, "eval_sts-dev_pearson_max": 0.8498216063754267, "eval_sts-dev_spearman_cosine": 0.8494834667923653, "eval_sts-dev_spearman_dot": 0.8192522549276806, "eval_sts-dev_spearman_euclidean": 0.8515284569698439, "eval_sts-dev_spearman_manhattan": 0.8510590450138414, "eval_sts-dev_spearman_max": 0.8515284569698439, "step": 25000 }, { "epoch": 1.3218466742422241, "grad_norm": 69.14653778076172, "learning_rate": 5e-06, "loss": 1.1452, "step": 25010 }, { "epoch": 1.3223752015010173, "grad_norm": 66.19212341308594, "learning_rate": 5e-06, "loss": 1.1184, "step": 25020 }, { "epoch": 1.3229037287598109, "grad_norm": 72.83991241455078, "learning_rate": 5e-06, "loss": 1.4739, "step": 25030 }, { "epoch": 1.323432256018604, "grad_norm": 63.667720794677734, "learning_rate": 5e-06, "loss": 1.5799, "step": 25040 }, { "epoch": 1.3239607832773976, "grad_norm": 44.643798828125, "learning_rate": 5e-06, "loss": 1.7396, "step": 25050 }, { "epoch": 1.3244893105361908, "grad_norm": 53.441593170166016, "learning_rate": 5e-06, "loss": 1.569, "step": 25060 }, { "epoch": 1.3250178377949844, "grad_norm": 64.82489013671875, "learning_rate": 5e-06, "loss": 1.4496, "step": 25070 }, { "epoch": 1.3255463650537775, "grad_norm": 91.54017639160156, "learning_rate": 5e-06, "loss": 1.1513, "step": 25080 }, { "epoch": 1.3260748923125711, "grad_norm": 96.18951416015625, "learning_rate": 5e-06, "loss": 1.8867, "step": 25090 }, { "epoch": 1.3266034195713643, "grad_norm": 67.40808868408203, "learning_rate": 5e-06, "loss": 1.7041, "step": 25100 }, { "epoch": 1.327131946830158, "grad_norm": 63.15839767456055, "learning_rate": 5e-06, "loss": 1.1778, "step": 25110 }, { "epoch": 1.327660474088951, "grad_norm": 83.40582275390625, "learning_rate": 5e-06, "loss": 1.5666, "step": 25120 }, { "epoch": 1.3281890013477446, "grad_norm": 72.3972396850586, "learning_rate": 5e-06, "loss": 1.5312, "step": 25130 }, { "epoch": 1.3287175286065378, "grad_norm": 68.5000228881836, "learning_rate": 5e-06, "loss": 1.562, "step": 25140 }, { "epoch": 1.3292460558653312, "grad_norm": 112.0470962524414, "learning_rate": 5e-06, "loss": 1.2771, "step": 25150 }, { "epoch": 1.3297745831241246, "grad_norm": 58.26575469970703, "learning_rate": 5e-06, "loss": 1.409, "step": 25160 }, { "epoch": 1.330303110382918, "grad_norm": 89.56118774414062, "learning_rate": 5e-06, "loss": 1.2321, "step": 25170 }, { "epoch": 1.3308316376417113, "grad_norm": 66.79156494140625, "learning_rate": 5e-06, "loss": 1.3509, "step": 25180 }, { "epoch": 1.3313601649005047, "grad_norm": 77.19434356689453, "learning_rate": 5e-06, "loss": 1.3957, "step": 25190 }, { "epoch": 1.331888692159298, "grad_norm": 69.89852905273438, "learning_rate": 5e-06, "loss": 1.8125, "step": 25200 }, { "epoch": 1.3324172194180914, "grad_norm": 58.18629455566406, "learning_rate": 5e-06, "loss": 1.2641, "step": 25210 }, { "epoch": 1.3329457466768848, "grad_norm": 67.71128845214844, "learning_rate": 5e-06, "loss": 1.4939, "step": 25220 }, { "epoch": 1.3334742739356782, "grad_norm": 75.219970703125, "learning_rate": 5e-06, "loss": 1.4762, "step": 25230 }, { "epoch": 1.3340028011944716, "grad_norm": 70.41415405273438, "learning_rate": 5e-06, "loss": 1.6714, "step": 25240 }, { "epoch": 1.334531328453265, "grad_norm": 68.10598754882812, "learning_rate": 5e-06, "loss": 1.5896, "step": 25250 }, { "epoch": 1.3350598557120583, "grad_norm": 49.198516845703125, "learning_rate": 5e-06, "loss": 1.3392, "step": 25260 }, { "epoch": 1.3355883829708517, "grad_norm": 78.09276580810547, "learning_rate": 5e-06, "loss": 1.6169, "step": 25270 }, { "epoch": 1.336116910229645, "grad_norm": 60.19392013549805, "learning_rate": 5e-06, "loss": 1.2268, "step": 25280 }, { "epoch": 1.3366454374884384, "grad_norm": 45.8262825012207, "learning_rate": 5e-06, "loss": 1.3449, "step": 25290 }, { "epoch": 1.3371739647472318, "grad_norm": 66.22686767578125, "learning_rate": 5e-06, "loss": 1.6658, "step": 25300 }, { "epoch": 1.3377024920060252, "grad_norm": 73.95735931396484, "learning_rate": 5e-06, "loss": 1.1999, "step": 25310 }, { "epoch": 1.3382310192648186, "grad_norm": 62.1511116027832, "learning_rate": 5e-06, "loss": 1.5433, "step": 25320 }, { "epoch": 1.338759546523612, "grad_norm": 77.07608795166016, "learning_rate": 5e-06, "loss": 1.7803, "step": 25330 }, { "epoch": 1.3392880737824053, "grad_norm": 33.66920852661133, "learning_rate": 5e-06, "loss": 1.3803, "step": 25340 }, { "epoch": 1.3398166010411987, "grad_norm": 77.32083129882812, "learning_rate": 5e-06, "loss": 1.4552, "step": 25350 }, { "epoch": 1.340345128299992, "grad_norm": 42.81805419921875, "learning_rate": 5e-06, "loss": 1.3971, "step": 25360 }, { "epoch": 1.3408736555587855, "grad_norm": 69.6436538696289, "learning_rate": 5e-06, "loss": 1.5523, "step": 25370 }, { "epoch": 1.3414021828175788, "grad_norm": 47.458160400390625, "learning_rate": 5e-06, "loss": 1.5628, "step": 25380 }, { "epoch": 1.3419307100763722, "grad_norm": 62.330413818359375, "learning_rate": 5e-06, "loss": 1.3276, "step": 25390 }, { "epoch": 1.3424592373351656, "grad_norm": 62.44997787475586, "learning_rate": 5e-06, "loss": 1.5946, "step": 25400 }, { "epoch": 1.342987764593959, "grad_norm": 42.908870697021484, "learning_rate": 5e-06, "loss": 1.2601, "step": 25410 }, { "epoch": 1.3435162918527523, "grad_norm": 48.972232818603516, "learning_rate": 5e-06, "loss": 1.4832, "step": 25420 }, { "epoch": 1.3440448191115457, "grad_norm": 103.5062484741211, "learning_rate": 5e-06, "loss": 1.4746, "step": 25430 }, { "epoch": 1.344573346370339, "grad_norm": 61.38337326049805, "learning_rate": 5e-06, "loss": 1.2503, "step": 25440 }, { "epoch": 1.3451018736291325, "grad_norm": 52.28123092651367, "learning_rate": 5e-06, "loss": 1.3221, "step": 25450 }, { "epoch": 1.3456304008879258, "grad_norm": 43.01793670654297, "learning_rate": 5e-06, "loss": 1.2428, "step": 25460 }, { "epoch": 1.3461589281467192, "grad_norm": 71.15867614746094, "learning_rate": 5e-06, "loss": 1.3487, "step": 25470 }, { "epoch": 1.3466874554055126, "grad_norm": 76.68399810791016, "learning_rate": 5e-06, "loss": 1.3073, "step": 25480 }, { "epoch": 1.347215982664306, "grad_norm": 72.68294525146484, "learning_rate": 5e-06, "loss": 1.6948, "step": 25490 }, { "epoch": 1.3477445099230994, "grad_norm": 44.39487075805664, "learning_rate": 5e-06, "loss": 1.2797, "step": 25500 }, { "epoch": 1.3482730371818927, "grad_norm": 89.91090393066406, "learning_rate": 5e-06, "loss": 1.5442, "step": 25510 }, { "epoch": 1.348801564440686, "grad_norm": 65.53841400146484, "learning_rate": 5e-06, "loss": 1.6778, "step": 25520 }, { "epoch": 1.3493300916994795, "grad_norm": 79.81978607177734, "learning_rate": 5e-06, "loss": 1.2776, "step": 25530 }, { "epoch": 1.3498586189582729, "grad_norm": 67.57823181152344, "learning_rate": 5e-06, "loss": 1.2785, "step": 25540 }, { "epoch": 1.350387146217066, "grad_norm": 68.23186492919922, "learning_rate": 5e-06, "loss": 1.4395, "step": 25550 }, { "epoch": 1.3509156734758596, "grad_norm": 99.14285278320312, "learning_rate": 5e-06, "loss": 1.3791, "step": 25560 }, { "epoch": 1.3514442007346528, "grad_norm": 74.54215240478516, "learning_rate": 5e-06, "loss": 1.3621, "step": 25570 }, { "epoch": 1.3519727279934464, "grad_norm": 91.33808898925781, "learning_rate": 5e-06, "loss": 1.7119, "step": 25580 }, { "epoch": 1.3525012552522395, "grad_norm": 50.29890823364258, "learning_rate": 5e-06, "loss": 1.2933, "step": 25590 }, { "epoch": 1.3530297825110331, "grad_norm": 58.37818145751953, "learning_rate": 5e-06, "loss": 1.5599, "step": 25600 }, { "epoch": 1.3535583097698263, "grad_norm": 30.571218490600586, "learning_rate": 5e-06, "loss": 1.3483, "step": 25610 }, { "epoch": 1.3540868370286199, "grad_norm": 72.60337829589844, "learning_rate": 5e-06, "loss": 1.4263, "step": 25620 }, { "epoch": 1.354615364287413, "grad_norm": 88.77098846435547, "learning_rate": 5e-06, "loss": 1.6627, "step": 25630 }, { "epoch": 1.3551438915462066, "grad_norm": 50.37411117553711, "learning_rate": 5e-06, "loss": 1.2777, "step": 25640 }, { "epoch": 1.3556724188049998, "grad_norm": 62.765281677246094, "learning_rate": 5e-06, "loss": 1.2762, "step": 25650 }, { "epoch": 1.3562009460637934, "grad_norm": 89.9432601928711, "learning_rate": 5e-06, "loss": 1.6065, "step": 25660 }, { "epoch": 1.3567294733225865, "grad_norm": 63.35468673706055, "learning_rate": 5e-06, "loss": 1.4379, "step": 25670 }, { "epoch": 1.35725800058138, "grad_norm": 59.51318359375, "learning_rate": 5e-06, "loss": 1.4076, "step": 25680 }, { "epoch": 1.3577865278401733, "grad_norm": 65.0709228515625, "learning_rate": 5e-06, "loss": 1.4773, "step": 25690 }, { "epoch": 1.3583150550989667, "grad_norm": 65.53413391113281, "learning_rate": 5e-06, "loss": 1.443, "step": 25700 }, { "epoch": 1.35884358235776, "grad_norm": 55.34664535522461, "learning_rate": 5e-06, "loss": 1.329, "step": 25710 }, { "epoch": 1.3593721096165534, "grad_norm": 45.915306091308594, "learning_rate": 5e-06, "loss": 1.1682, "step": 25720 }, { "epoch": 1.3599006368753468, "grad_norm": 49.234676361083984, "learning_rate": 5e-06, "loss": 1.5466, "step": 25730 }, { "epoch": 1.3604291641341402, "grad_norm": 96.20394897460938, "learning_rate": 5e-06, "loss": 1.3015, "step": 25740 }, { "epoch": 1.3609576913929335, "grad_norm": 93.49691772460938, "learning_rate": 5e-06, "loss": 1.3706, "step": 25750 }, { "epoch": 1.361486218651727, "grad_norm": 60.12972640991211, "learning_rate": 5e-06, "loss": 1.4179, "step": 25760 }, { "epoch": 1.3620147459105203, "grad_norm": 68.01089477539062, "learning_rate": 5e-06, "loss": 1.4155, "step": 25770 }, { "epoch": 1.3625432731693137, "grad_norm": 43.34279251098633, "learning_rate": 5e-06, "loss": 1.3235, "step": 25780 }, { "epoch": 1.363071800428107, "grad_norm": 68.63853454589844, "learning_rate": 5e-06, "loss": 1.2118, "step": 25790 }, { "epoch": 1.3636003276869004, "grad_norm": 59.83198547363281, "learning_rate": 5e-06, "loss": 1.4118, "step": 25800 }, { "epoch": 1.3641288549456938, "grad_norm": 68.70552825927734, "learning_rate": 5e-06, "loss": 1.3452, "step": 25810 }, { "epoch": 1.3646573822044872, "grad_norm": 63.74447250366211, "learning_rate": 5e-06, "loss": 1.3974, "step": 25820 }, { "epoch": 1.3651859094632806, "grad_norm": 70.64337158203125, "learning_rate": 5e-06, "loss": 1.2098, "step": 25830 }, { "epoch": 1.365714436722074, "grad_norm": 68.93479919433594, "learning_rate": 5e-06, "loss": 1.4508, "step": 25840 }, { "epoch": 1.3662429639808673, "grad_norm": 98.18553924560547, "learning_rate": 5e-06, "loss": 1.4671, "step": 25850 }, { "epoch": 1.3667714912396607, "grad_norm": 72.63155364990234, "learning_rate": 5e-06, "loss": 1.2897, "step": 25860 }, { "epoch": 1.367300018498454, "grad_norm": 61.19690704345703, "learning_rate": 5e-06, "loss": 1.8245, "step": 25870 }, { "epoch": 1.3678285457572474, "grad_norm": 39.3768424987793, "learning_rate": 5e-06, "loss": 1.4318, "step": 25880 }, { "epoch": 1.3683570730160408, "grad_norm": 57.610469818115234, "learning_rate": 5e-06, "loss": 1.4373, "step": 25890 }, { "epoch": 1.3688856002748342, "grad_norm": 47.71773910522461, "learning_rate": 5e-06, "loss": 1.4317, "step": 25900 }, { "epoch": 1.3694141275336276, "grad_norm": 68.4867935180664, "learning_rate": 5e-06, "loss": 1.2225, "step": 25910 }, { "epoch": 1.369942654792421, "grad_norm": 38.598384857177734, "learning_rate": 5e-06, "loss": 1.3764, "step": 25920 }, { "epoch": 1.3704711820512143, "grad_norm": 65.88053894042969, "learning_rate": 5e-06, "loss": 1.5223, "step": 25930 }, { "epoch": 1.3709997093100077, "grad_norm": 49.127140045166016, "learning_rate": 5e-06, "loss": 1.4253, "step": 25940 }, { "epoch": 1.371528236568801, "grad_norm": 70.79894256591797, "learning_rate": 5e-06, "loss": 1.6002, "step": 25950 }, { "epoch": 1.3720567638275944, "grad_norm": 70.8016586303711, "learning_rate": 5e-06, "loss": 1.7215, "step": 25960 }, { "epoch": 1.3725852910863878, "grad_norm": 66.50489044189453, "learning_rate": 5e-06, "loss": 1.4485, "step": 25970 }, { "epoch": 1.3731138183451812, "grad_norm": 68.29978942871094, "learning_rate": 5e-06, "loss": 1.5346, "step": 25980 }, { "epoch": 1.3736423456039746, "grad_norm": 62.675357818603516, "learning_rate": 5e-06, "loss": 1.139, "step": 25990 }, { "epoch": 1.374170872862768, "grad_norm": 47.147804260253906, "learning_rate": 5e-06, "loss": 1.3574, "step": 26000 }, { "epoch": 1.374170872862768, "eval_loss": 1.4332319498062134, "eval_runtime": 35.8317, "eval_samples_per_second": 341.374, "eval_steps_per_second": 10.689, "eval_sts-dev_pearson_cosine": 0.8476705066822213, "eval_sts-dev_pearson_dot": 0.8282369660968805, "eval_sts-dev_pearson_euclidean": 0.8495478518583333, "eval_sts-dev_pearson_manhattan": 0.8493810399074369, "eval_sts-dev_pearson_max": 0.8495478518583333, "eval_sts-dev_spearman_cosine": 0.8494014112465526, "eval_sts-dev_spearman_dot": 0.8245036230114486, "eval_sts-dev_spearman_euclidean": 0.8498380586344962, "eval_sts-dev_spearman_manhattan": 0.8493311295221174, "eval_sts-dev_spearman_max": 0.8498380586344962, "step": 26000 }, { "epoch": 1.3746994001215613, "grad_norm": 84.86600494384766, "learning_rate": 5e-06, "loss": 1.413, "step": 26010 }, { "epoch": 1.3752279273803547, "grad_norm": 80.62078857421875, "learning_rate": 5e-06, "loss": 1.5234, "step": 26020 }, { "epoch": 1.375756454639148, "grad_norm": 44.674625396728516, "learning_rate": 5e-06, "loss": 1.3617, "step": 26030 }, { "epoch": 1.3762849818979415, "grad_norm": 55.07393264770508, "learning_rate": 5e-06, "loss": 1.2622, "step": 26040 }, { "epoch": 1.3768135091567348, "grad_norm": 84.41937255859375, "learning_rate": 5e-06, "loss": 1.5531, "step": 26050 }, { "epoch": 1.377342036415528, "grad_norm": 40.893558502197266, "learning_rate": 5e-06, "loss": 1.4444, "step": 26060 }, { "epoch": 1.3778705636743216, "grad_norm": 80.69245147705078, "learning_rate": 5e-06, "loss": 1.5764, "step": 26070 }, { "epoch": 1.3783990909331147, "grad_norm": 53.1085319519043, "learning_rate": 5e-06, "loss": 1.1335, "step": 26080 }, { "epoch": 1.3789276181919083, "grad_norm": 71.07862854003906, "learning_rate": 5e-06, "loss": 1.2873, "step": 26090 }, { "epoch": 1.3794561454507015, "grad_norm": 63.140647888183594, "learning_rate": 5e-06, "loss": 1.7056, "step": 26100 }, { "epoch": 1.379984672709495, "grad_norm": 84.41468048095703, "learning_rate": 5e-06, "loss": 1.3879, "step": 26110 }, { "epoch": 1.3805131999682883, "grad_norm": 44.587013244628906, "learning_rate": 5e-06, "loss": 1.3744, "step": 26120 }, { "epoch": 1.3810417272270819, "grad_norm": 53.12594985961914, "learning_rate": 5e-06, "loss": 1.3928, "step": 26130 }, { "epoch": 1.381570254485875, "grad_norm": 76.42822265625, "learning_rate": 5e-06, "loss": 1.52, "step": 26140 }, { "epoch": 1.3820987817446686, "grad_norm": 51.9161376953125, "learning_rate": 5e-06, "loss": 1.3164, "step": 26150 }, { "epoch": 1.3826273090034618, "grad_norm": 53.820438385009766, "learning_rate": 5e-06, "loss": 1.2963, "step": 26160 }, { "epoch": 1.3831558362622554, "grad_norm": 47.894859313964844, "learning_rate": 5e-06, "loss": 1.6346, "step": 26170 }, { "epoch": 1.3836843635210485, "grad_norm": 55.403038024902344, "learning_rate": 5e-06, "loss": 1.2227, "step": 26180 }, { "epoch": 1.384212890779842, "grad_norm": 62.149166107177734, "learning_rate": 5e-06, "loss": 1.3057, "step": 26190 }, { "epoch": 1.3847414180386353, "grad_norm": 61.94588851928711, "learning_rate": 5e-06, "loss": 1.2474, "step": 26200 }, { "epoch": 1.3852699452974286, "grad_norm": 65.23636627197266, "learning_rate": 5e-06, "loss": 1.1822, "step": 26210 }, { "epoch": 1.385798472556222, "grad_norm": 55.17327117919922, "learning_rate": 5e-06, "loss": 1.4227, "step": 26220 }, { "epoch": 1.3863269998150154, "grad_norm": 43.22939682006836, "learning_rate": 5e-06, "loss": 1.5859, "step": 26230 }, { "epoch": 1.3868555270738088, "grad_norm": 48.25136184692383, "learning_rate": 5e-06, "loss": 1.1431, "step": 26240 }, { "epoch": 1.3873840543326021, "grad_norm": 87.05284118652344, "learning_rate": 5e-06, "loss": 1.4138, "step": 26250 }, { "epoch": 1.3879125815913955, "grad_norm": 66.72940826416016, "learning_rate": 5e-06, "loss": 1.346, "step": 26260 }, { "epoch": 1.388441108850189, "grad_norm": 43.77520751953125, "learning_rate": 5e-06, "loss": 1.6111, "step": 26270 }, { "epoch": 1.3889696361089823, "grad_norm": 57.103763580322266, "learning_rate": 5e-06, "loss": 1.3444, "step": 26280 }, { "epoch": 1.3894981633677757, "grad_norm": 59.59187316894531, "learning_rate": 5e-06, "loss": 1.5087, "step": 26290 }, { "epoch": 1.390026690626569, "grad_norm": 47.62262725830078, "learning_rate": 5e-06, "loss": 1.3695, "step": 26300 }, { "epoch": 1.3905552178853624, "grad_norm": 61.805580139160156, "learning_rate": 5e-06, "loss": 1.5192, "step": 26310 }, { "epoch": 1.3910837451441558, "grad_norm": 50.83667755126953, "learning_rate": 5e-06, "loss": 0.9394, "step": 26320 }, { "epoch": 1.3916122724029492, "grad_norm": 54.8495979309082, "learning_rate": 5e-06, "loss": 1.6369, "step": 26330 }, { "epoch": 1.3921407996617425, "grad_norm": 50.81016540527344, "learning_rate": 5e-06, "loss": 1.467, "step": 26340 }, { "epoch": 1.392669326920536, "grad_norm": 71.46753692626953, "learning_rate": 5e-06, "loss": 1.3525, "step": 26350 }, { "epoch": 1.3931978541793293, "grad_norm": 53.55111312866211, "learning_rate": 5e-06, "loss": 1.5752, "step": 26360 }, { "epoch": 1.3937263814381227, "grad_norm": 63.387062072753906, "learning_rate": 5e-06, "loss": 1.299, "step": 26370 }, { "epoch": 1.394254908696916, "grad_norm": 52.16547393798828, "learning_rate": 5e-06, "loss": 1.3364, "step": 26380 }, { "epoch": 1.3947834359557094, "grad_norm": 55.919822692871094, "learning_rate": 5e-06, "loss": 1.4143, "step": 26390 }, { "epoch": 1.3953119632145028, "grad_norm": 72.7220458984375, "learning_rate": 5e-06, "loss": 1.2453, "step": 26400 }, { "epoch": 1.3958404904732962, "grad_norm": 55.329830169677734, "learning_rate": 5e-06, "loss": 1.3289, "step": 26410 }, { "epoch": 1.3963690177320895, "grad_norm": 82.56898498535156, "learning_rate": 5e-06, "loss": 1.441, "step": 26420 }, { "epoch": 1.396897544990883, "grad_norm": 66.53010559082031, "learning_rate": 5e-06, "loss": 1.5168, "step": 26430 }, { "epoch": 1.3974260722496763, "grad_norm": 50.42685317993164, "learning_rate": 5e-06, "loss": 1.7217, "step": 26440 }, { "epoch": 1.3979545995084697, "grad_norm": 49.1522102355957, "learning_rate": 5e-06, "loss": 1.2579, "step": 26450 }, { "epoch": 1.398483126767263, "grad_norm": 98.51691436767578, "learning_rate": 5e-06, "loss": 1.4011, "step": 26460 }, { "epoch": 1.3990116540260564, "grad_norm": 63.603309631347656, "learning_rate": 5e-06, "loss": 1.532, "step": 26470 }, { "epoch": 1.3995401812848498, "grad_norm": 52.62284851074219, "learning_rate": 5e-06, "loss": 0.9462, "step": 26480 }, { "epoch": 1.4000687085436432, "grad_norm": 64.18932342529297, "learning_rate": 5e-06, "loss": 1.4849, "step": 26490 }, { "epoch": 1.4005972358024366, "grad_norm": 58.68248748779297, "learning_rate": 5e-06, "loss": 1.0011, "step": 26500 }, { "epoch": 1.40112576306123, "grad_norm": 85.86737823486328, "learning_rate": 5e-06, "loss": 1.3581, "step": 26510 }, { "epoch": 1.4016542903200233, "grad_norm": 51.310150146484375, "learning_rate": 5e-06, "loss": 1.4546, "step": 26520 }, { "epoch": 1.4021828175788167, "grad_norm": 65.8616943359375, "learning_rate": 5e-06, "loss": 1.6453, "step": 26530 }, { "epoch": 1.40271134483761, "grad_norm": 61.066444396972656, "learning_rate": 5e-06, "loss": 1.3827, "step": 26540 }, { "epoch": 1.4032398720964034, "grad_norm": 76.20526123046875, "learning_rate": 5e-06, "loss": 1.4386, "step": 26550 }, { "epoch": 1.4037683993551968, "grad_norm": 71.34507751464844, "learning_rate": 5e-06, "loss": 1.6454, "step": 26560 }, { "epoch": 1.4042969266139902, "grad_norm": 89.59974670410156, "learning_rate": 5e-06, "loss": 1.4464, "step": 26570 }, { "epoch": 1.4048254538727836, "grad_norm": 99.05787658691406, "learning_rate": 5e-06, "loss": 1.562, "step": 26580 }, { "epoch": 1.4053539811315767, "grad_norm": 58.2246208190918, "learning_rate": 5e-06, "loss": 1.6585, "step": 26590 }, { "epoch": 1.4058825083903703, "grad_norm": 89.72025299072266, "learning_rate": 5e-06, "loss": 1.4662, "step": 26600 }, { "epoch": 1.4064110356491635, "grad_norm": 64.04378509521484, "learning_rate": 5e-06, "loss": 1.484, "step": 26610 }, { "epoch": 1.406939562907957, "grad_norm": 48.10454177856445, "learning_rate": 5e-06, "loss": 1.3523, "step": 26620 }, { "epoch": 1.4074680901667502, "grad_norm": 43.99842834472656, "learning_rate": 5e-06, "loss": 1.5854, "step": 26630 }, { "epoch": 1.4079966174255438, "grad_norm": 69.51238250732422, "learning_rate": 5e-06, "loss": 1.3088, "step": 26640 }, { "epoch": 1.408525144684337, "grad_norm": 73.61998748779297, "learning_rate": 5e-06, "loss": 1.6039, "step": 26650 }, { "epoch": 1.4090536719431306, "grad_norm": 71.82689666748047, "learning_rate": 5e-06, "loss": 1.3545, "step": 26660 }, { "epoch": 1.4095821992019237, "grad_norm": 83.68892669677734, "learning_rate": 5e-06, "loss": 1.5303, "step": 26670 }, { "epoch": 1.4101107264607173, "grad_norm": 69.83113861083984, "learning_rate": 5e-06, "loss": 1.4031, "step": 26680 }, { "epoch": 1.4106392537195105, "grad_norm": 102.68572998046875, "learning_rate": 5e-06, "loss": 1.4035, "step": 26690 }, { "epoch": 1.411167780978304, "grad_norm": 79.48609924316406, "learning_rate": 5e-06, "loss": 1.443, "step": 26700 }, { "epoch": 1.4116963082370972, "grad_norm": 50.052066802978516, "learning_rate": 5e-06, "loss": 1.3533, "step": 26710 }, { "epoch": 1.4122248354958906, "grad_norm": 101.26806640625, "learning_rate": 5e-06, "loss": 1.6663, "step": 26720 }, { "epoch": 1.412753362754684, "grad_norm": 69.98069763183594, "learning_rate": 5e-06, "loss": 1.6035, "step": 26730 }, { "epoch": 1.4132818900134774, "grad_norm": 70.42622375488281, "learning_rate": 5e-06, "loss": 1.6864, "step": 26740 }, { "epoch": 1.4138104172722707, "grad_norm": 48.981651306152344, "learning_rate": 5e-06, "loss": 1.1615, "step": 26750 }, { "epoch": 1.4143389445310641, "grad_norm": 131.1712188720703, "learning_rate": 5e-06, "loss": 1.334, "step": 26760 }, { "epoch": 1.4148674717898575, "grad_norm": 54.12564468383789, "learning_rate": 5e-06, "loss": 1.2026, "step": 26770 }, { "epoch": 1.4153959990486509, "grad_norm": 78.52925109863281, "learning_rate": 5e-06, "loss": 1.4036, "step": 26780 }, { "epoch": 1.4159245263074443, "grad_norm": 68.8085708618164, "learning_rate": 5e-06, "loss": 1.2631, "step": 26790 }, { "epoch": 1.4164530535662376, "grad_norm": 51.5068244934082, "learning_rate": 5e-06, "loss": 1.3384, "step": 26800 }, { "epoch": 1.416981580825031, "grad_norm": 44.22559356689453, "learning_rate": 5e-06, "loss": 1.0239, "step": 26810 }, { "epoch": 1.4175101080838244, "grad_norm": 40.01795959472656, "learning_rate": 5e-06, "loss": 1.4007, "step": 26820 }, { "epoch": 1.4180386353426178, "grad_norm": 62.4746208190918, "learning_rate": 5e-06, "loss": 1.2957, "step": 26830 }, { "epoch": 1.4185671626014111, "grad_norm": 42.94112777709961, "learning_rate": 5e-06, "loss": 1.1866, "step": 26840 }, { "epoch": 1.4190956898602045, "grad_norm": 31.254697799682617, "learning_rate": 5e-06, "loss": 1.3631, "step": 26850 }, { "epoch": 1.4196242171189979, "grad_norm": 56.444889068603516, "learning_rate": 5e-06, "loss": 1.2776, "step": 26860 }, { "epoch": 1.4201527443777913, "grad_norm": 40.88205337524414, "learning_rate": 5e-06, "loss": 1.2961, "step": 26870 }, { "epoch": 1.4206812716365846, "grad_norm": 57.00086975097656, "learning_rate": 5e-06, "loss": 1.225, "step": 26880 }, { "epoch": 1.421209798895378, "grad_norm": 74.32014465332031, "learning_rate": 5e-06, "loss": 1.0656, "step": 26890 }, { "epoch": 1.4217383261541714, "grad_norm": 85.57426452636719, "learning_rate": 5e-06, "loss": 1.6237, "step": 26900 }, { "epoch": 1.4222668534129648, "grad_norm": 102.16463470458984, "learning_rate": 5e-06, "loss": 1.5179, "step": 26910 }, { "epoch": 1.4227953806717581, "grad_norm": 23.57941246032715, "learning_rate": 5e-06, "loss": 1.2943, "step": 26920 }, { "epoch": 1.4233239079305515, "grad_norm": 58.260520935058594, "learning_rate": 5e-06, "loss": 1.6227, "step": 26930 }, { "epoch": 1.423852435189345, "grad_norm": 79.81748962402344, "learning_rate": 5e-06, "loss": 1.4059, "step": 26940 }, { "epoch": 1.4243809624481383, "grad_norm": 57.70566940307617, "learning_rate": 5e-06, "loss": 1.6413, "step": 26950 }, { "epoch": 1.4249094897069317, "grad_norm": 80.11104583740234, "learning_rate": 5e-06, "loss": 1.283, "step": 26960 }, { "epoch": 1.425438016965725, "grad_norm": 85.52108001708984, "learning_rate": 5e-06, "loss": 1.9268, "step": 26970 }, { "epoch": 1.4259665442245184, "grad_norm": 48.68708801269531, "learning_rate": 5e-06, "loss": 1.3124, "step": 26980 }, { "epoch": 1.4264950714833118, "grad_norm": 60.646949768066406, "learning_rate": 5e-06, "loss": 1.4371, "step": 26990 }, { "epoch": 1.4270235987421052, "grad_norm": 60.01787185668945, "learning_rate": 5e-06, "loss": 1.2538, "step": 27000 }, { "epoch": 1.4270235987421052, "eval_loss": 1.4315805435180664, "eval_runtime": 41.2017, "eval_samples_per_second": 296.881, "eval_steps_per_second": 9.296, "eval_sts-dev_pearson_cosine": 0.8452312608953445, "eval_sts-dev_pearson_dot": 0.8222097038870045, "eval_sts-dev_pearson_euclidean": 0.8473390550004385, "eval_sts-dev_pearson_manhattan": 0.8468586533192901, "eval_sts-dev_pearson_max": 0.8473390550004385, "eval_sts-dev_spearman_cosine": 0.847166331770994, "eval_sts-dev_spearman_dot": 0.8193886387317536, "eval_sts-dev_spearman_euclidean": 0.8496942123232349, "eval_sts-dev_spearman_manhattan": 0.8489607443539252, "eval_sts-dev_spearman_max": 0.8496942123232349, "step": 27000 }, { "epoch": 1.4275521260008985, "grad_norm": 37.31453323364258, "learning_rate": 5e-06, "loss": 1.3188, "step": 27010 }, { "epoch": 1.428080653259692, "grad_norm": 54.16949462890625, "learning_rate": 5e-06, "loss": 1.3011, "step": 27020 }, { "epoch": 1.4286091805184853, "grad_norm": 43.587425231933594, "learning_rate": 5e-06, "loss": 1.7466, "step": 27030 }, { "epoch": 1.4291377077772787, "grad_norm": 69.0407943725586, "learning_rate": 5e-06, "loss": 1.5376, "step": 27040 }, { "epoch": 1.429666235036072, "grad_norm": 52.597991943359375, "learning_rate": 5e-06, "loss": 1.2929, "step": 27050 }, { "epoch": 1.4301947622948654, "grad_norm": 48.14737319946289, "learning_rate": 5e-06, "loss": 1.4944, "step": 27060 }, { "epoch": 1.4307232895536588, "grad_norm": 58.386817932128906, "learning_rate": 5e-06, "loss": 1.5083, "step": 27070 }, { "epoch": 1.4312518168124522, "grad_norm": 67.6727294921875, "learning_rate": 5e-06, "loss": 1.1396, "step": 27080 }, { "epoch": 1.4317803440712455, "grad_norm": 74.73783874511719, "learning_rate": 5e-06, "loss": 1.3856, "step": 27090 }, { "epoch": 1.432308871330039, "grad_norm": 58.34941482543945, "learning_rate": 5e-06, "loss": 1.4283, "step": 27100 }, { "epoch": 1.4328373985888323, "grad_norm": 33.018123626708984, "learning_rate": 5e-06, "loss": 1.4789, "step": 27110 }, { "epoch": 1.4333659258476255, "grad_norm": 70.13162994384766, "learning_rate": 5e-06, "loss": 1.3563, "step": 27120 }, { "epoch": 1.433894453106419, "grad_norm": 51.19402313232422, "learning_rate": 5e-06, "loss": 1.2665, "step": 27130 }, { "epoch": 1.4344229803652122, "grad_norm": 77.98644256591797, "learning_rate": 5e-06, "loss": 1.3829, "step": 27140 }, { "epoch": 1.4349515076240058, "grad_norm": 55.62959671020508, "learning_rate": 5e-06, "loss": 1.5097, "step": 27150 }, { "epoch": 1.435480034882799, "grad_norm": 78.89651489257812, "learning_rate": 5e-06, "loss": 1.4863, "step": 27160 }, { "epoch": 1.4360085621415926, "grad_norm": 53.27971649169922, "learning_rate": 5e-06, "loss": 1.4228, "step": 27170 }, { "epoch": 1.4365370894003857, "grad_norm": 53.27082824707031, "learning_rate": 5e-06, "loss": 1.4487, "step": 27180 }, { "epoch": 1.4370656166591793, "grad_norm": 47.34971237182617, "learning_rate": 5e-06, "loss": 1.3047, "step": 27190 }, { "epoch": 1.4375941439179725, "grad_norm": 34.1762580871582, "learning_rate": 5e-06, "loss": 0.9655, "step": 27200 }, { "epoch": 1.438122671176766, "grad_norm": 46.15986251831055, "learning_rate": 5e-06, "loss": 1.4538, "step": 27210 }, { "epoch": 1.4386511984355592, "grad_norm": 60.206512451171875, "learning_rate": 5e-06, "loss": 1.7073, "step": 27220 }, { "epoch": 1.4391797256943528, "grad_norm": 71.33903503417969, "learning_rate": 5e-06, "loss": 1.5169, "step": 27230 }, { "epoch": 1.439708252953146, "grad_norm": 68.44621276855469, "learning_rate": 5e-06, "loss": 1.1307, "step": 27240 }, { "epoch": 1.4402367802119393, "grad_norm": 56.586936950683594, "learning_rate": 5e-06, "loss": 1.2221, "step": 27250 }, { "epoch": 1.4407653074707327, "grad_norm": 67.57179260253906, "learning_rate": 5e-06, "loss": 1.5551, "step": 27260 }, { "epoch": 1.441293834729526, "grad_norm": 52.97224807739258, "learning_rate": 5e-06, "loss": 1.2932, "step": 27270 }, { "epoch": 1.4418223619883195, "grad_norm": 57.22117233276367, "learning_rate": 5e-06, "loss": 1.3686, "step": 27280 }, { "epoch": 1.4423508892471129, "grad_norm": 65.64624786376953, "learning_rate": 5e-06, "loss": 1.2983, "step": 27290 }, { "epoch": 1.4428794165059062, "grad_norm": 44.94557571411133, "learning_rate": 5e-06, "loss": 1.4017, "step": 27300 }, { "epoch": 1.4434079437646996, "grad_norm": 67.75517272949219, "learning_rate": 5e-06, "loss": 1.7345, "step": 27310 }, { "epoch": 1.443936471023493, "grad_norm": 69.31488800048828, "learning_rate": 5e-06, "loss": 1.2136, "step": 27320 }, { "epoch": 1.4444649982822864, "grad_norm": 61.72981643676758, "learning_rate": 5e-06, "loss": 1.5219, "step": 27330 }, { "epoch": 1.4449935255410797, "grad_norm": 69.48900604248047, "learning_rate": 5e-06, "loss": 1.4502, "step": 27340 }, { "epoch": 1.4455220527998731, "grad_norm": 66.30689239501953, "learning_rate": 5e-06, "loss": 1.4887, "step": 27350 }, { "epoch": 1.4460505800586665, "grad_norm": 40.157188415527344, "learning_rate": 5e-06, "loss": 1.2679, "step": 27360 }, { "epoch": 1.4465791073174599, "grad_norm": 45.79176712036133, "learning_rate": 5e-06, "loss": 1.3586, "step": 27370 }, { "epoch": 1.4471076345762532, "grad_norm": 57.3416748046875, "learning_rate": 5e-06, "loss": 1.2728, "step": 27380 }, { "epoch": 1.4476361618350466, "grad_norm": 63.48541259765625, "learning_rate": 5e-06, "loss": 1.2759, "step": 27390 }, { "epoch": 1.44816468909384, "grad_norm": 72.09097290039062, "learning_rate": 5e-06, "loss": 1.9543, "step": 27400 }, { "epoch": 1.4486932163526334, "grad_norm": 83.44795227050781, "learning_rate": 5e-06, "loss": 1.4094, "step": 27410 }, { "epoch": 1.4492217436114267, "grad_norm": 59.34762191772461, "learning_rate": 5e-06, "loss": 1.332, "step": 27420 }, { "epoch": 1.4497502708702201, "grad_norm": 53.53961181640625, "learning_rate": 5e-06, "loss": 1.5266, "step": 27430 }, { "epoch": 1.4502787981290135, "grad_norm": 68.25826263427734, "learning_rate": 5e-06, "loss": 1.2271, "step": 27440 }, { "epoch": 1.4508073253878069, "grad_norm": 67.78778076171875, "learning_rate": 5e-06, "loss": 1.3237, "step": 27450 }, { "epoch": 1.4513358526466003, "grad_norm": 72.86115264892578, "learning_rate": 5e-06, "loss": 1.3955, "step": 27460 }, { "epoch": 1.4518643799053936, "grad_norm": 38.24332809448242, "learning_rate": 5e-06, "loss": 1.4082, "step": 27470 }, { "epoch": 1.452392907164187, "grad_norm": 56.53731918334961, "learning_rate": 5e-06, "loss": 1.2653, "step": 27480 }, { "epoch": 1.4529214344229804, "grad_norm": 66.32444763183594, "learning_rate": 5e-06, "loss": 1.404, "step": 27490 }, { "epoch": 1.4534499616817738, "grad_norm": 73.50299072265625, "learning_rate": 5e-06, "loss": 1.2553, "step": 27500 }, { "epoch": 1.4539784889405671, "grad_norm": 49.33648681640625, "learning_rate": 5e-06, "loss": 1.3003, "step": 27510 }, { "epoch": 1.4545070161993605, "grad_norm": 63.72330856323242, "learning_rate": 5e-06, "loss": 1.3, "step": 27520 }, { "epoch": 1.455035543458154, "grad_norm": 47.22054672241211, "learning_rate": 5e-06, "loss": 1.292, "step": 27530 }, { "epoch": 1.4555640707169473, "grad_norm": 62.81514358520508, "learning_rate": 5e-06, "loss": 1.592, "step": 27540 }, { "epoch": 1.4560925979757406, "grad_norm": 42.2614631652832, "learning_rate": 5e-06, "loss": 1.3063, "step": 27550 }, { "epoch": 1.456621125234534, "grad_norm": 77.90274047851562, "learning_rate": 5e-06, "loss": 1.2689, "step": 27560 }, { "epoch": 1.4571496524933274, "grad_norm": 65.83458709716797, "learning_rate": 5e-06, "loss": 1.3459, "step": 27570 }, { "epoch": 1.4576781797521208, "grad_norm": 92.39794158935547, "learning_rate": 5e-06, "loss": 1.7483, "step": 27580 }, { "epoch": 1.4582067070109141, "grad_norm": 41.64563751220703, "learning_rate": 5e-06, "loss": 1.4471, "step": 27590 }, { "epoch": 1.4587352342697075, "grad_norm": 64.26761627197266, "learning_rate": 5e-06, "loss": 1.5654, "step": 27600 }, { "epoch": 1.459263761528501, "grad_norm": 56.29517364501953, "learning_rate": 5e-06, "loss": 1.3693, "step": 27610 }, { "epoch": 1.4597922887872943, "grad_norm": 98.33228302001953, "learning_rate": 5e-06, "loss": 1.4332, "step": 27620 }, { "epoch": 1.4603208160460877, "grad_norm": 62.04802703857422, "learning_rate": 5e-06, "loss": 1.3175, "step": 27630 }, { "epoch": 1.460849343304881, "grad_norm": 39.1851806640625, "learning_rate": 5e-06, "loss": 1.1179, "step": 27640 }, { "epoch": 1.4613778705636742, "grad_norm": 45.58676528930664, "learning_rate": 5e-06, "loss": 1.3653, "step": 27650 }, { "epoch": 1.4619063978224678, "grad_norm": 93.98681640625, "learning_rate": 5e-06, "loss": 1.6226, "step": 27660 }, { "epoch": 1.462434925081261, "grad_norm": 51.1893196105957, "learning_rate": 5e-06, "loss": 1.1513, "step": 27670 }, { "epoch": 1.4629634523400545, "grad_norm": 67.12871551513672, "learning_rate": 5e-06, "loss": 1.7157, "step": 27680 }, { "epoch": 1.4634919795988477, "grad_norm": 39.640716552734375, "learning_rate": 5e-06, "loss": 1.2262, "step": 27690 }, { "epoch": 1.4640205068576413, "grad_norm": 72.05549621582031, "learning_rate": 5e-06, "loss": 1.4235, "step": 27700 }, { "epoch": 1.4645490341164344, "grad_norm": 65.13201141357422, "learning_rate": 5e-06, "loss": 1.7397, "step": 27710 }, { "epoch": 1.465077561375228, "grad_norm": 49.99568557739258, "learning_rate": 5e-06, "loss": 1.3928, "step": 27720 }, { "epoch": 1.4656060886340212, "grad_norm": 53.932525634765625, "learning_rate": 5e-06, "loss": 1.2622, "step": 27730 }, { "epoch": 1.4661346158928148, "grad_norm": 54.242393493652344, "learning_rate": 5e-06, "loss": 1.6273, "step": 27740 }, { "epoch": 1.466663143151608, "grad_norm": 53.63741683959961, "learning_rate": 5e-06, "loss": 1.3208, "step": 27750 }, { "epoch": 1.4671916704104015, "grad_norm": 59.44740676879883, "learning_rate": 5e-06, "loss": 1.358, "step": 27760 }, { "epoch": 1.4677201976691947, "grad_norm": 51.524452209472656, "learning_rate": 5e-06, "loss": 1.2138, "step": 27770 }, { "epoch": 1.468248724927988, "grad_norm": 70.0676498413086, "learning_rate": 5e-06, "loss": 1.3929, "step": 27780 }, { "epoch": 1.4687772521867815, "grad_norm": 83.3761978149414, "learning_rate": 5e-06, "loss": 1.4409, "step": 27790 }, { "epoch": 1.4693057794455748, "grad_norm": 116.24015045166016, "learning_rate": 5e-06, "loss": 1.0861, "step": 27800 }, { "epoch": 1.4698343067043682, "grad_norm": 48.75605392456055, "learning_rate": 5e-06, "loss": 1.7136, "step": 27810 }, { "epoch": 1.4703628339631616, "grad_norm": 65.49051666259766, "learning_rate": 5e-06, "loss": 1.1485, "step": 27820 }, { "epoch": 1.470891361221955, "grad_norm": 37.05879592895508, "learning_rate": 5e-06, "loss": 1.2034, "step": 27830 }, { "epoch": 1.4714198884807483, "grad_norm": 69.75357818603516, "learning_rate": 5e-06, "loss": 1.1466, "step": 27840 }, { "epoch": 1.4719484157395417, "grad_norm": 84.42174530029297, "learning_rate": 5e-06, "loss": 1.5648, "step": 27850 }, { "epoch": 1.472476942998335, "grad_norm": 55.741493225097656, "learning_rate": 5e-06, "loss": 1.6465, "step": 27860 }, { "epoch": 1.4730054702571285, "grad_norm": 35.6137809753418, "learning_rate": 5e-06, "loss": 1.2695, "step": 27870 }, { "epoch": 1.4735339975159218, "grad_norm": 104.27767181396484, "learning_rate": 5e-06, "loss": 1.4641, "step": 27880 }, { "epoch": 1.4740625247747152, "grad_norm": 67.0749282836914, "learning_rate": 5e-06, "loss": 1.0531, "step": 27890 }, { "epoch": 1.4745910520335086, "grad_norm": 63.6508903503418, "learning_rate": 5e-06, "loss": 1.4085, "step": 27900 }, { "epoch": 1.475119579292302, "grad_norm": 130.92929077148438, "learning_rate": 5e-06, "loss": 1.5541, "step": 27910 }, { "epoch": 1.4756481065510954, "grad_norm": 85.19059753417969, "learning_rate": 5e-06, "loss": 1.4059, "step": 27920 }, { "epoch": 1.4761766338098887, "grad_norm": 106.1278305053711, "learning_rate": 5e-06, "loss": 1.6293, "step": 27930 }, { "epoch": 1.476705161068682, "grad_norm": 33.52511215209961, "learning_rate": 5e-06, "loss": 1.4452, "step": 27940 }, { "epoch": 1.4772336883274755, "grad_norm": 38.918296813964844, "learning_rate": 5e-06, "loss": 1.0246, "step": 27950 }, { "epoch": 1.4777622155862689, "grad_norm": 68.29440307617188, "learning_rate": 5e-06, "loss": 1.67, "step": 27960 }, { "epoch": 1.4782907428450622, "grad_norm": 55.81547546386719, "learning_rate": 5e-06, "loss": 1.6882, "step": 27970 }, { "epoch": 1.4788192701038556, "grad_norm": 50.82730484008789, "learning_rate": 5e-06, "loss": 1.3278, "step": 27980 }, { "epoch": 1.479347797362649, "grad_norm": 54.76054382324219, "learning_rate": 5e-06, "loss": 1.3573, "step": 27990 }, { "epoch": 1.4798763246214424, "grad_norm": 81.99494934082031, "learning_rate": 5e-06, "loss": 1.4006, "step": 28000 }, { "epoch": 1.4798763246214424, "eval_loss": 1.424312949180603, "eval_runtime": 43.9625, "eval_samples_per_second": 278.237, "eval_steps_per_second": 8.712, "eval_sts-dev_pearson_cosine": 0.8475671314519161, "eval_sts-dev_pearson_dot": 0.823742782894056, "eval_sts-dev_pearson_euclidean": 0.8490732485844006, "eval_sts-dev_pearson_manhattan": 0.8490532571516202, "eval_sts-dev_pearson_max": 0.8490732485844006, "eval_sts-dev_spearman_cosine": 0.8486615356421636, "eval_sts-dev_spearman_dot": 0.8194653456136903, "eval_sts-dev_spearman_euclidean": 0.8507945263749397, "eval_sts-dev_spearman_manhattan": 0.8505135044576224, "eval_sts-dev_spearman_max": 0.8507945263749397, "step": 28000 }, { "epoch": 1.4804048518802357, "grad_norm": 34.724769592285156, "learning_rate": 5e-06, "loss": 1.1666, "step": 28010 }, { "epoch": 1.4809333791390291, "grad_norm": 64.38105010986328, "learning_rate": 5e-06, "loss": 1.4033, "step": 28020 }, { "epoch": 1.4814619063978225, "grad_norm": 95.48521423339844, "learning_rate": 5e-06, "loss": 1.2223, "step": 28030 }, { "epoch": 1.4819904336566159, "grad_norm": 40.612083435058594, "learning_rate": 5e-06, "loss": 1.4116, "step": 28040 }, { "epoch": 1.4825189609154092, "grad_norm": 56.812774658203125, "learning_rate": 5e-06, "loss": 1.2024, "step": 28050 }, { "epoch": 1.4830474881742026, "grad_norm": 39.177616119384766, "learning_rate": 5e-06, "loss": 1.0628, "step": 28060 }, { "epoch": 1.483576015432996, "grad_norm": 82.16516876220703, "learning_rate": 5e-06, "loss": 1.5662, "step": 28070 }, { "epoch": 1.4841045426917894, "grad_norm": 62.763370513916016, "learning_rate": 5e-06, "loss": 1.7964, "step": 28080 }, { "epoch": 1.4846330699505828, "grad_norm": 57.33951950073242, "learning_rate": 5e-06, "loss": 1.3182, "step": 28090 }, { "epoch": 1.4851615972093761, "grad_norm": 55.333641052246094, "learning_rate": 5e-06, "loss": 1.2904, "step": 28100 }, { "epoch": 1.4856901244681695, "grad_norm": 55.093475341796875, "learning_rate": 5e-06, "loss": 1.2873, "step": 28110 }, { "epoch": 1.4862186517269629, "grad_norm": 68.03559875488281, "learning_rate": 5e-06, "loss": 1.4562, "step": 28120 }, { "epoch": 1.4867471789857563, "grad_norm": 69.48151397705078, "learning_rate": 5e-06, "loss": 1.3453, "step": 28130 }, { "epoch": 1.4872757062445496, "grad_norm": 66.18547821044922, "learning_rate": 5e-06, "loss": 1.36, "step": 28140 }, { "epoch": 1.487804233503343, "grad_norm": 105.41734313964844, "learning_rate": 5e-06, "loss": 1.5814, "step": 28150 }, { "epoch": 1.4883327607621364, "grad_norm": 57.71015167236328, "learning_rate": 5e-06, "loss": 1.3312, "step": 28160 }, { "epoch": 1.4888612880209298, "grad_norm": 75.33016967773438, "learning_rate": 5e-06, "loss": 1.4769, "step": 28170 }, { "epoch": 1.489389815279723, "grad_norm": 47.441184997558594, "learning_rate": 5e-06, "loss": 1.225, "step": 28180 }, { "epoch": 1.4899183425385165, "grad_norm": 49.03916549682617, "learning_rate": 5e-06, "loss": 1.0336, "step": 28190 }, { "epoch": 1.4904468697973097, "grad_norm": 61.521114349365234, "learning_rate": 5e-06, "loss": 1.7573, "step": 28200 }, { "epoch": 1.4909753970561033, "grad_norm": 65.26546478271484, "learning_rate": 5e-06, "loss": 1.2867, "step": 28210 }, { "epoch": 1.4915039243148964, "grad_norm": 58.49119186401367, "learning_rate": 5e-06, "loss": 1.0467, "step": 28220 }, { "epoch": 1.49203245157369, "grad_norm": 48.40713882446289, "learning_rate": 5e-06, "loss": 1.1809, "step": 28230 }, { "epoch": 1.4925609788324832, "grad_norm": 79.72087860107422, "learning_rate": 5e-06, "loss": 1.5552, "step": 28240 }, { "epoch": 1.4930895060912768, "grad_norm": 58.82041549682617, "learning_rate": 5e-06, "loss": 1.3388, "step": 28250 }, { "epoch": 1.49361803335007, "grad_norm": 54.60784912109375, "learning_rate": 5e-06, "loss": 1.4909, "step": 28260 }, { "epoch": 1.4941465606088635, "grad_norm": 58.273658752441406, "learning_rate": 5e-06, "loss": 1.2941, "step": 28270 }, { "epoch": 1.4946750878676567, "grad_norm": 46.02553176879883, "learning_rate": 5e-06, "loss": 1.276, "step": 28280 }, { "epoch": 1.4952036151264503, "grad_norm": 72.70234680175781, "learning_rate": 5e-06, "loss": 1.3418, "step": 28290 }, { "epoch": 1.4957321423852434, "grad_norm": 57.3893928527832, "learning_rate": 5e-06, "loss": 1.3582, "step": 28300 }, { "epoch": 1.4962606696440368, "grad_norm": 91.43766784667969, "learning_rate": 5e-06, "loss": 1.48, "step": 28310 }, { "epoch": 1.4967891969028302, "grad_norm": 40.84992980957031, "learning_rate": 5e-06, "loss": 1.1547, "step": 28320 }, { "epoch": 1.4973177241616236, "grad_norm": 86.01266479492188, "learning_rate": 5e-06, "loss": 1.1756, "step": 28330 }, { "epoch": 1.497846251420417, "grad_norm": 94.09688568115234, "learning_rate": 5e-06, "loss": 1.5643, "step": 28340 }, { "epoch": 1.4983747786792103, "grad_norm": 54.34334182739258, "learning_rate": 5e-06, "loss": 1.4705, "step": 28350 }, { "epoch": 1.4989033059380037, "grad_norm": 65.8447036743164, "learning_rate": 5e-06, "loss": 1.3764, "step": 28360 }, { "epoch": 1.499431833196797, "grad_norm": 65.57823181152344, "learning_rate": 5e-06, "loss": 1.4432, "step": 28370 }, { "epoch": 1.4999603604555904, "grad_norm": 62.98934555053711, "learning_rate": 5e-06, "loss": 1.708, "step": 28380 }, { "epoch": 1.5004888877143838, "grad_norm": 56.20134353637695, "learning_rate": 5e-06, "loss": 1.5633, "step": 28390 }, { "epoch": 1.5010174149731772, "grad_norm": 41.46079635620117, "learning_rate": 5e-06, "loss": 1.3087, "step": 28400 }, { "epoch": 1.5015459422319706, "grad_norm": 49.8424186706543, "learning_rate": 5e-06, "loss": 1.5833, "step": 28410 }, { "epoch": 1.502074469490764, "grad_norm": 81.19650268554688, "learning_rate": 5e-06, "loss": 1.2828, "step": 28420 }, { "epoch": 1.5026029967495573, "grad_norm": 89.36222839355469, "learning_rate": 5e-06, "loss": 1.6615, "step": 28430 }, { "epoch": 1.5031315240083507, "grad_norm": 87.85689544677734, "learning_rate": 5e-06, "loss": 1.4284, "step": 28440 }, { "epoch": 1.503660051267144, "grad_norm": 51.303131103515625, "learning_rate": 5e-06, "loss": 1.3255, "step": 28450 }, { "epoch": 1.5041885785259375, "grad_norm": 42.8609619140625, "learning_rate": 5e-06, "loss": 1.2125, "step": 28460 }, { "epoch": 1.5047171057847308, "grad_norm": 63.70766830444336, "learning_rate": 5e-06, "loss": 1.6029, "step": 28470 }, { "epoch": 1.5052456330435242, "grad_norm": 36.165374755859375, "learning_rate": 5e-06, "loss": 1.2059, "step": 28480 }, { "epoch": 1.5057741603023176, "grad_norm": 50.310279846191406, "learning_rate": 5e-06, "loss": 1.3605, "step": 28490 }, { "epoch": 1.506302687561111, "grad_norm": 74.14556884765625, "learning_rate": 5e-06, "loss": 1.3147, "step": 28500 }, { "epoch": 1.5068312148199043, "grad_norm": 45.76654815673828, "learning_rate": 5e-06, "loss": 1.22, "step": 28510 }, { "epoch": 1.5073597420786977, "grad_norm": 130.4104461669922, "learning_rate": 5e-06, "loss": 1.3008, "step": 28520 }, { "epoch": 1.507888269337491, "grad_norm": 61.308433532714844, "learning_rate": 5e-06, "loss": 1.2339, "step": 28530 }, { "epoch": 1.5084167965962845, "grad_norm": 74.47866821289062, "learning_rate": 5e-06, "loss": 1.5565, "step": 28540 }, { "epoch": 1.5089453238550778, "grad_norm": 50.253028869628906, "learning_rate": 5e-06, "loss": 0.9561, "step": 28550 }, { "epoch": 1.5094738511138712, "grad_norm": 45.2442626953125, "learning_rate": 5e-06, "loss": 1.0637, "step": 28560 }, { "epoch": 1.5100023783726646, "grad_norm": 54.66096496582031, "learning_rate": 5e-06, "loss": 0.9163, "step": 28570 }, { "epoch": 1.510530905631458, "grad_norm": 62.0748405456543, "learning_rate": 5e-06, "loss": 1.492, "step": 28580 }, { "epoch": 1.5110594328902514, "grad_norm": 47.624229431152344, "learning_rate": 5e-06, "loss": 1.6573, "step": 28590 }, { "epoch": 1.5115879601490447, "grad_norm": 100.84827423095703, "learning_rate": 5e-06, "loss": 1.012, "step": 28600 }, { "epoch": 1.512116487407838, "grad_norm": 73.55967712402344, "learning_rate": 5e-06, "loss": 1.8371, "step": 28610 }, { "epoch": 1.5126450146666315, "grad_norm": 54.431419372558594, "learning_rate": 5e-06, "loss": 1.0637, "step": 28620 }, { "epoch": 1.5131735419254249, "grad_norm": 89.09457397460938, "learning_rate": 5e-06, "loss": 1.2612, "step": 28630 }, { "epoch": 1.5137020691842182, "grad_norm": 45.31324005126953, "learning_rate": 5e-06, "loss": 1.2912, "step": 28640 }, { "epoch": 1.5142305964430114, "grad_norm": 56.79197311401367, "learning_rate": 5e-06, "loss": 1.444, "step": 28650 }, { "epoch": 1.514759123701805, "grad_norm": 53.99131393432617, "learning_rate": 5e-06, "loss": 1.2882, "step": 28660 }, { "epoch": 1.5152876509605981, "grad_norm": 81.87152862548828, "learning_rate": 5e-06, "loss": 1.2342, "step": 28670 }, { "epoch": 1.5158161782193917, "grad_norm": 48.053466796875, "learning_rate": 5e-06, "loss": 1.1033, "step": 28680 }, { "epoch": 1.516344705478185, "grad_norm": 41.046260833740234, "learning_rate": 5e-06, "loss": 1.3634, "step": 28690 }, { "epoch": 1.5168732327369785, "grad_norm": 43.97895812988281, "learning_rate": 5e-06, "loss": 1.4685, "step": 28700 }, { "epoch": 1.5174017599957716, "grad_norm": 61.24790954589844, "learning_rate": 5e-06, "loss": 1.3428, "step": 28710 }, { "epoch": 1.5179302872545652, "grad_norm": 82.01283264160156, "learning_rate": 5e-06, "loss": 1.4891, "step": 28720 }, { "epoch": 1.5184588145133584, "grad_norm": 82.50606536865234, "learning_rate": 5e-06, "loss": 1.4593, "step": 28730 }, { "epoch": 1.518987341772152, "grad_norm": 50.53139114379883, "learning_rate": 5e-06, "loss": 1.2211, "step": 28740 }, { "epoch": 1.5195158690309452, "grad_norm": 70.88553619384766, "learning_rate": 5e-06, "loss": 1.273, "step": 28750 }, { "epoch": 1.5200443962897388, "grad_norm": 62.51484298706055, "learning_rate": 5e-06, "loss": 1.6652, "step": 28760 }, { "epoch": 1.520572923548532, "grad_norm": 53.149898529052734, "learning_rate": 5e-06, "loss": 1.0775, "step": 28770 }, { "epoch": 1.5211014508073255, "grad_norm": 50.20707702636719, "learning_rate": 5e-06, "loss": 1.3706, "step": 28780 }, { "epoch": 1.5216299780661187, "grad_norm": 79.36941528320312, "learning_rate": 5e-06, "loss": 1.2885, "step": 28790 }, { "epoch": 1.5221585053249123, "grad_norm": 83.21510314941406, "learning_rate": 5e-06, "loss": 1.1947, "step": 28800 }, { "epoch": 1.5226870325837054, "grad_norm": 59.86106872558594, "learning_rate": 5e-06, "loss": 1.2479, "step": 28810 }, { "epoch": 1.523215559842499, "grad_norm": 61.751041412353516, "learning_rate": 5e-06, "loss": 1.216, "step": 28820 }, { "epoch": 1.5237440871012922, "grad_norm": 61.09697341918945, "learning_rate": 5e-06, "loss": 1.4325, "step": 28830 }, { "epoch": 1.5242726143600858, "grad_norm": 72.9396743774414, "learning_rate": 5e-06, "loss": 1.2599, "step": 28840 }, { "epoch": 1.524801141618879, "grad_norm": 63.131561279296875, "learning_rate": 5e-06, "loss": 1.4321, "step": 28850 }, { "epoch": 1.5253296688776725, "grad_norm": 57.14971160888672, "learning_rate": 5e-06, "loss": 1.4621, "step": 28860 }, { "epoch": 1.5258581961364657, "grad_norm": 62.21672058105469, "learning_rate": 5e-06, "loss": 1.6373, "step": 28870 }, { "epoch": 1.5263867233952593, "grad_norm": 57.179542541503906, "learning_rate": 5e-06, "loss": 1.1104, "step": 28880 }, { "epoch": 1.5269152506540524, "grad_norm": 32.715431213378906, "learning_rate": 5e-06, "loss": 1.3115, "step": 28890 }, { "epoch": 1.5274437779128458, "grad_norm": 69.98564147949219, "learning_rate": 5e-06, "loss": 1.4168, "step": 28900 }, { "epoch": 1.5279723051716392, "grad_norm": 76.98065948486328, "learning_rate": 5e-06, "loss": 1.0979, "step": 28910 }, { "epoch": 1.5285008324304326, "grad_norm": 63.83252716064453, "learning_rate": 5e-06, "loss": 0.9241, "step": 28920 }, { "epoch": 1.529029359689226, "grad_norm": 83.17121887207031, "learning_rate": 5e-06, "loss": 1.4943, "step": 28930 }, { "epoch": 1.5295578869480193, "grad_norm": 54.745033264160156, "learning_rate": 5e-06, "loss": 1.3642, "step": 28940 }, { "epoch": 1.5300864142068127, "grad_norm": 62.85832977294922, "learning_rate": 5e-06, "loss": 1.4283, "step": 28950 }, { "epoch": 1.530614941465606, "grad_norm": 44.60282516479492, "learning_rate": 5e-06, "loss": 1.1918, "step": 28960 }, { "epoch": 1.5311434687243994, "grad_norm": 43.56437301635742, "learning_rate": 5e-06, "loss": 1.049, "step": 28970 }, { "epoch": 1.5316719959831928, "grad_norm": 56.842166900634766, "learning_rate": 5e-06, "loss": 1.5246, "step": 28980 }, { "epoch": 1.5322005232419862, "grad_norm": 68.25231170654297, "learning_rate": 5e-06, "loss": 1.5828, "step": 28990 }, { "epoch": 1.5327290505007796, "grad_norm": 85.36454772949219, "learning_rate": 5e-06, "loss": 1.3583, "step": 29000 }, { "epoch": 1.5327290505007796, "eval_loss": 1.3613390922546387, "eval_runtime": 37.7684, "eval_samples_per_second": 323.869, "eval_steps_per_second": 10.141, "eval_sts-dev_pearson_cosine": 0.8520245163022129, "eval_sts-dev_pearson_dot": 0.8304029649502981, "eval_sts-dev_pearson_euclidean": 0.8546904751591031, "eval_sts-dev_pearson_manhattan": 0.8546480322587109, "eval_sts-dev_pearson_max": 0.8546904751591031, "eval_sts-dev_spearman_cosine": 0.8548428295018294, "eval_sts-dev_spearman_dot": 0.8277232585529897, "eval_sts-dev_spearman_euclidean": 0.8563509711462763, "eval_sts-dev_spearman_manhattan": 0.8560985715186185, "eval_sts-dev_spearman_max": 0.8563509711462763, "step": 29000 }, { "epoch": 1.533257577759573, "grad_norm": 62.06280517578125, "learning_rate": 5e-06, "loss": 1.4107, "step": 29010 }, { "epoch": 1.5337861050183663, "grad_norm": 63.77236557006836, "learning_rate": 5e-06, "loss": 1.367, "step": 29020 }, { "epoch": 1.5343146322771597, "grad_norm": 64.857421875, "learning_rate": 5e-06, "loss": 1.2938, "step": 29030 }, { "epoch": 1.534843159535953, "grad_norm": 59.565673828125, "learning_rate": 5e-06, "loss": 1.1928, "step": 29040 }, { "epoch": 1.5353716867947464, "grad_norm": 50.4073600769043, "learning_rate": 5e-06, "loss": 1.1693, "step": 29050 }, { "epoch": 1.5359002140535398, "grad_norm": 77.35919189453125, "learning_rate": 5e-06, "loss": 1.7625, "step": 29060 }, { "epoch": 1.5364287413123332, "grad_norm": 60.9693603515625, "learning_rate": 5e-06, "loss": 1.1027, "step": 29070 }, { "epoch": 1.5369572685711266, "grad_norm": 59.57616424560547, "learning_rate": 5e-06, "loss": 1.4129, "step": 29080 }, { "epoch": 1.53748579582992, "grad_norm": 49.261356353759766, "learning_rate": 5e-06, "loss": 1.1334, "step": 29090 }, { "epoch": 1.5380143230887133, "grad_norm": 44.37168884277344, "learning_rate": 5e-06, "loss": 1.3103, "step": 29100 }, { "epoch": 1.5385428503475067, "grad_norm": 86.40380096435547, "learning_rate": 5e-06, "loss": 1.2289, "step": 29110 }, { "epoch": 1.5390713776063, "grad_norm": 67.26834106445312, "learning_rate": 5e-06, "loss": 1.3065, "step": 29120 }, { "epoch": 1.5395999048650935, "grad_norm": 88.5871810913086, "learning_rate": 5e-06, "loss": 1.2779, "step": 29130 }, { "epoch": 1.5401284321238868, "grad_norm": 81.9888687133789, "learning_rate": 5e-06, "loss": 1.5304, "step": 29140 }, { "epoch": 1.5406569593826802, "grad_norm": 58.399269104003906, "learning_rate": 5e-06, "loss": 1.2395, "step": 29150 }, { "epoch": 1.5411854866414734, "grad_norm": 47.48738479614258, "learning_rate": 5e-06, "loss": 1.4167, "step": 29160 }, { "epoch": 1.541714013900267, "grad_norm": 57.73063659667969, "learning_rate": 5e-06, "loss": 1.394, "step": 29170 }, { "epoch": 1.5422425411590601, "grad_norm": 50.39829635620117, "learning_rate": 5e-06, "loss": 1.2224, "step": 29180 }, { "epoch": 1.5427710684178537, "grad_norm": 69.90660095214844, "learning_rate": 5e-06, "loss": 1.4047, "step": 29190 }, { "epoch": 1.5432995956766469, "grad_norm": 74.64360046386719, "learning_rate": 5e-06, "loss": 1.319, "step": 29200 }, { "epoch": 1.5438281229354405, "grad_norm": 63.51150131225586, "learning_rate": 5e-06, "loss": 1.4587, "step": 29210 }, { "epoch": 1.5443566501942336, "grad_norm": 67.5613021850586, "learning_rate": 5e-06, "loss": 1.2331, "step": 29220 }, { "epoch": 1.5448851774530272, "grad_norm": 42.991878509521484, "learning_rate": 5e-06, "loss": 1.3932, "step": 29230 }, { "epoch": 1.5454137047118204, "grad_norm": 58.267486572265625, "learning_rate": 5e-06, "loss": 1.2036, "step": 29240 }, { "epoch": 1.545942231970614, "grad_norm": 64.47954559326172, "learning_rate": 5e-06, "loss": 1.4563, "step": 29250 }, { "epoch": 1.5464707592294071, "grad_norm": 66.20953369140625, "learning_rate": 5e-06, "loss": 1.8403, "step": 29260 }, { "epoch": 1.5469992864882007, "grad_norm": 40.45256042480469, "learning_rate": 5e-06, "loss": 1.2212, "step": 29270 }, { "epoch": 1.5475278137469939, "grad_norm": 60.60327911376953, "learning_rate": 5e-06, "loss": 1.6303, "step": 29280 }, { "epoch": 1.5480563410057875, "grad_norm": 82.14712524414062, "learning_rate": 5e-06, "loss": 1.2943, "step": 29290 }, { "epoch": 1.5485848682645806, "grad_norm": 42.39307403564453, "learning_rate": 5e-06, "loss": 1.0606, "step": 29300 }, { "epoch": 1.5491133955233742, "grad_norm": 57.900447845458984, "learning_rate": 5e-06, "loss": 1.6208, "step": 29310 }, { "epoch": 1.5496419227821674, "grad_norm": 102.95925903320312, "learning_rate": 5e-06, "loss": 1.3564, "step": 29320 }, { "epoch": 1.550170450040961, "grad_norm": 65.96255493164062, "learning_rate": 5e-06, "loss": 1.2581, "step": 29330 }, { "epoch": 1.5506989772997541, "grad_norm": 99.89388275146484, "learning_rate": 5e-06, "loss": 1.9439, "step": 29340 }, { "epoch": 1.5512275045585477, "grad_norm": 48.726470947265625, "learning_rate": 5e-06, "loss": 1.3945, "step": 29350 }, { "epoch": 1.551756031817341, "grad_norm": 61.88914489746094, "learning_rate": 5e-06, "loss": 1.2926, "step": 29360 }, { "epoch": 1.5522845590761345, "grad_norm": 64.81912994384766, "learning_rate": 5e-06, "loss": 1.017, "step": 29370 }, { "epoch": 1.5528130863349277, "grad_norm": 53.38954544067383, "learning_rate": 5e-06, "loss": 1.4857, "step": 29380 }, { "epoch": 1.5533416135937212, "grad_norm": 49.10043716430664, "learning_rate": 5e-06, "loss": 1.2426, "step": 29390 }, { "epoch": 1.5538701408525144, "grad_norm": 64.7593765258789, "learning_rate": 5e-06, "loss": 1.5468, "step": 29400 }, { "epoch": 1.554398668111308, "grad_norm": 52.50968933105469, "learning_rate": 5e-06, "loss": 1.3178, "step": 29410 }, { "epoch": 1.5549271953701012, "grad_norm": 65.10122680664062, "learning_rate": 5e-06, "loss": 1.3493, "step": 29420 }, { "epoch": 1.5554557226288945, "grad_norm": 64.429443359375, "learning_rate": 5e-06, "loss": 1.3996, "step": 29430 }, { "epoch": 1.555984249887688, "grad_norm": 70.8336181640625, "learning_rate": 5e-06, "loss": 1.3744, "step": 29440 }, { "epoch": 1.5565127771464813, "grad_norm": 64.8896713256836, "learning_rate": 5e-06, "loss": 1.4004, "step": 29450 }, { "epoch": 1.5570413044052747, "grad_norm": 35.8710823059082, "learning_rate": 5e-06, "loss": 1.1241, "step": 29460 }, { "epoch": 1.557569831664068, "grad_norm": 50.84837341308594, "learning_rate": 5e-06, "loss": 1.4627, "step": 29470 }, { "epoch": 1.5580983589228614, "grad_norm": 48.35868453979492, "learning_rate": 5e-06, "loss": 1.3611, "step": 29480 }, { "epoch": 1.5586268861816548, "grad_norm": 80.40444946289062, "learning_rate": 5e-06, "loss": 1.658, "step": 29490 }, { "epoch": 1.5591554134404482, "grad_norm": 62.14797592163086, "learning_rate": 5e-06, "loss": 1.5111, "step": 29500 }, { "epoch": 1.5596839406992415, "grad_norm": 56.41849899291992, "learning_rate": 5e-06, "loss": 1.5428, "step": 29510 }, { "epoch": 1.560212467958035, "grad_norm": 51.37146759033203, "learning_rate": 5e-06, "loss": 0.9378, "step": 29520 }, { "epoch": 1.5607409952168283, "grad_norm": 74.2347412109375, "learning_rate": 5e-06, "loss": 1.1632, "step": 29530 }, { "epoch": 1.5612695224756217, "grad_norm": 70.72118377685547, "learning_rate": 5e-06, "loss": 1.3896, "step": 29540 }, { "epoch": 1.561798049734415, "grad_norm": 69.27629089355469, "learning_rate": 5e-06, "loss": 1.0985, "step": 29550 }, { "epoch": 1.5623265769932084, "grad_norm": 77.54151153564453, "learning_rate": 5e-06, "loss": 1.6079, "step": 29560 }, { "epoch": 1.5628551042520018, "grad_norm": 60.94023895263672, "learning_rate": 5e-06, "loss": 1.4736, "step": 29570 }, { "epoch": 1.5633836315107952, "grad_norm": 66.24431610107422, "learning_rate": 5e-06, "loss": 1.345, "step": 29580 }, { "epoch": 1.5639121587695886, "grad_norm": 63.217315673828125, "learning_rate": 5e-06, "loss": 1.3821, "step": 29590 }, { "epoch": 1.564440686028382, "grad_norm": 58.715492248535156, "learning_rate": 5e-06, "loss": 1.5268, "step": 29600 }, { "epoch": 1.5649692132871753, "grad_norm": 50.138065338134766, "learning_rate": 5e-06, "loss": 1.3407, "step": 29610 }, { "epoch": 1.5654977405459687, "grad_norm": 40.153648376464844, "learning_rate": 5e-06, "loss": 1.2444, "step": 29620 }, { "epoch": 1.566026267804762, "grad_norm": 76.60353088378906, "learning_rate": 5e-06, "loss": 1.6328, "step": 29630 }, { "epoch": 1.5665547950635554, "grad_norm": 66.57382202148438, "learning_rate": 5e-06, "loss": 1.5231, "step": 29640 }, { "epoch": 1.5670833223223488, "grad_norm": 46.95983123779297, "learning_rate": 5e-06, "loss": 1.0568, "step": 29650 }, { "epoch": 1.5676118495811422, "grad_norm": 49.712032318115234, "learning_rate": 5e-06, "loss": 1.1481, "step": 29660 }, { "epoch": 1.5681403768399356, "grad_norm": 111.58517456054688, "learning_rate": 5e-06, "loss": 1.3669, "step": 29670 }, { "epoch": 1.568668904098729, "grad_norm": 100.73651885986328, "learning_rate": 5e-06, "loss": 1.5947, "step": 29680 }, { "epoch": 1.569197431357522, "grad_norm": 40.81956100463867, "learning_rate": 5e-06, "loss": 1.3724, "step": 29690 }, { "epoch": 1.5697259586163157, "grad_norm": 55.898258209228516, "learning_rate": 5e-06, "loss": 1.1477, "step": 29700 }, { "epoch": 1.5702544858751089, "grad_norm": 52.74946594238281, "learning_rate": 5e-06, "loss": 1.5329, "step": 29710 }, { "epoch": 1.5707830131339025, "grad_norm": 47.5594367980957, "learning_rate": 5e-06, "loss": 1.2207, "step": 29720 }, { "epoch": 1.5713115403926956, "grad_norm": 44.3275146484375, "learning_rate": 5e-06, "loss": 1.3829, "step": 29730 }, { "epoch": 1.5718400676514892, "grad_norm": 50.91968536376953, "learning_rate": 5e-06, "loss": 1.3129, "step": 29740 }, { "epoch": 1.5723685949102824, "grad_norm": 54.54525375366211, "learning_rate": 5e-06, "loss": 1.5572, "step": 29750 }, { "epoch": 1.572897122169076, "grad_norm": 68.40715789794922, "learning_rate": 5e-06, "loss": 1.6106, "step": 29760 }, { "epoch": 1.573425649427869, "grad_norm": 48.00406265258789, "learning_rate": 5e-06, "loss": 1.2388, "step": 29770 }, { "epoch": 1.5739541766866627, "grad_norm": 35.843101501464844, "learning_rate": 5e-06, "loss": 1.4725, "step": 29780 }, { "epoch": 1.5744827039454559, "grad_norm": 44.799930572509766, "learning_rate": 5e-06, "loss": 1.1048, "step": 29790 }, { "epoch": 1.5750112312042495, "grad_norm": 53.73744583129883, "learning_rate": 5e-06, "loss": 1.5576, "step": 29800 }, { "epoch": 1.5755397584630426, "grad_norm": 54.23309326171875, "learning_rate": 5e-06, "loss": 1.0512, "step": 29810 }, { "epoch": 1.5760682857218362, "grad_norm": 58.76167678833008, "learning_rate": 5e-06, "loss": 1.2913, "step": 29820 }, { "epoch": 1.5765968129806294, "grad_norm": 101.67318725585938, "learning_rate": 5e-06, "loss": 1.3817, "step": 29830 }, { "epoch": 1.577125340239423, "grad_norm": 47.368263244628906, "learning_rate": 5e-06, "loss": 1.4043, "step": 29840 }, { "epoch": 1.5776538674982161, "grad_norm": 67.45284271240234, "learning_rate": 5e-06, "loss": 1.157, "step": 29850 }, { "epoch": 1.5781823947570097, "grad_norm": 47.79405975341797, "learning_rate": 5e-06, "loss": 1.5192, "step": 29860 }, { "epoch": 1.5787109220158029, "grad_norm": 30.019922256469727, "learning_rate": 5e-06, "loss": 0.9336, "step": 29870 }, { "epoch": 1.5792394492745965, "grad_norm": 53.92766189575195, "learning_rate": 5e-06, "loss": 1.2002, "step": 29880 }, { "epoch": 1.5797679765333896, "grad_norm": 42.521934509277344, "learning_rate": 5e-06, "loss": 1.2019, "step": 29890 }, { "epoch": 1.5802965037921832, "grad_norm": 60.6068000793457, "learning_rate": 5e-06, "loss": 1.349, "step": 29900 }, { "epoch": 1.5808250310509764, "grad_norm": 70.68143463134766, "learning_rate": 5e-06, "loss": 1.4147, "step": 29910 }, { "epoch": 1.58135355830977, "grad_norm": 33.84113311767578, "learning_rate": 5e-06, "loss": 1.3247, "step": 29920 }, { "epoch": 1.5818820855685631, "grad_norm": 54.916019439697266, "learning_rate": 5e-06, "loss": 1.4987, "step": 29930 }, { "epoch": 1.5824106128273567, "grad_norm": 47.99659729003906, "learning_rate": 5e-06, "loss": 1.4946, "step": 29940 }, { "epoch": 1.5829391400861499, "grad_norm": 73.64134979248047, "learning_rate": 5e-06, "loss": 1.255, "step": 29950 }, { "epoch": 1.5834676673449433, "grad_norm": 61.11103439331055, "learning_rate": 5e-06, "loss": 1.4548, "step": 29960 }, { "epoch": 1.5839961946037366, "grad_norm": 60.55287170410156, "learning_rate": 5e-06, "loss": 1.2363, "step": 29970 }, { "epoch": 1.58452472186253, "grad_norm": 43.860191345214844, "learning_rate": 5e-06, "loss": 1.2792, "step": 29980 }, { "epoch": 1.5850532491213234, "grad_norm": 58.432613372802734, "learning_rate": 5e-06, "loss": 1.3211, "step": 29990 }, { "epoch": 1.5855817763801168, "grad_norm": 57.26395034790039, "learning_rate": 5e-06, "loss": 1.2269, "step": 30000 }, { "epoch": 1.5855817763801168, "eval_loss": 1.3787765502929688, "eval_runtime": 35.0324, "eval_samples_per_second": 349.163, "eval_steps_per_second": 10.933, "eval_sts-dev_pearson_cosine": 0.8485993432063514, "eval_sts-dev_pearson_dot": 0.821508705767839, "eval_sts-dev_pearson_euclidean": 0.8503866777056015, "eval_sts-dev_pearson_manhattan": 0.8503971233067591, "eval_sts-dev_pearson_max": 0.8503971233067591, "eval_sts-dev_spearman_cosine": 0.8511564231200586, "eval_sts-dev_spearman_dot": 0.8179834579769948, "eval_sts-dev_spearman_euclidean": 0.8526551602530507, "eval_sts-dev_spearman_manhattan": 0.8524113761374241, "eval_sts-dev_spearman_max": 0.8526551602530507, "step": 30000 }, { "epoch": 1.5861103036389101, "grad_norm": 53.835060119628906, "learning_rate": 5e-06, "loss": 1.2721, "step": 30010 }, { "epoch": 1.5866388308977035, "grad_norm": 35.983680725097656, "learning_rate": 5e-06, "loss": 1.1382, "step": 30020 }, { "epoch": 1.587167358156497, "grad_norm": 59.50437545776367, "learning_rate": 5e-06, "loss": 1.5108, "step": 30030 }, { "epoch": 1.5876958854152903, "grad_norm": 44.929264068603516, "learning_rate": 5e-06, "loss": 1.1075, "step": 30040 }, { "epoch": 1.5882244126740837, "grad_norm": 30.593053817749023, "learning_rate": 5e-06, "loss": 1.0788, "step": 30050 }, { "epoch": 1.588752939932877, "grad_norm": 53.479862213134766, "learning_rate": 5e-06, "loss": 1.095, "step": 30060 }, { "epoch": 1.5892814671916704, "grad_norm": 81.60130310058594, "learning_rate": 5e-06, "loss": 1.383, "step": 30070 }, { "epoch": 1.5898099944504638, "grad_norm": 55.262882232666016, "learning_rate": 5e-06, "loss": 1.1837, "step": 30080 }, { "epoch": 1.5903385217092572, "grad_norm": 39.386810302734375, "learning_rate": 5e-06, "loss": 1.7932, "step": 30090 }, { "epoch": 1.5908670489680505, "grad_norm": 74.49403381347656, "learning_rate": 5e-06, "loss": 1.4518, "step": 30100 }, { "epoch": 1.591395576226844, "grad_norm": 69.48778533935547, "learning_rate": 5e-06, "loss": 1.091, "step": 30110 }, { "epoch": 1.5919241034856373, "grad_norm": 70.13570404052734, "learning_rate": 5e-06, "loss": 1.4514, "step": 30120 }, { "epoch": 1.5924526307444307, "grad_norm": 70.60948944091797, "learning_rate": 5e-06, "loss": 1.38, "step": 30130 }, { "epoch": 1.592981158003224, "grad_norm": 50.875789642333984, "learning_rate": 5e-06, "loss": 1.2101, "step": 30140 }, { "epoch": 1.5935096852620174, "grad_norm": 71.30445098876953, "learning_rate": 5e-06, "loss": 1.2978, "step": 30150 }, { "epoch": 1.5940382125208108, "grad_norm": 73.96467590332031, "learning_rate": 5e-06, "loss": 1.7752, "step": 30160 }, { "epoch": 1.5945667397796042, "grad_norm": 45.15138244628906, "learning_rate": 5e-06, "loss": 1.0281, "step": 30170 }, { "epoch": 1.5950952670383975, "grad_norm": 35.121315002441406, "learning_rate": 5e-06, "loss": 1.2781, "step": 30180 }, { "epoch": 1.595623794297191, "grad_norm": 48.5032844543457, "learning_rate": 5e-06, "loss": 1.5373, "step": 30190 }, { "epoch": 1.5961523215559843, "grad_norm": 52.103736877441406, "learning_rate": 5e-06, "loss": 1.3072, "step": 30200 }, { "epoch": 1.5966808488147777, "grad_norm": 40.23427963256836, "learning_rate": 5e-06, "loss": 1.1249, "step": 30210 }, { "epoch": 1.5972093760735708, "grad_norm": 45.18414306640625, "learning_rate": 5e-06, "loss": 1.0415, "step": 30220 }, { "epoch": 1.5977379033323644, "grad_norm": 69.71050262451172, "learning_rate": 5e-06, "loss": 1.4108, "step": 30230 }, { "epoch": 1.5982664305911576, "grad_norm": 58.08211135864258, "learning_rate": 5e-06, "loss": 1.3541, "step": 30240 }, { "epoch": 1.5987949578499512, "grad_norm": 52.97196578979492, "learning_rate": 5e-06, "loss": 1.2935, "step": 30250 }, { "epoch": 1.5993234851087443, "grad_norm": 42.14303207397461, "learning_rate": 5e-06, "loss": 1.5353, "step": 30260 }, { "epoch": 1.599852012367538, "grad_norm": 56.6043701171875, "learning_rate": 5e-06, "loss": 1.4228, "step": 30270 }, { "epoch": 1.600380539626331, "grad_norm": 69.85105895996094, "learning_rate": 5e-06, "loss": 1.1913, "step": 30280 }, { "epoch": 1.6009090668851247, "grad_norm": 69.61620330810547, "learning_rate": 5e-06, "loss": 1.3376, "step": 30290 }, { "epoch": 1.6014375941439178, "grad_norm": 52.623634338378906, "learning_rate": 5e-06, "loss": 1.2142, "step": 30300 }, { "epoch": 1.6019661214027114, "grad_norm": 53.93655014038086, "learning_rate": 5e-06, "loss": 1.3493, "step": 30310 }, { "epoch": 1.6024946486615046, "grad_norm": 72.93072509765625, "learning_rate": 5e-06, "loss": 1.5418, "step": 30320 }, { "epoch": 1.6030231759202982, "grad_norm": 78.06774139404297, "learning_rate": 5e-06, "loss": 1.3862, "step": 30330 }, { "epoch": 1.6035517031790913, "grad_norm": 51.48137664794922, "learning_rate": 5e-06, "loss": 1.222, "step": 30340 }, { "epoch": 1.604080230437885, "grad_norm": 59.20948028564453, "learning_rate": 5e-06, "loss": 1.1782, "step": 30350 }, { "epoch": 1.604608757696678, "grad_norm": 57.863460540771484, "learning_rate": 5e-06, "loss": 0.9806, "step": 30360 }, { "epoch": 1.6051372849554717, "grad_norm": 70.3982162475586, "learning_rate": 5e-06, "loss": 1.2864, "step": 30370 }, { "epoch": 1.6056658122142649, "grad_norm": 71.55467987060547, "learning_rate": 5e-06, "loss": 1.1533, "step": 30380 }, { "epoch": 1.6061943394730585, "grad_norm": 73.92507934570312, "learning_rate": 5e-06, "loss": 1.531, "step": 30390 }, { "epoch": 1.6067228667318516, "grad_norm": 77.72015380859375, "learning_rate": 5e-06, "loss": 1.3097, "step": 30400 }, { "epoch": 1.6072513939906452, "grad_norm": 48.148719787597656, "learning_rate": 5e-06, "loss": 1.2264, "step": 30410 }, { "epoch": 1.6077799212494384, "grad_norm": 40.28392028808594, "learning_rate": 5e-06, "loss": 1.2539, "step": 30420 }, { "epoch": 1.608308448508232, "grad_norm": 44.92657470703125, "learning_rate": 5e-06, "loss": 1.5092, "step": 30430 }, { "epoch": 1.6088369757670251, "grad_norm": 80.30310821533203, "learning_rate": 5e-06, "loss": 1.327, "step": 30440 }, { "epoch": 1.6093655030258187, "grad_norm": 45.3488883972168, "learning_rate": 5e-06, "loss": 1.1778, "step": 30450 }, { "epoch": 1.6098940302846119, "grad_norm": 66.64910125732422, "learning_rate": 5e-06, "loss": 1.3498, "step": 30460 }, { "epoch": 1.6104225575434055, "grad_norm": 44.3563346862793, "learning_rate": 5e-06, "loss": 1.4743, "step": 30470 }, { "epoch": 1.6109510848021986, "grad_norm": 60.04610061645508, "learning_rate": 5e-06, "loss": 1.5381, "step": 30480 }, { "epoch": 1.611479612060992, "grad_norm": 51.97053146362305, "learning_rate": 5e-06, "loss": 1.303, "step": 30490 }, { "epoch": 1.6120081393197854, "grad_norm": 62.00859069824219, "learning_rate": 5e-06, "loss": 1.4293, "step": 30500 }, { "epoch": 1.6125366665785787, "grad_norm": 94.77154541015625, "learning_rate": 5e-06, "loss": 1.1666, "step": 30510 }, { "epoch": 1.6130651938373721, "grad_norm": 51.846168518066406, "learning_rate": 5e-06, "loss": 1.4963, "step": 30520 }, { "epoch": 1.6135937210961655, "grad_norm": 78.77046966552734, "learning_rate": 5e-06, "loss": 1.1797, "step": 30530 }, { "epoch": 1.6141222483549589, "grad_norm": 53.78853988647461, "learning_rate": 5e-06, "loss": 1.1652, "step": 30540 }, { "epoch": 1.6146507756137523, "grad_norm": 41.021827697753906, "learning_rate": 5e-06, "loss": 0.9379, "step": 30550 }, { "epoch": 1.6151793028725456, "grad_norm": 56.2506103515625, "learning_rate": 5e-06, "loss": 1.2076, "step": 30560 }, { "epoch": 1.615707830131339, "grad_norm": 64.29259490966797, "learning_rate": 5e-06, "loss": 1.0748, "step": 30570 }, { "epoch": 1.6162363573901324, "grad_norm": 59.780609130859375, "learning_rate": 5e-06, "loss": 1.213, "step": 30580 }, { "epoch": 1.6167648846489258, "grad_norm": 55.08890151977539, "learning_rate": 5e-06, "loss": 1.4922, "step": 30590 }, { "epoch": 1.6172934119077191, "grad_norm": 55.9611930847168, "learning_rate": 5e-06, "loss": 1.3427, "step": 30600 }, { "epoch": 1.6178219391665125, "grad_norm": 63.1608772277832, "learning_rate": 5e-06, "loss": 1.5222, "step": 30610 }, { "epoch": 1.618350466425306, "grad_norm": 67.90170288085938, "learning_rate": 5e-06, "loss": 1.1987, "step": 30620 }, { "epoch": 1.6188789936840993, "grad_norm": 57.92901611328125, "learning_rate": 5e-06, "loss": 1.1699, "step": 30630 }, { "epoch": 1.6194075209428926, "grad_norm": 56.54025650024414, "learning_rate": 5e-06, "loss": 1.2812, "step": 30640 }, { "epoch": 1.619936048201686, "grad_norm": 66.2483901977539, "learning_rate": 5e-06, "loss": 1.3655, "step": 30650 }, { "epoch": 1.6204645754604794, "grad_norm": 68.03911590576172, "learning_rate": 5e-06, "loss": 1.1224, "step": 30660 }, { "epoch": 1.6209931027192728, "grad_norm": 61.53436279296875, "learning_rate": 5e-06, "loss": 1.2459, "step": 30670 }, { "epoch": 1.6215216299780661, "grad_norm": 61.15830993652344, "learning_rate": 5e-06, "loss": 1.3892, "step": 30680 }, { "epoch": 1.6220501572368595, "grad_norm": 73.14398956298828, "learning_rate": 5e-06, "loss": 1.1979, "step": 30690 }, { "epoch": 1.622578684495653, "grad_norm": 64.87484741210938, "learning_rate": 5e-06, "loss": 1.4339, "step": 30700 }, { "epoch": 1.6231072117544463, "grad_norm": 32.464988708496094, "learning_rate": 5e-06, "loss": 1.2799, "step": 30710 }, { "epoch": 1.6236357390132397, "grad_norm": 79.91792297363281, "learning_rate": 5e-06, "loss": 1.0471, "step": 30720 }, { "epoch": 1.624164266272033, "grad_norm": 52.33320236206055, "learning_rate": 5e-06, "loss": 1.3155, "step": 30730 }, { "epoch": 1.6246927935308264, "grad_norm": 80.93895721435547, "learning_rate": 5e-06, "loss": 1.1514, "step": 30740 }, { "epoch": 1.6252213207896196, "grad_norm": 55.318328857421875, "learning_rate": 5e-06, "loss": 1.146, "step": 30750 }, { "epoch": 1.6257498480484132, "grad_norm": 73.6034927368164, "learning_rate": 5e-06, "loss": 1.2075, "step": 30760 }, { "epoch": 1.6262783753072063, "grad_norm": 71.53067779541016, "learning_rate": 5e-06, "loss": 1.6122, "step": 30770 }, { "epoch": 1.626806902566, "grad_norm": 65.3229751586914, "learning_rate": 5e-06, "loss": 1.1288, "step": 30780 }, { "epoch": 1.627335429824793, "grad_norm": 76.7731704711914, "learning_rate": 5e-06, "loss": 1.5738, "step": 30790 }, { "epoch": 1.6278639570835867, "grad_norm": 62.199642181396484, "learning_rate": 5e-06, "loss": 1.2474, "step": 30800 }, { "epoch": 1.6283924843423798, "grad_norm": 49.87674331665039, "learning_rate": 5e-06, "loss": 1.2063, "step": 30810 }, { "epoch": 1.6289210116011734, "grad_norm": 44.75525665283203, "learning_rate": 5e-06, "loss": 1.4932, "step": 30820 }, { "epoch": 1.6294495388599666, "grad_norm": 75.62806701660156, "learning_rate": 5e-06, "loss": 1.448, "step": 30830 }, { "epoch": 1.6299780661187602, "grad_norm": 44.79450607299805, "learning_rate": 5e-06, "loss": 1.1905, "step": 30840 }, { "epoch": 1.6305065933775533, "grad_norm": 85.97756958007812, "learning_rate": 5e-06, "loss": 1.2403, "step": 30850 }, { "epoch": 1.631035120636347, "grad_norm": 38.89265441894531, "learning_rate": 5e-06, "loss": 1.0189, "step": 30860 }, { "epoch": 1.63156364789514, "grad_norm": 54.63916015625, "learning_rate": 5e-06, "loss": 1.535, "step": 30870 }, { "epoch": 1.6320921751539337, "grad_norm": 30.033679962158203, "learning_rate": 5e-06, "loss": 0.9821, "step": 30880 }, { "epoch": 1.6326207024127268, "grad_norm": 38.61416244506836, "learning_rate": 5e-06, "loss": 1.1211, "step": 30890 }, { "epoch": 1.6331492296715204, "grad_norm": 79.64067077636719, "learning_rate": 5e-06, "loss": 1.5806, "step": 30900 }, { "epoch": 1.6336777569303136, "grad_norm": 56.7329216003418, "learning_rate": 5e-06, "loss": 1.7373, "step": 30910 }, { "epoch": 1.6342062841891072, "grad_norm": 63.99620056152344, "learning_rate": 5e-06, "loss": 1.3721, "step": 30920 }, { "epoch": 1.6347348114479003, "grad_norm": 29.872285842895508, "learning_rate": 5e-06, "loss": 1.2261, "step": 30930 }, { "epoch": 1.635263338706694, "grad_norm": 48.37982940673828, "learning_rate": 5e-06, "loss": 1.1965, "step": 30940 }, { "epoch": 1.635791865965487, "grad_norm": 65.16964721679688, "learning_rate": 5e-06, "loss": 1.1681, "step": 30950 }, { "epoch": 1.6363203932242807, "grad_norm": 62.50584030151367, "learning_rate": 5e-06, "loss": 1.2049, "step": 30960 }, { "epoch": 1.6368489204830738, "grad_norm": 43.75497055053711, "learning_rate": 5e-06, "loss": 1.3337, "step": 30970 }, { "epoch": 1.6373774477418674, "grad_norm": 53.50751495361328, "learning_rate": 5e-06, "loss": 1.2995, "step": 30980 }, { "epoch": 1.6379059750006606, "grad_norm": 69.6371841430664, "learning_rate": 5e-06, "loss": 1.1166, "step": 30990 }, { "epoch": 1.6384345022594542, "grad_norm": 49.90292739868164, "learning_rate": 5e-06, "loss": 1.2994, "step": 31000 }, { "epoch": 1.6384345022594542, "eval_loss": 1.343705654144287, "eval_runtime": 35.4662, "eval_samples_per_second": 344.892, "eval_steps_per_second": 10.799, "eval_sts-dev_pearson_cosine": 0.8456583735108788, "eval_sts-dev_pearson_dot": 0.8236050566115587, "eval_sts-dev_pearson_euclidean": 0.8475151899406401, "eval_sts-dev_pearson_manhattan": 0.8474676914786333, "eval_sts-dev_pearson_max": 0.8475151899406401, "eval_sts-dev_spearman_cosine": 0.8478766887868737, "eval_sts-dev_spearman_dot": 0.8207058298780999, "eval_sts-dev_spearman_euclidean": 0.8495885913413754, "eval_sts-dev_spearman_manhattan": 0.8491595897310438, "eval_sts-dev_spearman_max": 0.8495885913413754, "step": 31000 }, { "epoch": 1.6389630295182473, "grad_norm": 55.60801696777344, "learning_rate": 5e-06, "loss": 1.2314, "step": 31010 }, { "epoch": 1.6394915567770407, "grad_norm": 63.608497619628906, "learning_rate": 5e-06, "loss": 1.451, "step": 31020 }, { "epoch": 1.640020084035834, "grad_norm": 50.279685974121094, "learning_rate": 5e-06, "loss": 1.1377, "step": 31030 }, { "epoch": 1.6405486112946275, "grad_norm": 47.23553466796875, "learning_rate": 5e-06, "loss": 1.2627, "step": 31040 }, { "epoch": 1.6410771385534209, "grad_norm": 34.774696350097656, "learning_rate": 5e-06, "loss": 1.0399, "step": 31050 }, { "epoch": 1.6416056658122142, "grad_norm": 58.4742317199707, "learning_rate": 5e-06, "loss": 1.219, "step": 31060 }, { "epoch": 1.6421341930710076, "grad_norm": 79.4696044921875, "learning_rate": 5e-06, "loss": 1.381, "step": 31070 }, { "epoch": 1.642662720329801, "grad_norm": 68.21162414550781, "learning_rate": 5e-06, "loss": 1.4769, "step": 31080 }, { "epoch": 1.6431912475885944, "grad_norm": 35.992881774902344, "learning_rate": 5e-06, "loss": 1.1268, "step": 31090 }, { "epoch": 1.6437197748473877, "grad_norm": 80.10655212402344, "learning_rate": 5e-06, "loss": 1.446, "step": 31100 }, { "epoch": 1.6442483021061811, "grad_norm": 53.28129196166992, "learning_rate": 5e-06, "loss": 1.2527, "step": 31110 }, { "epoch": 1.6447768293649745, "grad_norm": 35.730873107910156, "learning_rate": 5e-06, "loss": 1.2776, "step": 31120 }, { "epoch": 1.6453053566237679, "grad_norm": 59.40654373168945, "learning_rate": 5e-06, "loss": 1.2168, "step": 31130 }, { "epoch": 1.6458338838825612, "grad_norm": 69.36235809326172, "learning_rate": 5e-06, "loss": 1.3588, "step": 31140 }, { "epoch": 1.6463624111413546, "grad_norm": 66.0289077758789, "learning_rate": 5e-06, "loss": 1.3801, "step": 31150 }, { "epoch": 1.646890938400148, "grad_norm": 61.658756256103516, "learning_rate": 5e-06, "loss": 1.1347, "step": 31160 }, { "epoch": 1.6474194656589414, "grad_norm": 42.461570739746094, "learning_rate": 5e-06, "loss": 1.2991, "step": 31170 }, { "epoch": 1.6479479929177347, "grad_norm": 46.57524108886719, "learning_rate": 5e-06, "loss": 1.356, "step": 31180 }, { "epoch": 1.6484765201765281, "grad_norm": 58.66014862060547, "learning_rate": 5e-06, "loss": 1.2072, "step": 31190 }, { "epoch": 1.6490050474353215, "grad_norm": 65.98417663574219, "learning_rate": 5e-06, "loss": 1.3178, "step": 31200 }, { "epoch": 1.6495335746941149, "grad_norm": 80.14251708984375, "learning_rate": 5e-06, "loss": 1.3161, "step": 31210 }, { "epoch": 1.6500621019529083, "grad_norm": 68.35901641845703, "learning_rate": 5e-06, "loss": 1.4715, "step": 31220 }, { "epoch": 1.6505906292117016, "grad_norm": 34.18833923339844, "learning_rate": 5e-06, "loss": 1.3137, "step": 31230 }, { "epoch": 1.651119156470495, "grad_norm": 72.56924438476562, "learning_rate": 5e-06, "loss": 1.2613, "step": 31240 }, { "epoch": 1.6516476837292884, "grad_norm": 152.40487670898438, "learning_rate": 5e-06, "loss": 1.2838, "step": 31250 }, { "epoch": 1.6521762109880818, "grad_norm": 48.45184326171875, "learning_rate": 5e-06, "loss": 1.5064, "step": 31260 }, { "epoch": 1.6527047382468751, "grad_norm": 70.47327423095703, "learning_rate": 5e-06, "loss": 1.2744, "step": 31270 }, { "epoch": 1.6532332655056683, "grad_norm": 82.3625259399414, "learning_rate": 5e-06, "loss": 1.3993, "step": 31280 }, { "epoch": 1.653761792764462, "grad_norm": 76.85757446289062, "learning_rate": 5e-06, "loss": 1.4096, "step": 31290 }, { "epoch": 1.654290320023255, "grad_norm": 58.043609619140625, "learning_rate": 5e-06, "loss": 1.5254, "step": 31300 }, { "epoch": 1.6548188472820486, "grad_norm": 68.23970794677734, "learning_rate": 5e-06, "loss": 1.188, "step": 31310 }, { "epoch": 1.6553473745408418, "grad_norm": 49.635616302490234, "learning_rate": 5e-06, "loss": 1.2327, "step": 31320 }, { "epoch": 1.6558759017996354, "grad_norm": 63.63436508178711, "learning_rate": 5e-06, "loss": 1.1061, "step": 31330 }, { "epoch": 1.6564044290584286, "grad_norm": 63.434940338134766, "learning_rate": 5e-06, "loss": 1.026, "step": 31340 }, { "epoch": 1.6569329563172221, "grad_norm": 60.4963264465332, "learning_rate": 5e-06, "loss": 1.2995, "step": 31350 }, { "epoch": 1.6574614835760153, "grad_norm": 69.485107421875, "learning_rate": 5e-06, "loss": 1.4329, "step": 31360 }, { "epoch": 1.657990010834809, "grad_norm": 66.54202270507812, "learning_rate": 5e-06, "loss": 1.2681, "step": 31370 }, { "epoch": 1.658518538093602, "grad_norm": 44.29518127441406, "learning_rate": 5e-06, "loss": 1.3633, "step": 31380 }, { "epoch": 1.6590470653523957, "grad_norm": 61.27717208862305, "learning_rate": 5e-06, "loss": 1.443, "step": 31390 }, { "epoch": 1.6595755926111888, "grad_norm": 50.850276947021484, "learning_rate": 5e-06, "loss": 1.3547, "step": 31400 }, { "epoch": 1.6601041198699824, "grad_norm": 48.65614318847656, "learning_rate": 5e-06, "loss": 1.2772, "step": 31410 }, { "epoch": 1.6606326471287756, "grad_norm": 85.38390350341797, "learning_rate": 5e-06, "loss": 1.4519, "step": 31420 }, { "epoch": 1.6611611743875692, "grad_norm": 73.85458374023438, "learning_rate": 5e-06, "loss": 1.4909, "step": 31430 }, { "epoch": 1.6616897016463623, "grad_norm": 38.652889251708984, "learning_rate": 5e-06, "loss": 1.2438, "step": 31440 }, { "epoch": 1.662218228905156, "grad_norm": 47.95797348022461, "learning_rate": 5e-06, "loss": 0.971, "step": 31450 }, { "epoch": 1.662746756163949, "grad_norm": 82.71182250976562, "learning_rate": 5e-06, "loss": 1.1472, "step": 31460 }, { "epoch": 1.6632752834227427, "grad_norm": 43.13788986206055, "learning_rate": 5e-06, "loss": 1.4384, "step": 31470 }, { "epoch": 1.6638038106815358, "grad_norm": 66.79840087890625, "learning_rate": 5e-06, "loss": 1.5068, "step": 31480 }, { "epoch": 1.6643323379403294, "grad_norm": 63.88251876831055, "learning_rate": 5e-06, "loss": 1.3266, "step": 31490 }, { "epoch": 1.6648608651991226, "grad_norm": 64.70431518554688, "learning_rate": 5e-06, "loss": 1.3617, "step": 31500 }, { "epoch": 1.6653893924579162, "grad_norm": 50.61076354980469, "learning_rate": 5e-06, "loss": 1.1555, "step": 31510 }, { "epoch": 1.6659179197167093, "grad_norm": 68.80541229248047, "learning_rate": 5e-06, "loss": 1.5435, "step": 31520 }, { "epoch": 1.666446446975503, "grad_norm": 55.1401481628418, "learning_rate": 5e-06, "loss": 1.1828, "step": 31530 }, { "epoch": 1.666974974234296, "grad_norm": 46.76604461669922, "learning_rate": 5e-06, "loss": 1.4354, "step": 31540 }, { "epoch": 1.6675035014930895, "grad_norm": 54.417232513427734, "learning_rate": 5e-06, "loss": 1.3439, "step": 31550 }, { "epoch": 1.6680320287518828, "grad_norm": 82.51524353027344, "learning_rate": 5e-06, "loss": 1.4905, "step": 31560 }, { "epoch": 1.6685605560106762, "grad_norm": 45.966800689697266, "learning_rate": 5e-06, "loss": 1.4144, "step": 31570 }, { "epoch": 1.6690890832694696, "grad_norm": 62.57585525512695, "learning_rate": 5e-06, "loss": 1.4134, "step": 31580 }, { "epoch": 1.669617610528263, "grad_norm": 57.7529296875, "learning_rate": 5e-06, "loss": 1.2558, "step": 31590 }, { "epoch": 1.6701461377870563, "grad_norm": 58.01054763793945, "learning_rate": 5e-06, "loss": 1.1733, "step": 31600 }, { "epoch": 1.6706746650458497, "grad_norm": 54.53889465332031, "learning_rate": 5e-06, "loss": 1.0385, "step": 31610 }, { "epoch": 1.671203192304643, "grad_norm": 59.964759826660156, "learning_rate": 5e-06, "loss": 1.169, "step": 31620 }, { "epoch": 1.6717317195634365, "grad_norm": 81.3023452758789, "learning_rate": 5e-06, "loss": 1.1041, "step": 31630 }, { "epoch": 1.6722602468222298, "grad_norm": 41.24848937988281, "learning_rate": 5e-06, "loss": 1.0, "step": 31640 }, { "epoch": 1.6727887740810232, "grad_norm": 75.99176025390625, "learning_rate": 5e-06, "loss": 1.1679, "step": 31650 }, { "epoch": 1.6733173013398166, "grad_norm": 57.96307373046875, "learning_rate": 5e-06, "loss": 1.5827, "step": 31660 }, { "epoch": 1.67384582859861, "grad_norm": 335.76007080078125, "learning_rate": 5e-06, "loss": 1.1991, "step": 31670 }, { "epoch": 1.6743743558574034, "grad_norm": 64.96198272705078, "learning_rate": 5e-06, "loss": 1.3072, "step": 31680 }, { "epoch": 1.6749028831161967, "grad_norm": 51.96187973022461, "learning_rate": 5e-06, "loss": 1.2697, "step": 31690 }, { "epoch": 1.67543141037499, "grad_norm": 58.741878509521484, "learning_rate": 5e-06, "loss": 1.3298, "step": 31700 }, { "epoch": 1.6759599376337835, "grad_norm": 82.67789459228516, "learning_rate": 5e-06, "loss": 1.4215, "step": 31710 }, { "epoch": 1.6764884648925769, "grad_norm": 64.91748046875, "learning_rate": 5e-06, "loss": 1.4405, "step": 31720 }, { "epoch": 1.6770169921513702, "grad_norm": 42.15787124633789, "learning_rate": 5e-06, "loss": 1.4729, "step": 31730 }, { "epoch": 1.6775455194101636, "grad_norm": 62.20142364501953, "learning_rate": 5e-06, "loss": 1.297, "step": 31740 }, { "epoch": 1.678074046668957, "grad_norm": 66.71304321289062, "learning_rate": 5e-06, "loss": 1.2696, "step": 31750 }, { "epoch": 1.6786025739277504, "grad_norm": 53.9964599609375, "learning_rate": 5e-06, "loss": 1.2869, "step": 31760 }, { "epoch": 1.6791311011865437, "grad_norm": 65.32472229003906, "learning_rate": 5e-06, "loss": 1.1183, "step": 31770 }, { "epoch": 1.6796596284453371, "grad_norm": 42.30437088012695, "learning_rate": 5e-06, "loss": 1.5437, "step": 31780 }, { "epoch": 1.6801881557041305, "grad_norm": 51.85568618774414, "learning_rate": 5e-06, "loss": 1.1567, "step": 31790 }, { "epoch": 1.6807166829629239, "grad_norm": 47.79453659057617, "learning_rate": 5e-06, "loss": 1.2295, "step": 31800 }, { "epoch": 1.681245210221717, "grad_norm": 67.43780517578125, "learning_rate": 5e-06, "loss": 1.146, "step": 31810 }, { "epoch": 1.6817737374805106, "grad_norm": 59.61632537841797, "learning_rate": 5e-06, "loss": 1.3423, "step": 31820 }, { "epoch": 1.6823022647393038, "grad_norm": 45.67245101928711, "learning_rate": 5e-06, "loss": 1.0198, "step": 31830 }, { "epoch": 1.6828307919980974, "grad_norm": 111.51473999023438, "learning_rate": 5e-06, "loss": 1.5291, "step": 31840 }, { "epoch": 1.6833593192568905, "grad_norm": 55.78244400024414, "learning_rate": 5e-06, "loss": 1.4157, "step": 31850 }, { "epoch": 1.6838878465156841, "grad_norm": 67.63272094726562, "learning_rate": 5e-06, "loss": 1.1891, "step": 31860 }, { "epoch": 1.6844163737744773, "grad_norm": 54.22568893432617, "learning_rate": 5e-06, "loss": 1.5837, "step": 31870 }, { "epoch": 1.6849449010332709, "grad_norm": 81.06907653808594, "learning_rate": 5e-06, "loss": 1.3803, "step": 31880 }, { "epoch": 1.685473428292064, "grad_norm": 56.033668518066406, "learning_rate": 5e-06, "loss": 1.2828, "step": 31890 }, { "epoch": 1.6860019555508576, "grad_norm": 23.683719635009766, "learning_rate": 5e-06, "loss": 1.1052, "step": 31900 }, { "epoch": 1.6865304828096508, "grad_norm": 43.42954635620117, "learning_rate": 5e-06, "loss": 1.2808, "step": 31910 }, { "epoch": 1.6870590100684444, "grad_norm": 53.197750091552734, "learning_rate": 5e-06, "loss": 1.3, "step": 31920 }, { "epoch": 1.6875875373272375, "grad_norm": 65.53741455078125, "learning_rate": 5e-06, "loss": 1.2605, "step": 31930 }, { "epoch": 1.6881160645860311, "grad_norm": 67.16991424560547, "learning_rate": 5e-06, "loss": 1.5225, "step": 31940 }, { "epoch": 1.6886445918448243, "grad_norm": 42.880699157714844, "learning_rate": 5e-06, "loss": 1.4001, "step": 31950 }, { "epoch": 1.689173119103618, "grad_norm": 63.90024185180664, "learning_rate": 5e-06, "loss": 1.0204, "step": 31960 }, { "epoch": 1.689701646362411, "grad_norm": 58.075557708740234, "learning_rate": 5e-06, "loss": 1.0752, "step": 31970 }, { "epoch": 1.6902301736212046, "grad_norm": 57.784095764160156, "learning_rate": 5e-06, "loss": 1.3884, "step": 31980 }, { "epoch": 1.6907587008799978, "grad_norm": 35.74909210205078, "learning_rate": 5e-06, "loss": 1.034, "step": 31990 }, { "epoch": 1.6912872281387914, "grad_norm": 83.82180786132812, "learning_rate": 5e-06, "loss": 1.3779, "step": 32000 }, { "epoch": 1.6912872281387914, "eval_loss": 1.330617070198059, "eval_runtime": 36.5769, "eval_samples_per_second": 334.419, "eval_steps_per_second": 10.471, "eval_sts-dev_pearson_cosine": 0.8512591671243915, "eval_sts-dev_pearson_dot": 0.8280287768597405, "eval_sts-dev_pearson_euclidean": 0.8503952072107469, "eval_sts-dev_pearson_manhattan": 0.8499657449761426, "eval_sts-dev_pearson_max": 0.8512591671243915, "eval_sts-dev_spearman_cosine": 0.8524808480032621, "eval_sts-dev_spearman_dot": 0.8246939649735996, "eval_sts-dev_spearman_euclidean": 0.8525399842733375, "eval_sts-dev_spearman_manhattan": 0.8517690319327056, "eval_sts-dev_spearman_max": 0.8525399842733375, "step": 32000 }, { "epoch": 1.6918157553975846, "grad_norm": 92.42147064208984, "learning_rate": 5e-06, "loss": 1.2526, "step": 32010 }, { "epoch": 1.6923442826563782, "grad_norm": 84.5929946899414, "learning_rate": 5e-06, "loss": 1.6893, "step": 32020 }, { "epoch": 1.6928728099151713, "grad_norm": 60.460350036621094, "learning_rate": 5e-06, "loss": 1.3086, "step": 32030 }, { "epoch": 1.693401337173965, "grad_norm": 59.69585037231445, "learning_rate": 5e-06, "loss": 1.0836, "step": 32040 }, { "epoch": 1.693929864432758, "grad_norm": 70.93650817871094, "learning_rate": 5e-06, "loss": 1.3783, "step": 32050 }, { "epoch": 1.6944583916915517, "grad_norm": 51.59893035888672, "learning_rate": 5e-06, "loss": 0.9692, "step": 32060 }, { "epoch": 1.6949869189503448, "grad_norm": 59.058162689208984, "learning_rate": 5e-06, "loss": 1.1219, "step": 32070 }, { "epoch": 1.6955154462091382, "grad_norm": 79.61420440673828, "learning_rate": 5e-06, "loss": 1.2089, "step": 32080 }, { "epoch": 1.6960439734679316, "grad_norm": 72.13050842285156, "learning_rate": 5e-06, "loss": 1.2346, "step": 32090 }, { "epoch": 1.696572500726725, "grad_norm": 80.80980682373047, "learning_rate": 5e-06, "loss": 1.4697, "step": 32100 }, { "epoch": 1.6971010279855183, "grad_norm": 41.185035705566406, "learning_rate": 5e-06, "loss": 1.3924, "step": 32110 }, { "epoch": 1.6976295552443117, "grad_norm": 42.05282211303711, "learning_rate": 5e-06, "loss": 1.1985, "step": 32120 }, { "epoch": 1.698158082503105, "grad_norm": 62.828800201416016, "learning_rate": 5e-06, "loss": 1.0684, "step": 32130 }, { "epoch": 1.6986866097618984, "grad_norm": 54.82646942138672, "learning_rate": 5e-06, "loss": 1.6912, "step": 32140 }, { "epoch": 1.6992151370206918, "grad_norm": 65.66151428222656, "learning_rate": 5e-06, "loss": 1.1914, "step": 32150 }, { "epoch": 1.6997436642794852, "grad_norm": 50.503135681152344, "learning_rate": 5e-06, "loss": 1.3228, "step": 32160 }, { "epoch": 1.7002721915382786, "grad_norm": 73.315673828125, "learning_rate": 5e-06, "loss": 1.0678, "step": 32170 }, { "epoch": 1.700800718797072, "grad_norm": 61.23784637451172, "learning_rate": 5e-06, "loss": 1.3563, "step": 32180 }, { "epoch": 1.7013292460558653, "grad_norm": 54.984737396240234, "learning_rate": 5e-06, "loss": 1.4703, "step": 32190 }, { "epoch": 1.7018577733146587, "grad_norm": 72.98963165283203, "learning_rate": 5e-06, "loss": 1.4767, "step": 32200 }, { "epoch": 1.702386300573452, "grad_norm": 77.67941284179688, "learning_rate": 5e-06, "loss": 1.3243, "step": 32210 }, { "epoch": 1.7029148278322455, "grad_norm": 109.08268737792969, "learning_rate": 5e-06, "loss": 1.517, "step": 32220 }, { "epoch": 1.7034433550910388, "grad_norm": 67.05606079101562, "learning_rate": 5e-06, "loss": 1.2481, "step": 32230 }, { "epoch": 1.7039718823498322, "grad_norm": 48.122467041015625, "learning_rate": 5e-06, "loss": 1.6173, "step": 32240 }, { "epoch": 1.7045004096086256, "grad_norm": 62.783660888671875, "learning_rate": 5e-06, "loss": 1.502, "step": 32250 }, { "epoch": 1.705028936867419, "grad_norm": 33.61434555053711, "learning_rate": 5e-06, "loss": 1.1594, "step": 32260 }, { "epoch": 1.7055574641262123, "grad_norm": 60.95294952392578, "learning_rate": 5e-06, "loss": 1.397, "step": 32270 }, { "epoch": 1.7060859913850057, "grad_norm": 55.350059509277344, "learning_rate": 5e-06, "loss": 1.2753, "step": 32280 }, { "epoch": 1.706614518643799, "grad_norm": 54.6579704284668, "learning_rate": 5e-06, "loss": 1.0975, "step": 32290 }, { "epoch": 1.7071430459025925, "grad_norm": 50.34257125854492, "learning_rate": 5e-06, "loss": 1.2953, "step": 32300 }, { "epoch": 1.7076715731613858, "grad_norm": 53.06980895996094, "learning_rate": 5e-06, "loss": 1.2855, "step": 32310 }, { "epoch": 1.7082001004201792, "grad_norm": 51.65300369262695, "learning_rate": 5e-06, "loss": 1.0232, "step": 32320 }, { "epoch": 1.7087286276789726, "grad_norm": 51.24443435668945, "learning_rate": 5e-06, "loss": 1.2306, "step": 32330 }, { "epoch": 1.7092571549377658, "grad_norm": 43.16934585571289, "learning_rate": 5e-06, "loss": 1.2168, "step": 32340 }, { "epoch": 1.7097856821965594, "grad_norm": 88.2076644897461, "learning_rate": 5e-06, "loss": 1.0677, "step": 32350 }, { "epoch": 1.7103142094553525, "grad_norm": 76.3524169921875, "learning_rate": 5e-06, "loss": 1.4887, "step": 32360 }, { "epoch": 1.710842736714146, "grad_norm": 99.64130401611328, "learning_rate": 5e-06, "loss": 1.2089, "step": 32370 }, { "epoch": 1.7113712639729393, "grad_norm": 58.75669479370117, "learning_rate": 5e-06, "loss": 1.0732, "step": 32380 }, { "epoch": 1.7118997912317329, "grad_norm": 58.819149017333984, "learning_rate": 5e-06, "loss": 1.5187, "step": 32390 }, { "epoch": 1.712428318490526, "grad_norm": 21.212162017822266, "learning_rate": 5e-06, "loss": 1.0057, "step": 32400 }, { "epoch": 1.7129568457493196, "grad_norm": 52.7188606262207, "learning_rate": 5e-06, "loss": 1.2875, "step": 32410 }, { "epoch": 1.7134853730081128, "grad_norm": 33.70364761352539, "learning_rate": 5e-06, "loss": 1.2045, "step": 32420 }, { "epoch": 1.7140139002669064, "grad_norm": 47.88471603393555, "learning_rate": 5e-06, "loss": 1.2538, "step": 32430 }, { "epoch": 1.7145424275256995, "grad_norm": 80.36946105957031, "learning_rate": 5e-06, "loss": 1.541, "step": 32440 }, { "epoch": 1.7150709547844931, "grad_norm": 88.33899688720703, "learning_rate": 5e-06, "loss": 1.3413, "step": 32450 }, { "epoch": 1.7155994820432863, "grad_norm": 82.73915100097656, "learning_rate": 5e-06, "loss": 1.5937, "step": 32460 }, { "epoch": 1.7161280093020799, "grad_norm": 60.204689025878906, "learning_rate": 5e-06, "loss": 1.3148, "step": 32470 }, { "epoch": 1.716656536560873, "grad_norm": 63.874088287353516, "learning_rate": 5e-06, "loss": 1.4396, "step": 32480 }, { "epoch": 1.7171850638196666, "grad_norm": 57.434974670410156, "learning_rate": 5e-06, "loss": 1.3613, "step": 32490 }, { "epoch": 1.7177135910784598, "grad_norm": 59.78832244873047, "learning_rate": 5e-06, "loss": 1.6015, "step": 32500 }, { "epoch": 1.7182421183372534, "grad_norm": 65.44934844970703, "learning_rate": 5e-06, "loss": 1.2519, "step": 32510 }, { "epoch": 1.7187706455960465, "grad_norm": 90.75067901611328, "learning_rate": 5e-06, "loss": 1.2883, "step": 32520 }, { "epoch": 1.7192991728548401, "grad_norm": 47.94356918334961, "learning_rate": 5e-06, "loss": 1.1583, "step": 32530 }, { "epoch": 1.7198277001136333, "grad_norm": 46.339515686035156, "learning_rate": 5e-06, "loss": 1.2876, "step": 32540 }, { "epoch": 1.7203562273724269, "grad_norm": 32.699493408203125, "learning_rate": 5e-06, "loss": 1.2746, "step": 32550 }, { "epoch": 1.72088475463122, "grad_norm": 48.04414749145508, "learning_rate": 5e-06, "loss": 1.1851, "step": 32560 }, { "epoch": 1.7214132818900136, "grad_norm": 49.2949104309082, "learning_rate": 5e-06, "loss": 1.3465, "step": 32570 }, { "epoch": 1.7219418091488068, "grad_norm": 81.90032196044922, "learning_rate": 5e-06, "loss": 1.3775, "step": 32580 }, { "epoch": 1.7224703364076002, "grad_norm": 23.272125244140625, "learning_rate": 5e-06, "loss": 1.0096, "step": 32590 }, { "epoch": 1.7229988636663935, "grad_norm": 65.33106994628906, "learning_rate": 5e-06, "loss": 1.2655, "step": 32600 }, { "epoch": 1.723527390925187, "grad_norm": 45.76142883300781, "learning_rate": 5e-06, "loss": 1.2471, "step": 32610 }, { "epoch": 1.7240559181839803, "grad_norm": 75.01055145263672, "learning_rate": 5e-06, "loss": 1.4701, "step": 32620 }, { "epoch": 1.7245844454427737, "grad_norm": 41.546356201171875, "learning_rate": 5e-06, "loss": 1.4139, "step": 32630 }, { "epoch": 1.725112972701567, "grad_norm": 56.61280822753906, "learning_rate": 5e-06, "loss": 1.1076, "step": 32640 }, { "epoch": 1.7256414999603604, "grad_norm": 47.783714294433594, "learning_rate": 5e-06, "loss": 1.4049, "step": 32650 }, { "epoch": 1.7261700272191538, "grad_norm": 55.10177230834961, "learning_rate": 5e-06, "loss": 1.6174, "step": 32660 }, { "epoch": 1.7266985544779472, "grad_norm": 95.15694427490234, "learning_rate": 5e-06, "loss": 1.5311, "step": 32670 }, { "epoch": 1.7272270817367406, "grad_norm": 116.92245483398438, "learning_rate": 5e-06, "loss": 1.1779, "step": 32680 }, { "epoch": 1.727755608995534, "grad_norm": 55.29475021362305, "learning_rate": 5e-06, "loss": 1.4845, "step": 32690 }, { "epoch": 1.7282841362543273, "grad_norm": 70.47010040283203, "learning_rate": 5e-06, "loss": 1.1846, "step": 32700 }, { "epoch": 1.7288126635131207, "grad_norm": 56.93955612182617, "learning_rate": 5e-06, "loss": 1.2022, "step": 32710 }, { "epoch": 1.729341190771914, "grad_norm": 57.56611633300781, "learning_rate": 5e-06, "loss": 1.4357, "step": 32720 }, { "epoch": 1.7298697180307074, "grad_norm": 63.752071380615234, "learning_rate": 5e-06, "loss": 1.2632, "step": 32730 }, { "epoch": 1.7303982452895008, "grad_norm": 76.33563995361328, "learning_rate": 5e-06, "loss": 1.4441, "step": 32740 }, { "epoch": 1.7309267725482942, "grad_norm": 35.30137634277344, "learning_rate": 5e-06, "loss": 1.3508, "step": 32750 }, { "epoch": 1.7314552998070876, "grad_norm": 28.916339874267578, "learning_rate": 5e-06, "loss": 0.8306, "step": 32760 }, { "epoch": 1.731983827065881, "grad_norm": 34.01126480102539, "learning_rate": 5e-06, "loss": 1.2166, "step": 32770 }, { "epoch": 1.7325123543246743, "grad_norm": 55.19509506225586, "learning_rate": 5e-06, "loss": 1.26, "step": 32780 }, { "epoch": 1.7330408815834677, "grad_norm": 90.2646713256836, "learning_rate": 5e-06, "loss": 1.4576, "step": 32790 }, { "epoch": 1.733569408842261, "grad_norm": 60.079830169677734, "learning_rate": 5e-06, "loss": 1.2055, "step": 32800 }, { "epoch": 1.7340979361010544, "grad_norm": 83.39769744873047, "learning_rate": 5e-06, "loss": 1.3491, "step": 32810 }, { "epoch": 1.7346264633598478, "grad_norm": 66.99046325683594, "learning_rate": 5e-06, "loss": 1.4273, "step": 32820 }, { "epoch": 1.7351549906186412, "grad_norm": 58.89002990722656, "learning_rate": 5e-06, "loss": 1.5338, "step": 32830 }, { "epoch": 1.7356835178774346, "grad_norm": 59.00112533569336, "learning_rate": 5e-06, "loss": 1.3145, "step": 32840 }, { "epoch": 1.736212045136228, "grad_norm": 57.027347564697266, "learning_rate": 5e-06, "loss": 1.2469, "step": 32850 }, { "epoch": 1.7367405723950213, "grad_norm": 58.74709701538086, "learning_rate": 5e-06, "loss": 1.1801, "step": 32860 }, { "epoch": 1.7372690996538145, "grad_norm": 87.6282958984375, "learning_rate": 5e-06, "loss": 1.1369, "step": 32870 }, { "epoch": 1.737797626912608, "grad_norm": 56.77216720581055, "learning_rate": 5e-06, "loss": 1.287, "step": 32880 }, { "epoch": 1.7383261541714012, "grad_norm": 47.291168212890625, "learning_rate": 5e-06, "loss": 1.0792, "step": 32890 }, { "epoch": 1.7388546814301948, "grad_norm": 38.02457046508789, "learning_rate": 5e-06, "loss": 1.4088, "step": 32900 }, { "epoch": 1.739383208688988, "grad_norm": 40.76594161987305, "learning_rate": 5e-06, "loss": 1.344, "step": 32910 }, { "epoch": 1.7399117359477816, "grad_norm": 41.4588508605957, "learning_rate": 5e-06, "loss": 1.3965, "step": 32920 }, { "epoch": 1.7404402632065747, "grad_norm": 56.933414459228516, "learning_rate": 5e-06, "loss": 1.2203, "step": 32930 }, { "epoch": 1.7409687904653683, "grad_norm": 52.588558197021484, "learning_rate": 5e-06, "loss": 1.1422, "step": 32940 }, { "epoch": 1.7414973177241615, "grad_norm": 51.92063903808594, "learning_rate": 5e-06, "loss": 1.2204, "step": 32950 }, { "epoch": 1.742025844982955, "grad_norm": 56.55862808227539, "learning_rate": 5e-06, "loss": 0.9529, "step": 32960 }, { "epoch": 1.7425543722417483, "grad_norm": 62.046607971191406, "learning_rate": 5e-06, "loss": 1.2354, "step": 32970 }, { "epoch": 1.7430828995005418, "grad_norm": 43.24995803833008, "learning_rate": 5e-06, "loss": 1.3199, "step": 32980 }, { "epoch": 1.743611426759335, "grad_norm": 53.9385871887207, "learning_rate": 5e-06, "loss": 1.0348, "step": 32990 }, { "epoch": 1.7441399540181286, "grad_norm": 52.61133575439453, "learning_rate": 5e-06, "loss": 1.1847, "step": 33000 }, { "epoch": 1.7441399540181286, "eval_loss": 1.3215175867080688, "eval_runtime": 37.1302, "eval_samples_per_second": 329.435, "eval_steps_per_second": 10.315, "eval_sts-dev_pearson_cosine": 0.851199168334393, "eval_sts-dev_pearson_dot": 0.8321721689082475, "eval_sts-dev_pearson_euclidean": 0.8498544589149055, "eval_sts-dev_pearson_manhattan": 0.8499592908556184, "eval_sts-dev_pearson_max": 0.851199168334393, "eval_sts-dev_spearman_cosine": 0.8530344153773509, "eval_sts-dev_spearman_dot": 0.8308723190888475, "eval_sts-dev_spearman_euclidean": 0.8532407677444032, "eval_sts-dev_spearman_manhattan": 0.853096754176196, "eval_sts-dev_spearman_max": 0.8532407677444032, "step": 33000 }, { "epoch": 1.7446684812769218, "grad_norm": 67.75917053222656, "learning_rate": 5e-06, "loss": 1.2157, "step": 33010 }, { "epoch": 1.7451970085357154, "grad_norm": 59.75360107421875, "learning_rate": 5e-06, "loss": 1.2648, "step": 33020 }, { "epoch": 1.7457255357945085, "grad_norm": 70.63703918457031, "learning_rate": 5e-06, "loss": 1.2706, "step": 33030 }, { "epoch": 1.746254063053302, "grad_norm": 66.32929992675781, "learning_rate": 5e-06, "loss": 1.422, "step": 33040 }, { "epoch": 1.7467825903120953, "grad_norm": 71.07129669189453, "learning_rate": 5e-06, "loss": 1.4776, "step": 33050 }, { "epoch": 1.7473111175708889, "grad_norm": 51.606510162353516, "learning_rate": 5e-06, "loss": 1.3032, "step": 33060 }, { "epoch": 1.747839644829682, "grad_norm": 49.34293746948242, "learning_rate": 5e-06, "loss": 1.2495, "step": 33070 }, { "epoch": 1.7483681720884756, "grad_norm": 53.27204895019531, "learning_rate": 5e-06, "loss": 0.9858, "step": 33080 }, { "epoch": 1.7488966993472688, "grad_norm": 86.07818603515625, "learning_rate": 5e-06, "loss": 1.3451, "step": 33090 }, { "epoch": 1.7494252266060624, "grad_norm": 82.5360336303711, "learning_rate": 5e-06, "loss": 1.2559, "step": 33100 }, { "epoch": 1.7499537538648555, "grad_norm": 25.825977325439453, "learning_rate": 5e-06, "loss": 1.1796, "step": 33110 }, { "epoch": 1.750482281123649, "grad_norm": 55.48245620727539, "learning_rate": 5e-06, "loss": 1.1883, "step": 33120 }, { "epoch": 1.7510108083824423, "grad_norm": 53.412750244140625, "learning_rate": 5e-06, "loss": 1.2827, "step": 33130 }, { "epoch": 1.7515393356412357, "grad_norm": 25.983047485351562, "learning_rate": 5e-06, "loss": 1.0852, "step": 33140 }, { "epoch": 1.752067862900029, "grad_norm": 80.1585922241211, "learning_rate": 5e-06, "loss": 1.4255, "step": 33150 }, { "epoch": 1.7525963901588224, "grad_norm": 43.511375427246094, "learning_rate": 5e-06, "loss": 1.1468, "step": 33160 }, { "epoch": 1.7531249174176158, "grad_norm": 22.106292724609375, "learning_rate": 5e-06, "loss": 0.9698, "step": 33170 }, { "epoch": 1.7536534446764092, "grad_norm": 57.60072708129883, "learning_rate": 5e-06, "loss": 1.0769, "step": 33180 }, { "epoch": 1.7541819719352025, "grad_norm": 64.14100646972656, "learning_rate": 5e-06, "loss": 1.1074, "step": 33190 }, { "epoch": 1.754710499193996, "grad_norm": 92.71598815917969, "learning_rate": 5e-06, "loss": 1.4589, "step": 33200 }, { "epoch": 1.7552390264527893, "grad_norm": 43.498844146728516, "learning_rate": 5e-06, "loss": 1.239, "step": 33210 }, { "epoch": 1.7557675537115827, "grad_norm": 50.223812103271484, "learning_rate": 5e-06, "loss": 1.3238, "step": 33220 }, { "epoch": 1.756296080970376, "grad_norm": 62.46632385253906, "learning_rate": 5e-06, "loss": 1.1985, "step": 33230 }, { "epoch": 1.7568246082291694, "grad_norm": 44.360328674316406, "learning_rate": 5e-06, "loss": 1.3617, "step": 33240 }, { "epoch": 1.7573531354879628, "grad_norm": 77.78473663330078, "learning_rate": 5e-06, "loss": 1.6292, "step": 33250 }, { "epoch": 1.7578816627467562, "grad_norm": 67.98311614990234, "learning_rate": 5e-06, "loss": 1.4692, "step": 33260 }, { "epoch": 1.7584101900055495, "grad_norm": 63.04520034790039, "learning_rate": 5e-06, "loss": 1.2419, "step": 33270 }, { "epoch": 1.758938717264343, "grad_norm": 55.779808044433594, "learning_rate": 5e-06, "loss": 1.4002, "step": 33280 }, { "epoch": 1.7594672445231363, "grad_norm": 55.96197509765625, "learning_rate": 5e-06, "loss": 1.1766, "step": 33290 }, { "epoch": 1.7599957717819297, "grad_norm": 55.60296630859375, "learning_rate": 5e-06, "loss": 1.1138, "step": 33300 }, { "epoch": 1.760524299040723, "grad_norm": 73.82131958007812, "learning_rate": 5e-06, "loss": 1.2627, "step": 33310 }, { "epoch": 1.7610528262995164, "grad_norm": 52.6255989074707, "learning_rate": 5e-06, "loss": 1.2105, "step": 33320 }, { "epoch": 1.7615813535583098, "grad_norm": 56.960777282714844, "learning_rate": 5e-06, "loss": 1.1924, "step": 33330 }, { "epoch": 1.7621098808171032, "grad_norm": 48.40010070800781, "learning_rate": 5e-06, "loss": 1.2314, "step": 33340 }, { "epoch": 1.7626384080758966, "grad_norm": 32.891544342041016, "learning_rate": 5e-06, "loss": 1.2137, "step": 33350 }, { "epoch": 1.76316693533469, "grad_norm": 87.52027893066406, "learning_rate": 5e-06, "loss": 1.2381, "step": 33360 }, { "epoch": 1.7636954625934833, "grad_norm": 70.98344421386719, "learning_rate": 5e-06, "loss": 1.2882, "step": 33370 }, { "epoch": 1.7642239898522767, "grad_norm": 36.194580078125, "learning_rate": 5e-06, "loss": 1.1388, "step": 33380 }, { "epoch": 1.76475251711107, "grad_norm": 38.49562454223633, "learning_rate": 5e-06, "loss": 1.1297, "step": 33390 }, { "epoch": 1.7652810443698632, "grad_norm": 73.038818359375, "learning_rate": 5e-06, "loss": 1.5347, "step": 33400 }, { "epoch": 1.7658095716286568, "grad_norm": 103.56266021728516, "learning_rate": 5e-06, "loss": 1.2449, "step": 33410 }, { "epoch": 1.76633809888745, "grad_norm": 88.69149017333984, "learning_rate": 5e-06, "loss": 1.3627, "step": 33420 }, { "epoch": 1.7668666261462436, "grad_norm": 58.33865737915039, "learning_rate": 5e-06, "loss": 1.2105, "step": 33430 }, { "epoch": 1.7673951534050367, "grad_norm": 60.59918975830078, "learning_rate": 5e-06, "loss": 1.3891, "step": 33440 }, { "epoch": 1.7679236806638303, "grad_norm": 39.43567657470703, "learning_rate": 5e-06, "loss": 1.3317, "step": 33450 }, { "epoch": 1.7684522079226235, "grad_norm": 40.017269134521484, "learning_rate": 5e-06, "loss": 1.0952, "step": 33460 }, { "epoch": 1.768980735181417, "grad_norm": 69.17350769042969, "learning_rate": 5e-06, "loss": 1.1448, "step": 33470 }, { "epoch": 1.7695092624402102, "grad_norm": 79.23472595214844, "learning_rate": 5e-06, "loss": 1.3604, "step": 33480 }, { "epoch": 1.7700377896990038, "grad_norm": 54.79966735839844, "learning_rate": 5e-06, "loss": 1.1324, "step": 33490 }, { "epoch": 1.770566316957797, "grad_norm": 59.56077575683594, "learning_rate": 5e-06, "loss": 1.3328, "step": 33500 }, { "epoch": 1.7710948442165906, "grad_norm": 60.155391693115234, "learning_rate": 5e-06, "loss": 1.298, "step": 33510 }, { "epoch": 1.7716233714753837, "grad_norm": 56.82815170288086, "learning_rate": 5e-06, "loss": 1.4539, "step": 33520 }, { "epoch": 1.7721518987341773, "grad_norm": 55.90195083618164, "learning_rate": 5e-06, "loss": 1.4158, "step": 33530 }, { "epoch": 1.7726804259929705, "grad_norm": 40.326080322265625, "learning_rate": 5e-06, "loss": 1.2422, "step": 33540 }, { "epoch": 1.773208953251764, "grad_norm": 48.895050048828125, "learning_rate": 5e-06, "loss": 1.221, "step": 33550 }, { "epoch": 1.7737374805105572, "grad_norm": 79.3000259399414, "learning_rate": 5e-06, "loss": 0.9455, "step": 33560 }, { "epoch": 1.7742660077693508, "grad_norm": 51.00856018066406, "learning_rate": 5e-06, "loss": 1.1929, "step": 33570 }, { "epoch": 1.774794535028144, "grad_norm": 58.94082260131836, "learning_rate": 5e-06, "loss": 1.3221, "step": 33580 }, { "epoch": 1.7753230622869376, "grad_norm": 40.02196502685547, "learning_rate": 5e-06, "loss": 1.1114, "step": 33590 }, { "epoch": 1.7758515895457307, "grad_norm": 49.932884216308594, "learning_rate": 5e-06, "loss": 1.4385, "step": 33600 }, { "epoch": 1.7763801168045243, "grad_norm": 44.251731872558594, "learning_rate": 5e-06, "loss": 1.3115, "step": 33610 }, { "epoch": 1.7769086440633175, "grad_norm": 51.12035369873047, "learning_rate": 5e-06, "loss": 1.1925, "step": 33620 }, { "epoch": 1.777437171322111, "grad_norm": 49.01293182373047, "learning_rate": 5e-06, "loss": 1.0405, "step": 33630 }, { "epoch": 1.7779656985809043, "grad_norm": 53.07071304321289, "learning_rate": 5e-06, "loss": 1.04, "step": 33640 }, { "epoch": 1.7784942258396976, "grad_norm": 29.874717712402344, "learning_rate": 5e-06, "loss": 1.0173, "step": 33650 }, { "epoch": 1.779022753098491, "grad_norm": 63.591495513916016, "learning_rate": 5e-06, "loss": 1.4395, "step": 33660 }, { "epoch": 1.7795512803572844, "grad_norm": 49.83130645751953, "learning_rate": 5e-06, "loss": 1.0967, "step": 33670 }, { "epoch": 1.7800798076160778, "grad_norm": 64.14901733398438, "learning_rate": 5e-06, "loss": 1.3165, "step": 33680 }, { "epoch": 1.7806083348748711, "grad_norm": 62.76055145263672, "learning_rate": 5e-06, "loss": 1.3423, "step": 33690 }, { "epoch": 1.7811368621336645, "grad_norm": 40.888858795166016, "learning_rate": 5e-06, "loss": 0.9995, "step": 33700 }, { "epoch": 1.7816653893924579, "grad_norm": 76.30546569824219, "learning_rate": 5e-06, "loss": 1.4463, "step": 33710 }, { "epoch": 1.7821939166512513, "grad_norm": 114.20946502685547, "learning_rate": 5e-06, "loss": 1.4812, "step": 33720 }, { "epoch": 1.7827224439100446, "grad_norm": 73.99855041503906, "learning_rate": 5e-06, "loss": 1.2353, "step": 33730 }, { "epoch": 1.783250971168838, "grad_norm": 80.70513153076172, "learning_rate": 5e-06, "loss": 1.2772, "step": 33740 }, { "epoch": 1.7837794984276314, "grad_norm": 63.475616455078125, "learning_rate": 5e-06, "loss": 1.0286, "step": 33750 }, { "epoch": 1.7843080256864248, "grad_norm": 40.2576904296875, "learning_rate": 5e-06, "loss": 0.9973, "step": 33760 }, { "epoch": 1.7848365529452181, "grad_norm": 58.28984832763672, "learning_rate": 5e-06, "loss": 1.3899, "step": 33770 }, { "epoch": 1.7853650802040115, "grad_norm": 75.47794342041016, "learning_rate": 5e-06, "loss": 1.4418, "step": 33780 }, { "epoch": 1.785893607462805, "grad_norm": 64.04219055175781, "learning_rate": 5e-06, "loss": 1.2597, "step": 33790 }, { "epoch": 1.7864221347215983, "grad_norm": 41.63417053222656, "learning_rate": 5e-06, "loss": 1.37, "step": 33800 }, { "epoch": 1.7869506619803917, "grad_norm": 86.96611022949219, "learning_rate": 5e-06, "loss": 1.4745, "step": 33810 }, { "epoch": 1.787479189239185, "grad_norm": 71.58750915527344, "learning_rate": 5e-06, "loss": 0.8799, "step": 33820 }, { "epoch": 1.7880077164979784, "grad_norm": 57.78753662109375, "learning_rate": 5e-06, "loss": 1.3464, "step": 33830 }, { "epoch": 1.7885362437567718, "grad_norm": 73.6544418334961, "learning_rate": 5e-06, "loss": 1.2032, "step": 33840 }, { "epoch": 1.7890647710155652, "grad_norm": 68.14430236816406, "learning_rate": 5e-06, "loss": 1.3082, "step": 33850 }, { "epoch": 1.7895932982743585, "grad_norm": 62.96996307373047, "learning_rate": 5e-06, "loss": 1.3251, "step": 33860 }, { "epoch": 1.790121825533152, "grad_norm": 52.47750473022461, "learning_rate": 5e-06, "loss": 1.2193, "step": 33870 }, { "epoch": 1.7906503527919453, "grad_norm": 92.67930603027344, "learning_rate": 5e-06, "loss": 1.0706, "step": 33880 }, { "epoch": 1.7911788800507387, "grad_norm": 57.83734893798828, "learning_rate": 5e-06, "loss": 1.3149, "step": 33890 }, { "epoch": 1.791707407309532, "grad_norm": 52.931636810302734, "learning_rate": 5e-06, "loss": 1.6994, "step": 33900 }, { "epoch": 1.7922359345683252, "grad_norm": 50.074867248535156, "learning_rate": 5e-06, "loss": 1.3907, "step": 33910 }, { "epoch": 1.7927644618271188, "grad_norm": 77.64434814453125, "learning_rate": 5e-06, "loss": 1.1517, "step": 33920 }, { "epoch": 1.793292989085912, "grad_norm": 95.42201232910156, "learning_rate": 5e-06, "loss": 1.3625, "step": 33930 }, { "epoch": 1.7938215163447055, "grad_norm": 85.44052124023438, "learning_rate": 5e-06, "loss": 1.2253, "step": 33940 }, { "epoch": 1.7943500436034987, "grad_norm": 60.96820831298828, "learning_rate": 5e-06, "loss": 1.1748, "step": 33950 }, { "epoch": 1.7948785708622923, "grad_norm": 71.53482055664062, "learning_rate": 5e-06, "loss": 1.2533, "step": 33960 }, { "epoch": 1.7954070981210855, "grad_norm": 45.19137191772461, "learning_rate": 5e-06, "loss": 1.1764, "step": 33970 }, { "epoch": 1.795935625379879, "grad_norm": 44.083526611328125, "learning_rate": 5e-06, "loss": 1.0739, "step": 33980 }, { "epoch": 1.7964641526386722, "grad_norm": 38.39666748046875, "learning_rate": 5e-06, "loss": 1.1432, "step": 33990 }, { "epoch": 1.7969926798974658, "grad_norm": 56.074546813964844, "learning_rate": 5e-06, "loss": 1.346, "step": 34000 }, { "epoch": 1.7969926798974658, "eval_loss": 1.2973145246505737, "eval_runtime": 37.7454, "eval_samples_per_second": 324.066, "eval_steps_per_second": 10.147, "eval_sts-dev_pearson_cosine": 0.8425653578483727, "eval_sts-dev_pearson_dot": 0.8178248991356382, "eval_sts-dev_pearson_euclidean": 0.8433607155166116, "eval_sts-dev_pearson_manhattan": 0.8431469787868768, "eval_sts-dev_pearson_max": 0.8433607155166116, "eval_sts-dev_spearman_cosine": 0.844999019827698, "eval_sts-dev_spearman_dot": 0.8160040841298025, "eval_sts-dev_spearman_euclidean": 0.8456867074357836, "eval_sts-dev_spearman_manhattan": 0.8452471775345209, "eval_sts-dev_spearman_max": 0.8456867074357836, "step": 34000 }, { "epoch": 1.797521207156259, "grad_norm": 49.71381759643555, "learning_rate": 5e-06, "loss": 1.1228, "step": 34010 }, { "epoch": 1.7980497344150526, "grad_norm": 57.12239456176758, "learning_rate": 5e-06, "loss": 1.0973, "step": 34020 }, { "epoch": 1.7985782616738457, "grad_norm": 66.77684783935547, "learning_rate": 5e-06, "loss": 1.2383, "step": 34030 }, { "epoch": 1.7991067889326393, "grad_norm": 51.89598846435547, "learning_rate": 5e-06, "loss": 1.4445, "step": 34040 }, { "epoch": 1.7996353161914325, "grad_norm": 45.866546630859375, "learning_rate": 5e-06, "loss": 1.0611, "step": 34050 }, { "epoch": 1.800163843450226, "grad_norm": 54.38979721069336, "learning_rate": 5e-06, "loss": 1.2269, "step": 34060 }, { "epoch": 1.8006923707090192, "grad_norm": 42.73430633544922, "learning_rate": 5e-06, "loss": 1.1217, "step": 34070 }, { "epoch": 1.8012208979678128, "grad_norm": 78.08465576171875, "learning_rate": 5e-06, "loss": 1.4143, "step": 34080 }, { "epoch": 1.801749425226606, "grad_norm": 69.70826721191406, "learning_rate": 5e-06, "loss": 1.3261, "step": 34090 }, { "epoch": 1.8022779524853996, "grad_norm": 65.09899139404297, "learning_rate": 5e-06, "loss": 1.156, "step": 34100 }, { "epoch": 1.8028064797441927, "grad_norm": 51.34832763671875, "learning_rate": 5e-06, "loss": 1.2159, "step": 34110 }, { "epoch": 1.8033350070029863, "grad_norm": 51.96937942504883, "learning_rate": 5e-06, "loss": 1.3298, "step": 34120 }, { "epoch": 1.8038635342617795, "grad_norm": 71.90108489990234, "learning_rate": 5e-06, "loss": 1.4517, "step": 34130 }, { "epoch": 1.804392061520573, "grad_norm": 73.82743835449219, "learning_rate": 5e-06, "loss": 1.0298, "step": 34140 }, { "epoch": 1.8049205887793662, "grad_norm": 64.88067626953125, "learning_rate": 5e-06, "loss": 1.2386, "step": 34150 }, { "epoch": 1.8054491160381598, "grad_norm": 61.49146270751953, "learning_rate": 5e-06, "loss": 0.8868, "step": 34160 }, { "epoch": 1.805977643296953, "grad_norm": 60.550113677978516, "learning_rate": 5e-06, "loss": 1.1413, "step": 34170 }, { "epoch": 1.8065061705557464, "grad_norm": 67.52088165283203, "learning_rate": 5e-06, "loss": 1.2794, "step": 34180 }, { "epoch": 1.8070346978145397, "grad_norm": 119.52902221679688, "learning_rate": 5e-06, "loss": 1.0665, "step": 34190 }, { "epoch": 1.8075632250733331, "grad_norm": 56.886451721191406, "learning_rate": 5e-06, "loss": 1.3873, "step": 34200 }, { "epoch": 1.8080917523321265, "grad_norm": 52.99644088745117, "learning_rate": 5e-06, "loss": 1.1465, "step": 34210 }, { "epoch": 1.8086202795909199, "grad_norm": 52.69524002075195, "learning_rate": 5e-06, "loss": 1.4141, "step": 34220 }, { "epoch": 1.8091488068497132, "grad_norm": 63.60430908203125, "learning_rate": 5e-06, "loss": 1.4109, "step": 34230 }, { "epoch": 1.8096773341085066, "grad_norm": 62.41635513305664, "learning_rate": 5e-06, "loss": 1.464, "step": 34240 }, { "epoch": 1.8102058613673, "grad_norm": 38.12107849121094, "learning_rate": 5e-06, "loss": 1.0715, "step": 34250 }, { "epoch": 1.8107343886260934, "grad_norm": 68.84140014648438, "learning_rate": 5e-06, "loss": 1.4021, "step": 34260 }, { "epoch": 1.8112629158848867, "grad_norm": 62.28350067138672, "learning_rate": 5e-06, "loss": 1.1368, "step": 34270 }, { "epoch": 1.8117914431436801, "grad_norm": 23.826274871826172, "learning_rate": 5e-06, "loss": 1.2085, "step": 34280 }, { "epoch": 1.8123199704024735, "grad_norm": 39.63323211669922, "learning_rate": 5e-06, "loss": 1.4717, "step": 34290 }, { "epoch": 1.8128484976612669, "grad_norm": 68.50835418701172, "learning_rate": 5e-06, "loss": 1.4729, "step": 34300 }, { "epoch": 1.8133770249200603, "grad_norm": 73.73039245605469, "learning_rate": 5e-06, "loss": 1.4122, "step": 34310 }, { "epoch": 1.8139055521788536, "grad_norm": 44.48155975341797, "learning_rate": 5e-06, "loss": 1.319, "step": 34320 }, { "epoch": 1.814434079437647, "grad_norm": 48.61583709716797, "learning_rate": 5e-06, "loss": 1.1506, "step": 34330 }, { "epoch": 1.8149626066964404, "grad_norm": 69.66651916503906, "learning_rate": 5e-06, "loss": 1.1512, "step": 34340 }, { "epoch": 1.8154911339552338, "grad_norm": 58.751338958740234, "learning_rate": 5e-06, "loss": 1.3536, "step": 34350 }, { "epoch": 1.8160196612140271, "grad_norm": 46.621788024902344, "learning_rate": 5e-06, "loss": 0.8645, "step": 34360 }, { "epoch": 1.8165481884728205, "grad_norm": 75.89000701904297, "learning_rate": 5e-06, "loss": 1.1463, "step": 34370 }, { "epoch": 1.817076715731614, "grad_norm": 55.49875259399414, "learning_rate": 5e-06, "loss": 1.0952, "step": 34380 }, { "epoch": 1.8176052429904073, "grad_norm": 56.25929641723633, "learning_rate": 5e-06, "loss": 1.1387, "step": 34390 }, { "epoch": 1.8181337702492006, "grad_norm": 60.8397331237793, "learning_rate": 5e-06, "loss": 0.8996, "step": 34400 }, { "epoch": 1.818662297507994, "grad_norm": 54.871734619140625, "learning_rate": 5e-06, "loss": 0.9799, "step": 34410 }, { "epoch": 1.8191908247667874, "grad_norm": 84.30791473388672, "learning_rate": 5e-06, "loss": 1.1955, "step": 34420 }, { "epoch": 1.8197193520255808, "grad_norm": 91.80293273925781, "learning_rate": 5e-06, "loss": 1.5502, "step": 34430 }, { "epoch": 1.820247879284374, "grad_norm": 205.8928985595703, "learning_rate": 5e-06, "loss": 1.5062, "step": 34440 }, { "epoch": 1.8207764065431675, "grad_norm": 95.62725830078125, "learning_rate": 5e-06, "loss": 1.2693, "step": 34450 }, { "epoch": 1.8213049338019607, "grad_norm": 69.70640563964844, "learning_rate": 5e-06, "loss": 1.3458, "step": 34460 }, { "epoch": 1.8218334610607543, "grad_norm": 55.68267822265625, "learning_rate": 5e-06, "loss": 1.2888, "step": 34470 }, { "epoch": 1.8223619883195474, "grad_norm": 56.419185638427734, "learning_rate": 5e-06, "loss": 1.0463, "step": 34480 }, { "epoch": 1.822890515578341, "grad_norm": 49.749755859375, "learning_rate": 5e-06, "loss": 1.0258, "step": 34490 }, { "epoch": 1.8234190428371342, "grad_norm": 63.33424377441406, "learning_rate": 5e-06, "loss": 1.4667, "step": 34500 }, { "epoch": 1.8239475700959278, "grad_norm": 92.80966186523438, "learning_rate": 5e-06, "loss": 1.3323, "step": 34510 }, { "epoch": 1.824476097354721, "grad_norm": 31.221500396728516, "learning_rate": 5e-06, "loss": 1.1372, "step": 34520 }, { "epoch": 1.8250046246135145, "grad_norm": 25.9733943939209, "learning_rate": 5e-06, "loss": 1.0579, "step": 34530 }, { "epoch": 1.8255331518723077, "grad_norm": 63.46030807495117, "learning_rate": 5e-06, "loss": 1.2394, "step": 34540 }, { "epoch": 1.8260616791311013, "grad_norm": 72.08595275878906, "learning_rate": 5e-06, "loss": 1.3874, "step": 34550 }, { "epoch": 1.8265902063898944, "grad_norm": 71.5980453491211, "learning_rate": 5e-06, "loss": 1.5371, "step": 34560 }, { "epoch": 1.827118733648688, "grad_norm": 61.494415283203125, "learning_rate": 5e-06, "loss": 1.1812, "step": 34570 }, { "epoch": 1.8276472609074812, "grad_norm": 70.05894470214844, "learning_rate": 5e-06, "loss": 1.4459, "step": 34580 }, { "epoch": 1.8281757881662748, "grad_norm": 58.309715270996094, "learning_rate": 5e-06, "loss": 1.2114, "step": 34590 }, { "epoch": 1.828704315425068, "grad_norm": 22.02941131591797, "learning_rate": 5e-06, "loss": 1.2257, "step": 34600 }, { "epoch": 1.8292328426838615, "grad_norm": 65.42754364013672, "learning_rate": 5e-06, "loss": 1.2429, "step": 34610 }, { "epoch": 1.8297613699426547, "grad_norm": 52.73159408569336, "learning_rate": 5e-06, "loss": 1.1829, "step": 34620 }, { "epoch": 1.8302898972014483, "grad_norm": 68.38138580322266, "learning_rate": 5e-06, "loss": 1.4173, "step": 34630 }, { "epoch": 1.8308184244602415, "grad_norm": 46.74427795410156, "learning_rate": 5e-06, "loss": 1.24, "step": 34640 }, { "epoch": 1.831346951719035, "grad_norm": 74.01024627685547, "learning_rate": 5e-06, "loss": 1.1857, "step": 34650 }, { "epoch": 1.8318754789778282, "grad_norm": 54.04070281982422, "learning_rate": 5e-06, "loss": 1.2685, "step": 34660 }, { "epoch": 1.8324040062366218, "grad_norm": 86.50411224365234, "learning_rate": 5e-06, "loss": 1.3215, "step": 34670 }, { "epoch": 1.832932533495415, "grad_norm": 69.3853988647461, "learning_rate": 5e-06, "loss": 1.226, "step": 34680 }, { "epoch": 1.8334610607542086, "grad_norm": 45.05531311035156, "learning_rate": 5e-06, "loss": 0.792, "step": 34690 }, { "epoch": 1.8339895880130017, "grad_norm": 79.07721710205078, "learning_rate": 5e-06, "loss": 1.3176, "step": 34700 }, { "epoch": 1.834518115271795, "grad_norm": 58.36614990234375, "learning_rate": 5e-06, "loss": 1.235, "step": 34710 }, { "epoch": 1.8350466425305885, "grad_norm": 56.50035095214844, "learning_rate": 5e-06, "loss": 1.0959, "step": 34720 }, { "epoch": 1.8355751697893818, "grad_norm": 45.68974304199219, "learning_rate": 5e-06, "loss": 1.1325, "step": 34730 }, { "epoch": 1.8361036970481752, "grad_norm": 69.6730728149414, "learning_rate": 5e-06, "loss": 1.5583, "step": 34740 }, { "epoch": 1.8366322243069686, "grad_norm": 26.95366859436035, "learning_rate": 5e-06, "loss": 1.1006, "step": 34750 }, { "epoch": 1.837160751565762, "grad_norm": 29.971088409423828, "learning_rate": 5e-06, "loss": 1.3286, "step": 34760 }, { "epoch": 1.8376892788245554, "grad_norm": 105.68348693847656, "learning_rate": 5e-06, "loss": 0.9568, "step": 34770 }, { "epoch": 1.8382178060833487, "grad_norm": 66.75220489501953, "learning_rate": 5e-06, "loss": 1.2388, "step": 34780 }, { "epoch": 1.838746333342142, "grad_norm": 50.97855758666992, "learning_rate": 5e-06, "loss": 1.2033, "step": 34790 }, { "epoch": 1.8392748606009355, "grad_norm": 60.70822525024414, "learning_rate": 5e-06, "loss": 0.9289, "step": 34800 }, { "epoch": 1.8398033878597289, "grad_norm": 53.1993408203125, "learning_rate": 5e-06, "loss": 1.279, "step": 34810 }, { "epoch": 1.8403319151185222, "grad_norm": 31.63226318359375, "learning_rate": 5e-06, "loss": 0.8013, "step": 34820 }, { "epoch": 1.8408604423773156, "grad_norm": 80.950439453125, "learning_rate": 5e-06, "loss": 1.3743, "step": 34830 }, { "epoch": 1.841388969636109, "grad_norm": 82.38343811035156, "learning_rate": 5e-06, "loss": 1.3927, "step": 34840 }, { "epoch": 1.8419174968949024, "grad_norm": 52.6089973449707, "learning_rate": 5e-06, "loss": 1.1932, "step": 34850 }, { "epoch": 1.8424460241536957, "grad_norm": 40.219261169433594, "learning_rate": 5e-06, "loss": 1.0867, "step": 34860 }, { "epoch": 1.8429745514124891, "grad_norm": 61.35457992553711, "learning_rate": 5e-06, "loss": 1.0624, "step": 34870 }, { "epoch": 1.8435030786712825, "grad_norm": 57.405418395996094, "learning_rate": 5e-06, "loss": 1.1346, "step": 34880 }, { "epoch": 1.8440316059300759, "grad_norm": 73.98985290527344, "learning_rate": 5e-06, "loss": 1.2374, "step": 34890 }, { "epoch": 1.8445601331888692, "grad_norm": 41.59527587890625, "learning_rate": 5e-06, "loss": 0.9057, "step": 34900 }, { "epoch": 1.8450886604476626, "grad_norm": 34.47740936279297, "learning_rate": 5e-06, "loss": 1.1266, "step": 34910 }, { "epoch": 1.845617187706456, "grad_norm": 63.55693054199219, "learning_rate": 5e-06, "loss": 1.1489, "step": 34920 }, { "epoch": 1.8461457149652494, "grad_norm": 69.36996459960938, "learning_rate": 5e-06, "loss": 1.471, "step": 34930 }, { "epoch": 1.8466742422240428, "grad_norm": 33.804100036621094, "learning_rate": 5e-06, "loss": 0.9371, "step": 34940 }, { "epoch": 1.8472027694828361, "grad_norm": 79.52246856689453, "learning_rate": 5e-06, "loss": 1.2584, "step": 34950 }, { "epoch": 1.8477312967416295, "grad_norm": 39.1114616394043, "learning_rate": 5e-06, "loss": 1.4788, "step": 34960 }, { "epoch": 1.8482598240004227, "grad_norm": 34.964027404785156, "learning_rate": 5e-06, "loss": 1.0333, "step": 34970 }, { "epoch": 1.8487883512592163, "grad_norm": 54.50927734375, "learning_rate": 5e-06, "loss": 1.2382, "step": 34980 }, { "epoch": 1.8493168785180094, "grad_norm": 85.30730438232422, "learning_rate": 5e-06, "loss": 1.262, "step": 34990 }, { "epoch": 1.849845405776803, "grad_norm": 50.98641586303711, "learning_rate": 5e-06, "loss": 0.9822, "step": 35000 }, { "epoch": 1.849845405776803, "eval_loss": 1.2914385795593262, "eval_runtime": 34.7811, "eval_samples_per_second": 351.685, "eval_steps_per_second": 11.012, "eval_sts-dev_pearson_cosine": 0.8458373509161696, "eval_sts-dev_pearson_dot": 0.8234889749580144, "eval_sts-dev_pearson_euclidean": 0.8466782892979008, "eval_sts-dev_pearson_manhattan": 0.8464761742591546, "eval_sts-dev_pearson_max": 0.8466782892979008, "eval_sts-dev_spearman_cosine": 0.8481959422615287, "eval_sts-dev_spearman_dot": 0.8218827422687479, "eval_sts-dev_spearman_euclidean": 0.8493710968148149, "eval_sts-dev_spearman_manhattan": 0.8488972436531064, "eval_sts-dev_spearman_max": 0.8493710968148149, "step": 35000 }, { "epoch": 1.8503739330355962, "grad_norm": 69.37211608886719, "learning_rate": 5e-06, "loss": 1.3243, "step": 35010 }, { "epoch": 1.8509024602943898, "grad_norm": 51.468414306640625, "learning_rate": 5e-06, "loss": 1.1695, "step": 35020 }, { "epoch": 1.851430987553183, "grad_norm": 82.64826965332031, "learning_rate": 5e-06, "loss": 1.3631, "step": 35030 }, { "epoch": 1.8519595148119765, "grad_norm": 56.08043670654297, "learning_rate": 5e-06, "loss": 1.0279, "step": 35040 }, { "epoch": 1.8524880420707697, "grad_norm": 137.46717834472656, "learning_rate": 5e-06, "loss": 1.3773, "step": 35050 }, { "epoch": 1.8530165693295633, "grad_norm": 71.13401794433594, "learning_rate": 5e-06, "loss": 1.1107, "step": 35060 }, { "epoch": 1.8535450965883564, "grad_norm": 61.08729553222656, "learning_rate": 5e-06, "loss": 1.0063, "step": 35070 }, { "epoch": 1.85407362384715, "grad_norm": 62.5564079284668, "learning_rate": 5e-06, "loss": 1.334, "step": 35080 }, { "epoch": 1.8546021511059432, "grad_norm": 94.94432830810547, "learning_rate": 5e-06, "loss": 1.568, "step": 35090 }, { "epoch": 1.8551306783647368, "grad_norm": 36.020023345947266, "learning_rate": 5e-06, "loss": 1.0427, "step": 35100 }, { "epoch": 1.85565920562353, "grad_norm": 82.48129272460938, "learning_rate": 5e-06, "loss": 1.2111, "step": 35110 }, { "epoch": 1.8561877328823235, "grad_norm": 67.48646545410156, "learning_rate": 5e-06, "loss": 1.4523, "step": 35120 }, { "epoch": 1.8567162601411167, "grad_norm": 74.20268249511719, "learning_rate": 5e-06, "loss": 1.2233, "step": 35130 }, { "epoch": 1.8572447873999103, "grad_norm": 78.64826202392578, "learning_rate": 5e-06, "loss": 1.4149, "step": 35140 }, { "epoch": 1.8577733146587034, "grad_norm": 66.85326385498047, "learning_rate": 5e-06, "loss": 1.224, "step": 35150 }, { "epoch": 1.858301841917497, "grad_norm": 40.311119079589844, "learning_rate": 5e-06, "loss": 1.2869, "step": 35160 }, { "epoch": 1.8588303691762902, "grad_norm": 77.55250549316406, "learning_rate": 5e-06, "loss": 1.3693, "step": 35170 }, { "epoch": 1.8593588964350838, "grad_norm": 61.48048782348633, "learning_rate": 5e-06, "loss": 1.2931, "step": 35180 }, { "epoch": 1.859887423693877, "grad_norm": 46.911197662353516, "learning_rate": 5e-06, "loss": 1.1267, "step": 35190 }, { "epoch": 1.8604159509526705, "grad_norm": 44.313018798828125, "learning_rate": 5e-06, "loss": 0.9819, "step": 35200 }, { "epoch": 1.8609444782114637, "grad_norm": 67.5103759765625, "learning_rate": 5e-06, "loss": 1.3212, "step": 35210 }, { "epoch": 1.8614730054702573, "grad_norm": 36.48656463623047, "learning_rate": 5e-06, "loss": 1.1141, "step": 35220 }, { "epoch": 1.8620015327290504, "grad_norm": 87.11944580078125, "learning_rate": 5e-06, "loss": 1.261, "step": 35230 }, { "epoch": 1.8625300599878438, "grad_norm": 63.6995735168457, "learning_rate": 5e-06, "loss": 1.6378, "step": 35240 }, { "epoch": 1.8630585872466372, "grad_norm": 83.29837036132812, "learning_rate": 5e-06, "loss": 1.1853, "step": 35250 }, { "epoch": 1.8635871145054306, "grad_norm": 60.85847091674805, "learning_rate": 5e-06, "loss": 1.1109, "step": 35260 }, { "epoch": 1.864115641764224, "grad_norm": 67.9332046508789, "learning_rate": 5e-06, "loss": 1.1758, "step": 35270 }, { "epoch": 1.8646441690230173, "grad_norm": 48.44803237915039, "learning_rate": 5e-06, "loss": 1.324, "step": 35280 }, { "epoch": 1.8651726962818107, "grad_norm": 66.1905746459961, "learning_rate": 5e-06, "loss": 1.1997, "step": 35290 }, { "epoch": 1.865701223540604, "grad_norm": 86.51795196533203, "learning_rate": 5e-06, "loss": 1.4792, "step": 35300 }, { "epoch": 1.8662297507993975, "grad_norm": 85.64861297607422, "learning_rate": 5e-06, "loss": 1.4003, "step": 35310 }, { "epoch": 1.8667582780581908, "grad_norm": 83.29185485839844, "learning_rate": 5e-06, "loss": 1.7445, "step": 35320 }, { "epoch": 1.8672868053169842, "grad_norm": 39.68517303466797, "learning_rate": 5e-06, "loss": 1.2434, "step": 35330 }, { "epoch": 1.8678153325757776, "grad_norm": 72.00218963623047, "learning_rate": 5e-06, "loss": 1.1728, "step": 35340 }, { "epoch": 1.868343859834571, "grad_norm": 71.4185562133789, "learning_rate": 5e-06, "loss": 1.0449, "step": 35350 }, { "epoch": 1.8688723870933643, "grad_norm": 67.16625213623047, "learning_rate": 5e-06, "loss": 1.1651, "step": 35360 }, { "epoch": 1.8694009143521577, "grad_norm": 70.09860229492188, "learning_rate": 5e-06, "loss": 1.194, "step": 35370 }, { "epoch": 1.869929441610951, "grad_norm": 76.03398895263672, "learning_rate": 5e-06, "loss": 1.1882, "step": 35380 }, { "epoch": 1.8704579688697445, "grad_norm": 42.91001892089844, "learning_rate": 5e-06, "loss": 1.1461, "step": 35390 }, { "epoch": 1.8709864961285378, "grad_norm": 45.161930084228516, "learning_rate": 5e-06, "loss": 1.1802, "step": 35400 }, { "epoch": 1.8715150233873312, "grad_norm": 54.87264633178711, "learning_rate": 5e-06, "loss": 1.3367, "step": 35410 }, { "epoch": 1.8720435506461246, "grad_norm": 36.78129959106445, "learning_rate": 5e-06, "loss": 1.2895, "step": 35420 }, { "epoch": 1.872572077904918, "grad_norm": 47.13407516479492, "learning_rate": 5e-06, "loss": 0.9883, "step": 35430 }, { "epoch": 1.8731006051637114, "grad_norm": 43.22871398925781, "learning_rate": 5e-06, "loss": 1.0859, "step": 35440 }, { "epoch": 1.8736291324225047, "grad_norm": 55.359622955322266, "learning_rate": 5e-06, "loss": 1.031, "step": 35450 }, { "epoch": 1.874157659681298, "grad_norm": 39.64622497558594, "learning_rate": 5e-06, "loss": 1.274, "step": 35460 }, { "epoch": 1.8746861869400915, "grad_norm": 47.52775573730469, "learning_rate": 5e-06, "loss": 1.2421, "step": 35470 }, { "epoch": 1.8752147141988849, "grad_norm": 41.93030548095703, "learning_rate": 5e-06, "loss": 1.2901, "step": 35480 }, { "epoch": 1.8757432414576782, "grad_norm": 57.00129699707031, "learning_rate": 5e-06, "loss": 0.8959, "step": 35490 }, { "epoch": 1.8762717687164714, "grad_norm": 83.50321197509766, "learning_rate": 5e-06, "loss": 1.579, "step": 35500 }, { "epoch": 1.876800295975265, "grad_norm": 108.28717803955078, "learning_rate": 5e-06, "loss": 1.428, "step": 35510 }, { "epoch": 1.8773288232340581, "grad_norm": 40.840518951416016, "learning_rate": 5e-06, "loss": 1.1259, "step": 35520 }, { "epoch": 1.8778573504928517, "grad_norm": 44.09311294555664, "learning_rate": 5e-06, "loss": 1.0866, "step": 35530 }, { "epoch": 1.878385877751645, "grad_norm": 100.2142105102539, "learning_rate": 5e-06, "loss": 0.9673, "step": 35540 }, { "epoch": 1.8789144050104385, "grad_norm": 80.96053314208984, "learning_rate": 5e-06, "loss": 1.349, "step": 35550 }, { "epoch": 1.8794429322692316, "grad_norm": 62.0711669921875, "learning_rate": 5e-06, "loss": 1.0463, "step": 35560 }, { "epoch": 1.8799714595280252, "grad_norm": 50.626399993896484, "learning_rate": 5e-06, "loss": 1.0867, "step": 35570 }, { "epoch": 1.8804999867868184, "grad_norm": 50.4387092590332, "learning_rate": 5e-06, "loss": 1.5429, "step": 35580 }, { "epoch": 1.881028514045612, "grad_norm": 64.17430877685547, "learning_rate": 5e-06, "loss": 1.2096, "step": 35590 }, { "epoch": 1.8815570413044052, "grad_norm": 88.36742401123047, "learning_rate": 5e-06, "loss": 1.3313, "step": 35600 }, { "epoch": 1.8820855685631988, "grad_norm": 56.565589904785156, "learning_rate": 5e-06, "loss": 1.318, "step": 35610 }, { "epoch": 1.882614095821992, "grad_norm": 44.099853515625, "learning_rate": 5e-06, "loss": 1.057, "step": 35620 }, { "epoch": 1.8831426230807855, "grad_norm": 45.281864166259766, "learning_rate": 5e-06, "loss": 1.3395, "step": 35630 }, { "epoch": 1.8836711503395787, "grad_norm": 56.560787200927734, "learning_rate": 5e-06, "loss": 1.134, "step": 35640 }, { "epoch": 1.8841996775983723, "grad_norm": 81.83673858642578, "learning_rate": 5e-06, "loss": 1.506, "step": 35650 }, { "epoch": 1.8847282048571654, "grad_norm": 94.54393005371094, "learning_rate": 5e-06, "loss": 1.245, "step": 35660 }, { "epoch": 1.885256732115959, "grad_norm": 46.95947265625, "learning_rate": 5e-06, "loss": 1.373, "step": 35670 }, { "epoch": 1.8857852593747522, "grad_norm": 56.60572052001953, "learning_rate": 5e-06, "loss": 1.1521, "step": 35680 }, { "epoch": 1.8863137866335458, "grad_norm": 71.34803009033203, "learning_rate": 5e-06, "loss": 1.2702, "step": 35690 }, { "epoch": 1.886842313892339, "grad_norm": 51.7674446105957, "learning_rate": 5e-06, "loss": 1.1071, "step": 35700 }, { "epoch": 1.8873708411511325, "grad_norm": 74.32508087158203, "learning_rate": 5e-06, "loss": 1.278, "step": 35710 }, { "epoch": 1.8878993684099257, "grad_norm": 102.182373046875, "learning_rate": 5e-06, "loss": 1.2579, "step": 35720 }, { "epoch": 1.8884278956687193, "grad_norm": 61.59678649902344, "learning_rate": 5e-06, "loss": 1.2934, "step": 35730 }, { "epoch": 1.8889564229275124, "grad_norm": 45.047420501708984, "learning_rate": 5e-06, "loss": 1.0741, "step": 35740 }, { "epoch": 1.889484950186306, "grad_norm": 53.263580322265625, "learning_rate": 5e-06, "loss": 1.2702, "step": 35750 }, { "epoch": 1.8900134774450992, "grad_norm": 38.001155853271484, "learning_rate": 5e-06, "loss": 1.3521, "step": 35760 }, { "epoch": 1.8905420047038926, "grad_norm": 48.60731887817383, "learning_rate": 5e-06, "loss": 1.1362, "step": 35770 }, { "epoch": 1.891070531962686, "grad_norm": 48.54228210449219, "learning_rate": 5e-06, "loss": 0.98, "step": 35780 }, { "epoch": 1.8915990592214793, "grad_norm": 51.173423767089844, "learning_rate": 5e-06, "loss": 1.3053, "step": 35790 }, { "epoch": 1.8921275864802727, "grad_norm": 50.9604377746582, "learning_rate": 5e-06, "loss": 1.2485, "step": 35800 }, { "epoch": 1.892656113739066, "grad_norm": 56.89975357055664, "learning_rate": 5e-06, "loss": 0.89, "step": 35810 }, { "epoch": 1.8931846409978594, "grad_norm": 60.37491226196289, "learning_rate": 5e-06, "loss": 1.0995, "step": 35820 }, { "epoch": 1.8937131682566528, "grad_norm": 35.38029861450195, "learning_rate": 5e-06, "loss": 1.1369, "step": 35830 }, { "epoch": 1.8942416955154462, "grad_norm": 40.478302001953125, "learning_rate": 5e-06, "loss": 1.3656, "step": 35840 }, { "epoch": 1.8947702227742396, "grad_norm": 63.50598907470703, "learning_rate": 5e-06, "loss": 1.482, "step": 35850 }, { "epoch": 1.895298750033033, "grad_norm": 47.72547912597656, "learning_rate": 5e-06, "loss": 1.0902, "step": 35860 }, { "epoch": 1.8958272772918263, "grad_norm": 58.328392028808594, "learning_rate": 5e-06, "loss": 1.2932, "step": 35870 }, { "epoch": 1.8963558045506197, "grad_norm": 67.7713394165039, "learning_rate": 5e-06, "loss": 1.1485, "step": 35880 }, { "epoch": 1.896884331809413, "grad_norm": 66.898193359375, "learning_rate": 5e-06, "loss": 1.1775, "step": 35890 }, { "epoch": 1.8974128590682064, "grad_norm": 58.97044372558594, "learning_rate": 5e-06, "loss": 1.2248, "step": 35900 }, { "epoch": 1.8979413863269998, "grad_norm": 44.18434524536133, "learning_rate": 5e-06, "loss": 1.2923, "step": 35910 }, { "epoch": 1.8984699135857932, "grad_norm": 109.49869537353516, "learning_rate": 5e-06, "loss": 1.0001, "step": 35920 }, { "epoch": 1.8989984408445866, "grad_norm": 63.060081481933594, "learning_rate": 5e-06, "loss": 1.3429, "step": 35930 }, { "epoch": 1.89952696810338, "grad_norm": 56.62800598144531, "learning_rate": 5e-06, "loss": 1.214, "step": 35940 }, { "epoch": 1.9000554953621733, "grad_norm": 43.84016418457031, "learning_rate": 5e-06, "loss": 1.2035, "step": 35950 }, { "epoch": 1.9005840226209667, "grad_norm": 31.9194393157959, "learning_rate": 5e-06, "loss": 1.2396, "step": 35960 }, { "epoch": 1.90111254987976, "grad_norm": 61.28843307495117, "learning_rate": 5e-06, "loss": 1.314, "step": 35970 }, { "epoch": 1.9016410771385535, "grad_norm": 67.76539611816406, "learning_rate": 5e-06, "loss": 1.493, "step": 35980 }, { "epoch": 1.9021696043973468, "grad_norm": 44.184810638427734, "learning_rate": 5e-06, "loss": 1.1751, "step": 35990 }, { "epoch": 1.9026981316561402, "grad_norm": 62.17245864868164, "learning_rate": 5e-06, "loss": 1.3366, "step": 36000 }, { "epoch": 1.9026981316561402, "eval_loss": 1.245326042175293, "eval_runtime": 34.6393, "eval_samples_per_second": 353.125, "eval_steps_per_second": 11.057, "eval_sts-dev_pearson_cosine": 0.8437412142031411, "eval_sts-dev_pearson_dot": 0.8186980206247805, "eval_sts-dev_pearson_euclidean": 0.8476252808689944, "eval_sts-dev_pearson_manhattan": 0.8470572099860441, "eval_sts-dev_pearson_max": 0.8476252808689944, "eval_sts-dev_spearman_cosine": 0.8470273287938402, "eval_sts-dev_spearman_dot": 0.816005439978655, "eval_sts-dev_spearman_euclidean": 0.8491503834817151, "eval_sts-dev_spearman_manhattan": 0.8483071771627557, "eval_sts-dev_spearman_max": 0.8491503834817151, "step": 36000 }, { "epoch": 1.9032266589149336, "grad_norm": 61.73591995239258, "learning_rate": 5e-06, "loss": 1.1759, "step": 36010 }, { "epoch": 1.903755186173727, "grad_norm": 70.79891204833984, "learning_rate": 5e-06, "loss": 1.2156, "step": 36020 }, { "epoch": 1.9042837134325201, "grad_norm": 105.9446792602539, "learning_rate": 5e-06, "loss": 1.205, "step": 36030 }, { "epoch": 1.9048122406913137, "grad_norm": 40.99330520629883, "learning_rate": 5e-06, "loss": 0.9175, "step": 36040 }, { "epoch": 1.9053407679501069, "grad_norm": 58.54442596435547, "learning_rate": 5e-06, "loss": 1.4136, "step": 36050 }, { "epoch": 1.9058692952089005, "grad_norm": 49.43284225463867, "learning_rate": 5e-06, "loss": 0.9956, "step": 36060 }, { "epoch": 1.9063978224676936, "grad_norm": 61.654258728027344, "learning_rate": 5e-06, "loss": 1.186, "step": 36070 }, { "epoch": 1.9069263497264872, "grad_norm": 61.67768859863281, "learning_rate": 5e-06, "loss": 1.1827, "step": 36080 }, { "epoch": 1.9074548769852804, "grad_norm": 73.99486541748047, "learning_rate": 5e-06, "loss": 1.4269, "step": 36090 }, { "epoch": 1.907983404244074, "grad_norm": 45.964664459228516, "learning_rate": 5e-06, "loss": 1.1819, "step": 36100 }, { "epoch": 1.9085119315028671, "grad_norm": 59.5894660949707, "learning_rate": 5e-06, "loss": 1.1824, "step": 36110 }, { "epoch": 1.9090404587616607, "grad_norm": 52.50589370727539, "learning_rate": 5e-06, "loss": 0.9164, "step": 36120 }, { "epoch": 1.9095689860204539, "grad_norm": 43.19770812988281, "learning_rate": 5e-06, "loss": 1.149, "step": 36130 }, { "epoch": 1.9100975132792475, "grad_norm": 83.90937805175781, "learning_rate": 5e-06, "loss": 1.1523, "step": 36140 }, { "epoch": 1.9106260405380406, "grad_norm": 46.722984313964844, "learning_rate": 5e-06, "loss": 1.1018, "step": 36150 }, { "epoch": 1.9111545677968342, "grad_norm": 66.1560287475586, "learning_rate": 5e-06, "loss": 1.1148, "step": 36160 }, { "epoch": 1.9116830950556274, "grad_norm": 70.5313949584961, "learning_rate": 5e-06, "loss": 1.0059, "step": 36170 }, { "epoch": 1.912211622314421, "grad_norm": 47.81333923339844, "learning_rate": 5e-06, "loss": 1.2174, "step": 36180 }, { "epoch": 1.9127401495732141, "grad_norm": 84.23686981201172, "learning_rate": 5e-06, "loss": 1.2934, "step": 36190 }, { "epoch": 1.9132686768320077, "grad_norm": 44.42354965209961, "learning_rate": 5e-06, "loss": 0.9857, "step": 36200 }, { "epoch": 1.913797204090801, "grad_norm": 71.60594177246094, "learning_rate": 5e-06, "loss": 1.2007, "step": 36210 }, { "epoch": 1.9143257313495945, "grad_norm": 61.31769943237305, "learning_rate": 5e-06, "loss": 1.052, "step": 36220 }, { "epoch": 1.9148542586083876, "grad_norm": 67.3980941772461, "learning_rate": 5e-06, "loss": 1.3199, "step": 36230 }, { "epoch": 1.9153827858671812, "grad_norm": 44.637611389160156, "learning_rate": 5e-06, "loss": 0.903, "step": 36240 }, { "epoch": 1.9159113131259744, "grad_norm": 56.921836853027344, "learning_rate": 5e-06, "loss": 1.1578, "step": 36250 }, { "epoch": 1.916439840384768, "grad_norm": 69.24896240234375, "learning_rate": 5e-06, "loss": 1.1194, "step": 36260 }, { "epoch": 1.9169683676435612, "grad_norm": 64.05181121826172, "learning_rate": 5e-06, "loss": 1.248, "step": 36270 }, { "epoch": 1.9174968949023548, "grad_norm": 38.51970291137695, "learning_rate": 5e-06, "loss": 0.9636, "step": 36280 }, { "epoch": 1.918025422161148, "grad_norm": 56.27132034301758, "learning_rate": 5e-06, "loss": 1.3105, "step": 36290 }, { "epoch": 1.9185539494199413, "grad_norm": 73.37287139892578, "learning_rate": 5e-06, "loss": 0.903, "step": 36300 }, { "epoch": 1.9190824766787347, "grad_norm": 63.69118881225586, "learning_rate": 5e-06, "loss": 1.1322, "step": 36310 }, { "epoch": 1.919611003937528, "grad_norm": 61.1439208984375, "learning_rate": 5e-06, "loss": 1.0915, "step": 36320 }, { "epoch": 1.9201395311963214, "grad_norm": 36.860374450683594, "learning_rate": 5e-06, "loss": 1.2095, "step": 36330 }, { "epoch": 1.9206680584551148, "grad_norm": 46.34273147583008, "learning_rate": 5e-06, "loss": 1.2222, "step": 36340 }, { "epoch": 1.9211965857139082, "grad_norm": 61.128211975097656, "learning_rate": 5e-06, "loss": 0.8844, "step": 36350 }, { "epoch": 1.9217251129727015, "grad_norm": 82.07186126708984, "learning_rate": 5e-06, "loss": 1.015, "step": 36360 }, { "epoch": 1.922253640231495, "grad_norm": 59.65434646606445, "learning_rate": 5e-06, "loss": 1.2956, "step": 36370 }, { "epoch": 1.9227821674902883, "grad_norm": 57.38762283325195, "learning_rate": 5e-06, "loss": 1.2039, "step": 36380 }, { "epoch": 1.9233106947490817, "grad_norm": 69.7767562866211, "learning_rate": 5e-06, "loss": 1.1242, "step": 36390 }, { "epoch": 1.923839222007875, "grad_norm": 59.285377502441406, "learning_rate": 5e-06, "loss": 1.0715, "step": 36400 }, { "epoch": 1.9243677492666684, "grad_norm": 92.31892395019531, "learning_rate": 5e-06, "loss": 0.9937, "step": 36410 }, { "epoch": 1.9248962765254618, "grad_norm": 51.268524169921875, "learning_rate": 5e-06, "loss": 1.2022, "step": 36420 }, { "epoch": 1.9254248037842552, "grad_norm": 57.95781707763672, "learning_rate": 5e-06, "loss": 1.5181, "step": 36430 }, { "epoch": 1.9259533310430486, "grad_norm": 92.81102752685547, "learning_rate": 5e-06, "loss": 1.2477, "step": 36440 }, { "epoch": 1.926481858301842, "grad_norm": 44.6760368347168, "learning_rate": 5e-06, "loss": 1.0125, "step": 36450 }, { "epoch": 1.9270103855606353, "grad_norm": 87.3685302734375, "learning_rate": 5e-06, "loss": 1.5546, "step": 36460 }, { "epoch": 1.9275389128194287, "grad_norm": 61.84728240966797, "learning_rate": 5e-06, "loss": 0.8852, "step": 36470 }, { "epoch": 1.928067440078222, "grad_norm": 54.93688201904297, "learning_rate": 5e-06, "loss": 1.1351, "step": 36480 }, { "epoch": 1.9285959673370154, "grad_norm": 71.09061431884766, "learning_rate": 5e-06, "loss": 1.2227, "step": 36490 }, { "epoch": 1.9291244945958088, "grad_norm": 102.31282806396484, "learning_rate": 5e-06, "loss": 1.5427, "step": 36500 }, { "epoch": 1.9296530218546022, "grad_norm": 78.48812103271484, "learning_rate": 5e-06, "loss": 1.2759, "step": 36510 }, { "epoch": 1.9301815491133956, "grad_norm": 81.47463989257812, "learning_rate": 5e-06, "loss": 1.4688, "step": 36520 }, { "epoch": 1.930710076372189, "grad_norm": 46.64634323120117, "learning_rate": 5e-06, "loss": 1.2361, "step": 36530 }, { "epoch": 1.9312386036309823, "grad_norm": 95.32649230957031, "learning_rate": 5e-06, "loss": 1.3348, "step": 36540 }, { "epoch": 1.9317671308897757, "grad_norm": 40.78327560424805, "learning_rate": 5e-06, "loss": 1.112, "step": 36550 }, { "epoch": 1.9322956581485689, "grad_norm": 50.70182800292969, "learning_rate": 5e-06, "loss": 1.0132, "step": 36560 }, { "epoch": 1.9328241854073624, "grad_norm": 57.28990173339844, "learning_rate": 5e-06, "loss": 1.0952, "step": 36570 }, { "epoch": 1.9333527126661556, "grad_norm": 45.35701370239258, "learning_rate": 5e-06, "loss": 0.9987, "step": 36580 }, { "epoch": 1.9338812399249492, "grad_norm": 64.58379364013672, "learning_rate": 5e-06, "loss": 1.1866, "step": 36590 }, { "epoch": 1.9344097671837424, "grad_norm": 52.60076141357422, "learning_rate": 5e-06, "loss": 1.3136, "step": 36600 }, { "epoch": 1.934938294442536, "grad_norm": 80.05229949951172, "learning_rate": 5e-06, "loss": 1.5236, "step": 36610 }, { "epoch": 1.935466821701329, "grad_norm": 49.11976623535156, "learning_rate": 5e-06, "loss": 1.2283, "step": 36620 }, { "epoch": 1.9359953489601227, "grad_norm": 73.55956268310547, "learning_rate": 5e-06, "loss": 1.1192, "step": 36630 }, { "epoch": 1.9365238762189159, "grad_norm": 79.5677261352539, "learning_rate": 5e-06, "loss": 1.2465, "step": 36640 }, { "epoch": 1.9370524034777095, "grad_norm": 40.01661682128906, "learning_rate": 5e-06, "loss": 1.1233, "step": 36650 }, { "epoch": 1.9375809307365026, "grad_norm": 57.84092712402344, "learning_rate": 5e-06, "loss": 1.07, "step": 36660 }, { "epoch": 1.9381094579952962, "grad_norm": 67.33594512939453, "learning_rate": 5e-06, "loss": 1.1676, "step": 36670 }, { "epoch": 1.9386379852540894, "grad_norm": 40.41339874267578, "learning_rate": 5e-06, "loss": 1.0913, "step": 36680 }, { "epoch": 1.939166512512883, "grad_norm": 54.85749816894531, "learning_rate": 5e-06, "loss": 1.3901, "step": 36690 }, { "epoch": 1.9396950397716761, "grad_norm": 56.54562759399414, "learning_rate": 5e-06, "loss": 0.9669, "step": 36700 }, { "epoch": 1.9402235670304697, "grad_norm": 122.287109375, "learning_rate": 5e-06, "loss": 1.2373, "step": 36710 }, { "epoch": 1.9407520942892629, "grad_norm": 70.20087432861328, "learning_rate": 5e-06, "loss": 1.1385, "step": 36720 }, { "epoch": 1.9412806215480565, "grad_norm": 52.70350646972656, "learning_rate": 5e-06, "loss": 1.1563, "step": 36730 }, { "epoch": 1.9418091488068496, "grad_norm": 53.60816955566406, "learning_rate": 5e-06, "loss": 1.5104, "step": 36740 }, { "epoch": 1.9423376760656432, "grad_norm": 59.91215133666992, "learning_rate": 5e-06, "loss": 1.0229, "step": 36750 }, { "epoch": 1.9428662033244364, "grad_norm": 62.5777587890625, "learning_rate": 5e-06, "loss": 1.2259, "step": 36760 }, { "epoch": 1.94339473058323, "grad_norm": 61.030029296875, "learning_rate": 5e-06, "loss": 1.1109, "step": 36770 }, { "epoch": 1.9439232578420231, "grad_norm": 45.95779037475586, "learning_rate": 5e-06, "loss": 1.3496, "step": 36780 }, { "epoch": 1.9444517851008167, "grad_norm": 45.52033233642578, "learning_rate": 5e-06, "loss": 1.0848, "step": 36790 }, { "epoch": 1.9449803123596099, "grad_norm": 50.52760696411133, "learning_rate": 5e-06, "loss": 1.3513, "step": 36800 }, { "epoch": 1.9455088396184035, "grad_norm": 60.49251937866211, "learning_rate": 5e-06, "loss": 1.2765, "step": 36810 }, { "epoch": 1.9460373668771966, "grad_norm": 61.24678421020508, "learning_rate": 5e-06, "loss": 1.1988, "step": 36820 }, { "epoch": 1.94656589413599, "grad_norm": 58.46382522583008, "learning_rate": 5e-06, "loss": 1.2311, "step": 36830 }, { "epoch": 1.9470944213947834, "grad_norm": 41.03225326538086, "learning_rate": 5e-06, "loss": 1.2922, "step": 36840 }, { "epoch": 1.9476229486535768, "grad_norm": 56.77100372314453, "learning_rate": 5e-06, "loss": 1.2357, "step": 36850 }, { "epoch": 1.9481514759123701, "grad_norm": 57.24354934692383, "learning_rate": 5e-06, "loss": 1.0465, "step": 36860 }, { "epoch": 1.9486800031711635, "grad_norm": 81.97982025146484, "learning_rate": 5e-06, "loss": 1.3336, "step": 36870 }, { "epoch": 1.949208530429957, "grad_norm": 48.759700775146484, "learning_rate": 5e-06, "loss": 1.2529, "step": 36880 }, { "epoch": 1.9497370576887503, "grad_norm": 54.89510726928711, "learning_rate": 5e-06, "loss": 1.133, "step": 36890 }, { "epoch": 1.9502655849475437, "grad_norm": 66.60230255126953, "learning_rate": 5e-06, "loss": 1.1463, "step": 36900 }, { "epoch": 1.950794112206337, "grad_norm": 51.61023712158203, "learning_rate": 5e-06, "loss": 1.1191, "step": 36910 }, { "epoch": 1.9513226394651304, "grad_norm": 48.830318450927734, "learning_rate": 5e-06, "loss": 1.0474, "step": 36920 }, { "epoch": 1.9518511667239238, "grad_norm": 48.1805305480957, "learning_rate": 5e-06, "loss": 1.061, "step": 36930 }, { "epoch": 1.9523796939827172, "grad_norm": 44.90130615234375, "learning_rate": 5e-06, "loss": 1.0839, "step": 36940 }, { "epoch": 1.9529082212415105, "grad_norm": 79.8802261352539, "learning_rate": 5e-06, "loss": 1.2627, "step": 36950 }, { "epoch": 1.953436748500304, "grad_norm": 46.70833969116211, "learning_rate": 5e-06, "loss": 1.2666, "step": 36960 }, { "epoch": 1.9539652757590973, "grad_norm": 46.038536071777344, "learning_rate": 5e-06, "loss": 1.1172, "step": 36970 }, { "epoch": 1.9544938030178907, "grad_norm": 57.11937713623047, "learning_rate": 5e-06, "loss": 1.213, "step": 36980 }, { "epoch": 1.955022330276684, "grad_norm": 57.25285720825195, "learning_rate": 5e-06, "loss": 1.2092, "step": 36990 }, { "epoch": 1.9555508575354774, "grad_norm": 59.58512496948242, "learning_rate": 5e-06, "loss": 1.2047, "step": 37000 }, { "epoch": 1.9555508575354774, "eval_loss": 1.270440697669983, "eval_runtime": 42.3088, "eval_samples_per_second": 289.113, "eval_steps_per_second": 9.052, "eval_sts-dev_pearson_cosine": 0.8376002825810096, "eval_sts-dev_pearson_dot": 0.8128477782721132, "eval_sts-dev_pearson_euclidean": 0.8404052955406778, "eval_sts-dev_pearson_manhattan": 0.8401620101211146, "eval_sts-dev_pearson_max": 0.8404052955406778, "eval_sts-dev_spearman_cosine": 0.8397798629219329, "eval_sts-dev_spearman_dot": 0.8089755451694591, "eval_sts-dev_spearman_euclidean": 0.8410840738663811, "eval_sts-dev_spearman_manhattan": 0.8406494851623324, "eval_sts-dev_spearman_max": 0.8410840738663811, "step": 37000 }, { "epoch": 1.9560793847942708, "grad_norm": 50.16804885864258, "learning_rate": 5e-06, "loss": 1.0944, "step": 37010 }, { "epoch": 1.9566079120530642, "grad_norm": 58.452056884765625, "learning_rate": 5e-06, "loss": 1.0748, "step": 37020 }, { "epoch": 1.9571364393118575, "grad_norm": 43.09967041015625, "learning_rate": 5e-06, "loss": 1.2117, "step": 37030 }, { "epoch": 1.957664966570651, "grad_norm": 59.81379318237305, "learning_rate": 5e-06, "loss": 0.7529, "step": 37040 }, { "epoch": 1.9581934938294443, "grad_norm": 76.8800048828125, "learning_rate": 5e-06, "loss": 1.1838, "step": 37050 }, { "epoch": 1.9587220210882377, "grad_norm": 82.68061065673828, "learning_rate": 5e-06, "loss": 1.0911, "step": 37060 }, { "epoch": 1.959250548347031, "grad_norm": 35.03467559814453, "learning_rate": 5e-06, "loss": 1.1836, "step": 37070 }, { "epoch": 1.9597790756058244, "grad_norm": 54.89982604980469, "learning_rate": 5e-06, "loss": 1.2948, "step": 37080 }, { "epoch": 1.9603076028646176, "grad_norm": 38.87999725341797, "learning_rate": 5e-06, "loss": 1.4727, "step": 37090 }, { "epoch": 1.9608361301234112, "grad_norm": 32.81504821777344, "learning_rate": 5e-06, "loss": 1.3187, "step": 37100 }, { "epoch": 1.9613646573822043, "grad_norm": 68.10120391845703, "learning_rate": 5e-06, "loss": 1.167, "step": 37110 }, { "epoch": 1.961893184640998, "grad_norm": 87.36077880859375, "learning_rate": 5e-06, "loss": 0.9744, "step": 37120 }, { "epoch": 1.962421711899791, "grad_norm": 84.50756072998047, "learning_rate": 5e-06, "loss": 1.1851, "step": 37130 }, { "epoch": 1.9629502391585847, "grad_norm": 46.839820861816406, "learning_rate": 5e-06, "loss": 0.9715, "step": 37140 }, { "epoch": 1.9634787664173778, "grad_norm": 60.084110260009766, "learning_rate": 5e-06, "loss": 0.9356, "step": 37150 }, { "epoch": 1.9640072936761714, "grad_norm": 61.410789489746094, "learning_rate": 5e-06, "loss": 1.1153, "step": 37160 }, { "epoch": 1.9645358209349646, "grad_norm": 50.83837127685547, "learning_rate": 5e-06, "loss": 1.1331, "step": 37170 }, { "epoch": 1.9650643481937582, "grad_norm": 59.86146926879883, "learning_rate": 5e-06, "loss": 1.0135, "step": 37180 }, { "epoch": 1.9655928754525513, "grad_norm": 80.90714263916016, "learning_rate": 5e-06, "loss": 1.2782, "step": 37190 }, { "epoch": 1.966121402711345, "grad_norm": 76.65208435058594, "learning_rate": 5e-06, "loss": 0.9344, "step": 37200 }, { "epoch": 1.966649929970138, "grad_norm": 38.09241485595703, "learning_rate": 5e-06, "loss": 1.0012, "step": 37210 }, { "epoch": 1.9671784572289317, "grad_norm": 69.65997314453125, "learning_rate": 5e-06, "loss": 1.3649, "step": 37220 }, { "epoch": 1.9677069844877249, "grad_norm": 80.97835540771484, "learning_rate": 5e-06, "loss": 1.5144, "step": 37230 }, { "epoch": 1.9682355117465185, "grad_norm": 58.61711502075195, "learning_rate": 5e-06, "loss": 1.1432, "step": 37240 }, { "epoch": 1.9687640390053116, "grad_norm": 64.32734680175781, "learning_rate": 5e-06, "loss": 1.2262, "step": 37250 }, { "epoch": 1.9692925662641052, "grad_norm": 73.07118225097656, "learning_rate": 5e-06, "loss": 1.0686, "step": 37260 }, { "epoch": 1.9698210935228984, "grad_norm": 72.35934448242188, "learning_rate": 5e-06, "loss": 1.1807, "step": 37270 }, { "epoch": 1.970349620781692, "grad_norm": 43.337791442871094, "learning_rate": 5e-06, "loss": 1.0876, "step": 37280 }, { "epoch": 1.9708781480404851, "grad_norm": 60.53177261352539, "learning_rate": 5e-06, "loss": 1.0879, "step": 37290 }, { "epoch": 1.9714066752992787, "grad_norm": 75.4104232788086, "learning_rate": 5e-06, "loss": 1.3277, "step": 37300 }, { "epoch": 1.9719352025580719, "grad_norm": 59.92351150512695, "learning_rate": 5e-06, "loss": 1.304, "step": 37310 }, { "epoch": 1.9724637298168655, "grad_norm": 71.55609893798828, "learning_rate": 5e-06, "loss": 1.3843, "step": 37320 }, { "epoch": 1.9729922570756586, "grad_norm": 51.825931549072266, "learning_rate": 5e-06, "loss": 1.3937, "step": 37330 }, { "epoch": 1.973520784334452, "grad_norm": 72.15837860107422, "learning_rate": 5e-06, "loss": 1.0984, "step": 37340 }, { "epoch": 1.9740493115932454, "grad_norm": 42.74842834472656, "learning_rate": 5e-06, "loss": 1.0848, "step": 37350 }, { "epoch": 1.9745778388520387, "grad_norm": 50.120941162109375, "learning_rate": 5e-06, "loss": 1.084, "step": 37360 }, { "epoch": 1.9751063661108321, "grad_norm": 51.196746826171875, "learning_rate": 5e-06, "loss": 1.2268, "step": 37370 }, { "epoch": 1.9756348933696255, "grad_norm": 86.85008239746094, "learning_rate": 5e-06, "loss": 1.3481, "step": 37380 }, { "epoch": 1.9761634206284189, "grad_norm": 58.25742721557617, "learning_rate": 5e-06, "loss": 1.2808, "step": 37390 }, { "epoch": 1.9766919478872123, "grad_norm": 68.6895751953125, "learning_rate": 5e-06, "loss": 1.4644, "step": 37400 }, { "epoch": 1.9772204751460056, "grad_norm": 62.18275451660156, "learning_rate": 5e-06, "loss": 1.2382, "step": 37410 }, { "epoch": 1.977749002404799, "grad_norm": 62.776763916015625, "learning_rate": 5e-06, "loss": 1.1067, "step": 37420 }, { "epoch": 1.9782775296635924, "grad_norm": 91.94361877441406, "learning_rate": 5e-06, "loss": 1.586, "step": 37430 }, { "epoch": 1.9788060569223858, "grad_norm": 78.92806243896484, "learning_rate": 5e-06, "loss": 1.1311, "step": 37440 }, { "epoch": 1.9793345841811791, "grad_norm": 69.46902465820312, "learning_rate": 5e-06, "loss": 1.0332, "step": 37450 }, { "epoch": 1.9798631114399725, "grad_norm": 79.76998901367188, "learning_rate": 5e-06, "loss": 1.4379, "step": 37460 }, { "epoch": 1.9803916386987659, "grad_norm": 49.92424011230469, "learning_rate": 5e-06, "loss": 1.3925, "step": 37470 }, { "epoch": 1.9809201659575593, "grad_norm": 47.178306579589844, "learning_rate": 5e-06, "loss": 0.9263, "step": 37480 }, { "epoch": 1.9814486932163526, "grad_norm": 46.83830642700195, "learning_rate": 5e-06, "loss": 1.0779, "step": 37490 }, { "epoch": 1.981977220475146, "grad_norm": 48.86762619018555, "learning_rate": 5e-06, "loss": 0.9861, "step": 37500 } ], "logging_steps": 10, "max_steps": 37840, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }