{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.976312925460403, "global_step": 1870000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 4.973325366509464e-05, "loss": 5.9346, "step": 10000 }, { "epoch": 0.11, "learning_rate": 4.946650733018929e-05, "loss": 4.8171, "step": 20000 }, { "epoch": 0.16, "learning_rate": 4.919976099528393e-05, "loss": 4.378, "step": 30000 }, { "epoch": 0.21, "learning_rate": 4.893301466037857e-05, "loss": 4.0841, "step": 40000 }, { "epoch": 0.27, "learning_rate": 4.866626832547321e-05, "loss": 3.8807, "step": 50000 }, { "epoch": 0.32, "learning_rate": 4.839952199056785e-05, "loss": 3.7164, "step": 60000 }, { "epoch": 0.37, "learning_rate": 4.8132775655662495e-05, "loss": 3.584, "step": 70000 }, { "epoch": 0.43, "learning_rate": 4.786602932075713e-05, "loss": 3.4784, "step": 80000 }, { "epoch": 0.48, "learning_rate": 4.759928298585178e-05, "loss": 3.3842, "step": 90000 }, { "epoch": 0.53, "learning_rate": 4.733253665094642e-05, "loss": 3.3004, "step": 100000 }, { "epoch": 0.59, "learning_rate": 4.7065790316041056e-05, "loss": 3.2337, "step": 110000 }, { "epoch": 0.64, "learning_rate": 4.6799043981135706e-05, "loss": 3.1757, "step": 120000 }, { "epoch": 0.69, "learning_rate": 4.653229764623034e-05, "loss": 3.1198, "step": 130000 }, { "epoch": 0.75, "learning_rate": 4.626555131132499e-05, "loss": 3.0722, "step": 140000 }, { "epoch": 0.8, "learning_rate": 4.5998804976419624e-05, "loss": 3.0281, "step": 150000 }, { "epoch": 0.85, "learning_rate": 4.573205864151427e-05, "loss": 2.9954, "step": 160000 }, { "epoch": 0.91, "learning_rate": 4.546531230660891e-05, "loss": 2.9605, "step": 170000 }, { "epoch": 0.96, "learning_rate": 4.519856597170355e-05, "loss": 2.9286, "step": 180000 }, { "epoch": 1.01, "learning_rate": 4.493181963679819e-05, "loss": 2.8946, "step": 190000 }, { "epoch": 1.07, "learning_rate": 4.4665073301892835e-05, "loss": 2.8688, "step": 200000 }, { "epoch": 1.12, "learning_rate": 4.439832696698747e-05, "loss": 2.8424, "step": 210000 }, { "epoch": 1.17, "learning_rate": 4.4131580632082116e-05, "loss": 2.8178, "step": 220000 }, { "epoch": 1.23, "learning_rate": 4.386483429717676e-05, "loss": 2.7994, "step": 230000 }, { "epoch": 1.28, "learning_rate": 4.35980879622714e-05, "loss": 2.784, "step": 240000 }, { "epoch": 1.33, "learning_rate": 4.333134162736604e-05, "loss": 2.7612, "step": 250000 }, { "epoch": 1.39, "learning_rate": 4.3064595292460684e-05, "loss": 2.7435, "step": 260000 }, { "epoch": 1.44, "learning_rate": 4.279784895755533e-05, "loss": 2.7275, "step": 270000 }, { "epoch": 1.49, "learning_rate": 4.2531102622649964e-05, "loss": 2.7117, "step": 280000 }, { "epoch": 1.55, "learning_rate": 4.226435628774461e-05, "loss": 2.6908, "step": 290000 }, { "epoch": 1.6, "learning_rate": 4.199760995283925e-05, "loss": 2.6787, "step": 300000 }, { "epoch": 1.65, "learning_rate": 4.173086361793389e-05, "loss": 2.6626, "step": 310000 }, { "epoch": 1.71, "learning_rate": 4.146411728302853e-05, "loss": 2.6431, "step": 320000 }, { "epoch": 1.76, "learning_rate": 4.1197370948123176e-05, "loss": 2.6329, "step": 330000 }, { "epoch": 1.81, "learning_rate": 4.093062461321781e-05, "loss": 2.6195, "step": 340000 }, { "epoch": 1.87, "learning_rate": 4.066387827831246e-05, "loss": 2.6042, "step": 350000 }, { "epoch": 1.92, "learning_rate": 4.03971319434071e-05, "loss": 2.5967, "step": 360000 }, { "epoch": 1.97, "learning_rate": 4.0130385608501744e-05, "loss": 2.5843, "step": 370000 }, { "epoch": 2.03, "learning_rate": 3.986363927359638e-05, "loss": 2.5683, "step": 380000 }, { "epoch": 2.08, "learning_rate": 3.9596892938691025e-05, "loss": 2.5605, "step": 390000 }, { "epoch": 2.13, "learning_rate": 3.933014660378567e-05, "loss": 2.5515, "step": 400000 }, { "epoch": 2.19, "learning_rate": 3.9063400268880305e-05, "loss": 2.5406, "step": 410000 }, { "epoch": 2.24, "learning_rate": 3.879665393397495e-05, "loss": 2.5315, "step": 420000 }, { "epoch": 2.29, "learning_rate": 3.852990759906959e-05, "loss": 2.5272, "step": 430000 }, { "epoch": 2.35, "learning_rate": 3.826316126416423e-05, "loss": 2.5199, "step": 440000 }, { "epoch": 2.4, "learning_rate": 3.799641492925887e-05, "loss": 2.5061, "step": 450000 }, { "epoch": 2.45, "learning_rate": 3.772966859435352e-05, "loss": 2.5007, "step": 460000 }, { "epoch": 2.51, "learning_rate": 3.746292225944816e-05, "loss": 2.4908, "step": 470000 }, { "epoch": 2.56, "learning_rate": 3.71961759245428e-05, "loss": 2.4817, "step": 480000 }, { "epoch": 2.61, "learning_rate": 3.6929429589637434e-05, "loss": 2.4731, "step": 490000 }, { "epoch": 2.67, "learning_rate": 3.6662683254732085e-05, "loss": 2.4659, "step": 500000 }, { "epoch": 2.72, "learning_rate": 3.639593691982672e-05, "loss": 2.4596, "step": 510000 }, { "epoch": 2.77, "learning_rate": 3.6129190584921365e-05, "loss": 2.4506, "step": 520000 }, { "epoch": 2.83, "learning_rate": 3.586244425001601e-05, "loss": 2.4425, "step": 530000 }, { "epoch": 2.88, "learning_rate": 3.5595697915110646e-05, "loss": 2.4378, "step": 540000 }, { "epoch": 2.93, "learning_rate": 3.532895158020529e-05, "loss": 2.4301, "step": 550000 }, { "epoch": 2.99, "learning_rate": 3.506220524529993e-05, "loss": 2.4195, "step": 560000 }, { "epoch": 3.04, "learning_rate": 3.479545891039458e-05, "loss": 2.4108, "step": 570000 }, { "epoch": 3.09, "learning_rate": 3.4528712575489214e-05, "loss": 2.4003, "step": 580000 }, { "epoch": 3.15, "learning_rate": 3.426196624058385e-05, "loss": 2.3948, "step": 590000 }, { "epoch": 3.2, "learning_rate": 3.39952199056785e-05, "loss": 2.3911, "step": 600000 }, { "epoch": 3.25, "learning_rate": 3.372847357077314e-05, "loss": 2.3805, "step": 610000 }, { "epoch": 3.31, "learning_rate": 3.346172723586778e-05, "loss": 2.3719, "step": 620000 }, { "epoch": 3.36, "learning_rate": 3.3194980900962426e-05, "loss": 2.3685, "step": 630000 }, { "epoch": 3.41, "learning_rate": 3.292823456605706e-05, "loss": 2.3668, "step": 640000 }, { "epoch": 3.47, "learning_rate": 3.2661488231151706e-05, "loss": 2.3584, "step": 650000 }, { "epoch": 3.52, "learning_rate": 3.239474189624634e-05, "loss": 2.3538, "step": 660000 }, { "epoch": 3.57, "learning_rate": 3.212799556134099e-05, "loss": 2.3481, "step": 670000 }, { "epoch": 3.63, "learning_rate": 3.186124922643563e-05, "loss": 2.3495, "step": 680000 }, { "epoch": 3.68, "learning_rate": 3.159450289153027e-05, "loss": 2.3413, "step": 690000 }, { "epoch": 3.73, "learning_rate": 3.132775655662492e-05, "loss": 2.3325, "step": 700000 }, { "epoch": 3.79, "learning_rate": 3.1061010221719555e-05, "loss": 2.3269, "step": 710000 }, { "epoch": 3.84, "learning_rate": 3.07942638868142e-05, "loss": 2.3255, "step": 720000 }, { "epoch": 3.89, "learning_rate": 3.052751755190884e-05, "loss": 2.3168, "step": 730000 }, { "epoch": 3.95, "learning_rate": 3.0260771217003482e-05, "loss": 2.3179, "step": 740000 }, { "epoch": 4.0, "learning_rate": 2.999402488209812e-05, "loss": 2.3098, "step": 750000 }, { "epoch": 4.05, "learning_rate": 2.972727854719276e-05, "loss": 2.2928, "step": 760000 }, { "epoch": 4.11, "learning_rate": 2.9460532212287407e-05, "loss": 2.2911, "step": 770000 }, { "epoch": 4.16, "learning_rate": 2.9193785877382047e-05, "loss": 2.2906, "step": 780000 }, { "epoch": 4.21, "learning_rate": 2.8927039542476687e-05, "loss": 2.2824, "step": 790000 }, { "epoch": 4.27, "learning_rate": 2.866029320757133e-05, "loss": 2.2766, "step": 800000 }, { "epoch": 4.32, "learning_rate": 2.839354687266597e-05, "loss": 2.2722, "step": 810000 }, { "epoch": 4.37, "learning_rate": 2.812680053776061e-05, "loss": 2.2665, "step": 820000 }, { "epoch": 4.43, "learning_rate": 2.786005420285525e-05, "loss": 2.259, "step": 830000 }, { "epoch": 4.48, "learning_rate": 2.7593307867949895e-05, "loss": 2.2584, "step": 840000 }, { "epoch": 4.53, "learning_rate": 2.7326561533044536e-05, "loss": 2.2524, "step": 850000 }, { "epoch": 4.59, "learning_rate": 2.7059815198139176e-05, "loss": 2.2536, "step": 860000 }, { "epoch": 4.64, "learning_rate": 2.6793068863233823e-05, "loss": 2.2446, "step": 870000 }, { "epoch": 4.69, "learning_rate": 2.6526322528328463e-05, "loss": 2.2439, "step": 880000 }, { "epoch": 4.75, "learning_rate": 2.6259576193423104e-05, "loss": 2.2389, "step": 890000 }, { "epoch": 4.8, "learning_rate": 2.5992829858517747e-05, "loss": 2.2362, "step": 900000 }, { "epoch": 4.85, "learning_rate": 2.5726083523612388e-05, "loss": 2.2313, "step": 910000 }, { "epoch": 4.91, "learning_rate": 2.5459337188707028e-05, "loss": 2.2283, "step": 920000 }, { "epoch": 4.96, "learning_rate": 2.5192590853801668e-05, "loss": 2.2224, "step": 930000 }, { "epoch": 5.01, "learning_rate": 2.4925844518896312e-05, "loss": 2.2175, "step": 940000 }, { "epoch": 5.07, "learning_rate": 2.4659098183990952e-05, "loss": 2.2067, "step": 950000 }, { "epoch": 5.12, "learning_rate": 2.4392351849085596e-05, "loss": 2.2029, "step": 960000 }, { "epoch": 5.17, "learning_rate": 2.4125605514180236e-05, "loss": 2.2014, "step": 970000 }, { "epoch": 5.23, "learning_rate": 2.385885917927488e-05, "loss": 2.2002, "step": 980000 }, { "epoch": 5.28, "learning_rate": 2.3592112844369517e-05, "loss": 2.195, "step": 990000 }, { "epoch": 5.33, "learning_rate": 2.332536650946416e-05, "loss": 2.1898, "step": 1000000 }, { "epoch": 5.39, "learning_rate": 2.3058620174558804e-05, "loss": 2.1864, "step": 1010000 }, { "epoch": 5.44, "learning_rate": 2.2791873839653444e-05, "loss": 2.1833, "step": 1020000 }, { "epoch": 5.49, "learning_rate": 2.2525127504748088e-05, "loss": 2.1799, "step": 1030000 }, { "epoch": 5.55, "learning_rate": 2.2258381169842725e-05, "loss": 2.1785, "step": 1040000 }, { "epoch": 5.6, "learning_rate": 2.199163483493737e-05, "loss": 2.1756, "step": 1050000 }, { "epoch": 5.66, "learning_rate": 2.1724888500032012e-05, "loss": 2.1726, "step": 1060000 }, { "epoch": 5.71, "learning_rate": 2.1458142165126653e-05, "loss": 2.1664, "step": 1070000 }, { "epoch": 5.76, "learning_rate": 2.1191395830221293e-05, "loss": 2.1651, "step": 1080000 }, { "epoch": 5.82, "learning_rate": 2.0924649495315933e-05, "loss": 2.1577, "step": 1090000 }, { "epoch": 5.87, "learning_rate": 2.0657903160410577e-05, "loss": 2.1472, "step": 1100000 }, { "epoch": 5.92, "learning_rate": 2.039115682550522e-05, "loss": 2.1458, "step": 1110000 }, { "epoch": 5.98, "learning_rate": 2.012441049059986e-05, "loss": 2.1432, "step": 1120000 }, { "epoch": 6.03, "learning_rate": 1.98576641556945e-05, "loss": 2.134, "step": 1130000 }, { "epoch": 6.08, "learning_rate": 1.959091782078914e-05, "loss": 2.1337, "step": 1140000 }, { "epoch": 6.14, "learning_rate": 1.9324171485883785e-05, "loss": 2.1337, "step": 1150000 }, { "epoch": 6.19, "learning_rate": 1.9057425150978425e-05, "loss": 2.128, "step": 1160000 }, { "epoch": 6.24, "learning_rate": 1.879067881607307e-05, "loss": 2.1262, "step": 1170000 }, { "epoch": 6.3, "learning_rate": 1.852393248116771e-05, "loss": 2.1209, "step": 1180000 }, { "epoch": 6.35, "learning_rate": 1.825718614626235e-05, "loss": 2.1178, "step": 1190000 }, { "epoch": 6.4, "learning_rate": 1.7990439811356993e-05, "loss": 2.1179, "step": 1200000 }, { "epoch": 6.46, "learning_rate": 1.7723693476451634e-05, "loss": 2.1123, "step": 1210000 }, { "epoch": 6.51, "learning_rate": 1.7456947141546277e-05, "loss": 2.1098, "step": 1220000 }, { "epoch": 6.56, "learning_rate": 1.7190200806640918e-05, "loss": 2.1115, "step": 1230000 }, { "epoch": 6.62, "learning_rate": 1.6923454471735558e-05, "loss": 2.1068, "step": 1240000 }, { "epoch": 6.67, "learning_rate": 1.66567081368302e-05, "loss": 2.1002, "step": 1250000 }, { "epoch": 6.72, "learning_rate": 1.6389961801924842e-05, "loss": 2.0962, "step": 1260000 }, { "epoch": 6.78, "learning_rate": 1.6123215467019486e-05, "loss": 2.0961, "step": 1270000 }, { "epoch": 6.83, "learning_rate": 1.5856469132114126e-05, "loss": 2.0911, "step": 1280000 }, { "epoch": 6.88, "learning_rate": 1.5589722797208766e-05, "loss": 2.0916, "step": 1290000 }, { "epoch": 6.94, "learning_rate": 1.532297646230341e-05, "loss": 2.0854, "step": 1300000 }, { "epoch": 6.99, "learning_rate": 1.505623012739805e-05, "loss": 2.0846, "step": 1310000 }, { "epoch": 7.04, "learning_rate": 1.4789483792492692e-05, "loss": 2.072, "step": 1320000 }, { "epoch": 7.1, "learning_rate": 1.4522737457587332e-05, "loss": 2.0691, "step": 1330000 }, { "epoch": 7.15, "learning_rate": 1.4255991122681974e-05, "loss": 2.072, "step": 1340000 }, { "epoch": 7.2, "learning_rate": 1.3989244787776618e-05, "loss": 2.0669, "step": 1350000 }, { "epoch": 7.26, "learning_rate": 1.3722498452871257e-05, "loss": 2.0646, "step": 1360000 }, { "epoch": 7.31, "learning_rate": 1.34557521179659e-05, "loss": 2.0573, "step": 1370000 }, { "epoch": 7.36, "learning_rate": 1.318900578306054e-05, "loss": 2.0487, "step": 1380000 }, { "epoch": 7.42, "learning_rate": 1.2922259448155183e-05, "loss": 2.0529, "step": 1390000 }, { "epoch": 7.47, "learning_rate": 1.2655513113249826e-05, "loss": 2.0468, "step": 1400000 }, { "epoch": 7.52, "learning_rate": 1.2388766778344465e-05, "loss": 2.0455, "step": 1410000 }, { "epoch": 7.58, "learning_rate": 1.2122020443439109e-05, "loss": 2.046, "step": 1420000 }, { "epoch": 7.63, "learning_rate": 1.1855274108533749e-05, "loss": 2.0398, "step": 1430000 }, { "epoch": 7.68, "learning_rate": 1.158852777362839e-05, "loss": 2.0391, "step": 1440000 }, { "epoch": 7.74, "learning_rate": 1.1321781438723033e-05, "loss": 2.0345, "step": 1450000 }, { "epoch": 7.79, "learning_rate": 1.1055035103817673e-05, "loss": 2.038, "step": 1460000 }, { "epoch": 7.84, "learning_rate": 1.0788288768912317e-05, "loss": 2.032, "step": 1470000 }, { "epoch": 7.9, "learning_rate": 1.0521542434006957e-05, "loss": 2.0271, "step": 1480000 }, { "epoch": 7.95, "learning_rate": 1.0254796099101599e-05, "loss": 2.0261, "step": 1490000 }, { "epoch": 8.0, "learning_rate": 9.988049764196241e-06, "loss": 2.0235, "step": 1500000 }, { "epoch": 8.06, "learning_rate": 9.721303429290881e-06, "loss": 2.0173, "step": 1510000 }, { "epoch": 8.11, "learning_rate": 9.454557094385523e-06, "loss": 2.0178, "step": 1520000 }, { "epoch": 8.16, "learning_rate": 9.187810759480165e-06, "loss": 2.0116, "step": 1530000 }, { "epoch": 8.22, "learning_rate": 8.921064424574807e-06, "loss": 2.0099, "step": 1540000 }, { "epoch": 8.27, "learning_rate": 8.654318089669448e-06, "loss": 2.0105, "step": 1550000 }, { "epoch": 8.32, "learning_rate": 8.38757175476409e-06, "loss": 2.0033, "step": 1560000 }, { "epoch": 8.38, "learning_rate": 8.120825419858732e-06, "loss": 2.0033, "step": 1570000 }, { "epoch": 8.43, "learning_rate": 7.854079084953374e-06, "loss": 2.0, "step": 1580000 }, { "epoch": 8.48, "learning_rate": 7.5873327500480155e-06, "loss": 1.9954, "step": 1590000 }, { "epoch": 8.54, "learning_rate": 7.320586415142657e-06, "loss": 1.9964, "step": 1600000 }, { "epoch": 8.59, "learning_rate": 7.053840080237298e-06, "loss": 1.9995, "step": 1610000 }, { "epoch": 8.64, "learning_rate": 6.787093745331939e-06, "loss": 1.9878, "step": 1620000 }, { "epoch": 8.7, "learning_rate": 6.52034741042658e-06, "loss": 1.988, "step": 1630000 }, { "epoch": 8.75, "learning_rate": 6.253601075521223e-06, "loss": 1.9861, "step": 1640000 }, { "epoch": 8.8, "learning_rate": 5.986854740615865e-06, "loss": 1.9828, "step": 1650000 }, { "epoch": 8.86, "learning_rate": 5.720108405710506e-06, "loss": 1.9787, "step": 1660000 }, { "epoch": 8.91, "learning_rate": 5.453362070805147e-06, "loss": 1.9794, "step": 1670000 }, { "epoch": 8.96, "learning_rate": 5.186615735899789e-06, "loss": 1.9756, "step": 1680000 }, { "epoch": 9.02, "learning_rate": 4.91986940099443e-06, "loss": 1.9756, "step": 1690000 }, { "epoch": 9.07, "learning_rate": 4.653123066089072e-06, "loss": 1.9676, "step": 1700000 }, { "epoch": 9.12, "learning_rate": 4.386376731183714e-06, "loss": 1.967, "step": 1710000 }, { "epoch": 9.18, "learning_rate": 4.119630396278355e-06, "loss": 1.9656, "step": 1720000 }, { "epoch": 9.23, "learning_rate": 3.8528840613729966e-06, "loss": 1.9627, "step": 1730000 }, { "epoch": 9.28, "learning_rate": 3.5861377264676385e-06, "loss": 1.964, "step": 1740000 }, { "epoch": 9.34, "learning_rate": 3.31939139156228e-06, "loss": 1.9646, "step": 1750000 }, { "epoch": 9.39, "learning_rate": 3.0526450566569217e-06, "loss": 1.9591, "step": 1760000 }, { "epoch": 9.44, "learning_rate": 2.7858987217515632e-06, "loss": 1.9623, "step": 1770000 }, { "epoch": 9.5, "learning_rate": 2.519152386846205e-06, "loss": 1.9544, "step": 1780000 }, { "epoch": 9.55, "learning_rate": 2.2524060519408464e-06, "loss": 1.9572, "step": 1790000 }, { "epoch": 9.6, "learning_rate": 1.985659717035488e-06, "loss": 1.9545, "step": 1800000 }, { "epoch": 9.66, "learning_rate": 1.7189133821301297e-06, "loss": 1.953, "step": 1810000 }, { "epoch": 9.71, "learning_rate": 1.4521670472247713e-06, "loss": 1.9523, "step": 1820000 }, { "epoch": 9.76, "learning_rate": 1.1854207123194128e-06, "loss": 1.9528, "step": 1830000 }, { "epoch": 9.82, "learning_rate": 9.186743774140545e-07, "loss": 1.9482, "step": 1840000 }, { "epoch": 9.87, "learning_rate": 6.519280425086959e-07, "loss": 1.9511, "step": 1850000 }, { "epoch": 9.92, "learning_rate": 3.8518170760333756e-07, "loss": 1.947, "step": 1860000 }, { "epoch": 9.98, "learning_rate": 1.1843537269797913e-07, "loss": 1.9437, "step": 1870000 } ], "max_steps": 1874440, "num_train_epochs": 10, "total_flos": 9.135177187546945e+18, "trial_name": null, "trial_params": null }