{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "global_step": 5930, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 9.995951417004049e-06, "loss": 4.0911, "step": 10 }, { "epoch": 0.01, "learning_rate": 9.991902834008098e-06, "loss": 2.8012, "step": 20 }, { "epoch": 0.02, "learning_rate": 9.987854251012146e-06, "loss": 2.389, "step": 30 }, { "epoch": 0.03, "learning_rate": 9.983805668016194e-06, "loss": 2.1658, "step": 40 }, { "epoch": 0.03, "learning_rate": 9.979757085020244e-06, "loss": 2.026, "step": 50 }, { "epoch": 0.04, "learning_rate": 9.975708502024292e-06, "loss": 1.7846, "step": 60 }, { "epoch": 0.05, "learning_rate": 9.971659919028341e-06, "loss": 1.703, "step": 70 }, { "epoch": 0.05, "learning_rate": 9.96761133603239e-06, "loss": 1.5691, "step": 80 }, { "epoch": 0.06, "learning_rate": 9.963562753036437e-06, "loss": 1.4297, "step": 90 }, { "epoch": 0.07, "learning_rate": 9.959514170040487e-06, "loss": 1.4341, "step": 100 }, { "epoch": 0.07, "learning_rate": 9.955465587044535e-06, "loss": 1.3304, "step": 110 }, { "epoch": 0.08, "learning_rate": 9.951417004048583e-06, "loss": 1.3988, "step": 120 }, { "epoch": 0.09, "learning_rate": 9.947368421052632e-06, "loss": 1.3272, "step": 130 }, { "epoch": 0.09, "learning_rate": 9.943319838056682e-06, "loss": 1.2999, "step": 140 }, { "epoch": 0.1, "learning_rate": 9.93927125506073e-06, "loss": 1.2997, "step": 150 }, { "epoch": 0.11, "learning_rate": 9.935222672064778e-06, "loss": 1.3704, "step": 160 }, { "epoch": 0.11, "learning_rate": 9.931174089068828e-06, "loss": 1.3313, "step": 170 }, { "epoch": 0.12, "learning_rate": 9.927125506072876e-06, "loss": 1.3554, "step": 180 }, { "epoch": 0.13, "learning_rate": 9.923076923076923e-06, "loss": 1.41, "step": 190 }, { "epoch": 0.13, "learning_rate": 9.919028340080973e-06, "loss": 1.3005, "step": 200 }, { "epoch": 0.14, "learning_rate": 9.914979757085021e-06, "loss": 1.2311, "step": 210 }, { "epoch": 0.15, "learning_rate": 9.910931174089069e-06, "loss": 1.3162, "step": 220 }, { "epoch": 0.16, "learning_rate": 9.906882591093119e-06, "loss": 1.2895, "step": 230 }, { "epoch": 0.16, "learning_rate": 9.902834008097167e-06, "loss": 1.2812, "step": 240 }, { "epoch": 0.17, "learning_rate": 9.898785425101216e-06, "loss": 1.2682, "step": 250 }, { "epoch": 0.18, "learning_rate": 9.894736842105264e-06, "loss": 1.2992, "step": 260 }, { "epoch": 0.18, "learning_rate": 9.890688259109312e-06, "loss": 1.2832, "step": 270 }, { "epoch": 0.19, "learning_rate": 9.886639676113362e-06, "loss": 1.2912, "step": 280 }, { "epoch": 0.2, "learning_rate": 9.88259109311741e-06, "loss": 1.2949, "step": 290 }, { "epoch": 0.2, "learning_rate": 9.878542510121458e-06, "loss": 1.2557, "step": 300 }, { "epoch": 0.21, "learning_rate": 9.874493927125507e-06, "loss": 1.3119, "step": 310 }, { "epoch": 0.22, "learning_rate": 9.870445344129555e-06, "loss": 1.3092, "step": 320 }, { "epoch": 0.22, "learning_rate": 9.866396761133603e-06, "loss": 1.2441, "step": 330 }, { "epoch": 0.23, "learning_rate": 9.862348178137653e-06, "loss": 1.3288, "step": 340 }, { "epoch": 0.24, "learning_rate": 9.8582995951417e-06, "loss": 1.2382, "step": 350 }, { "epoch": 0.24, "learning_rate": 9.85425101214575e-06, "loss": 1.2819, "step": 360 }, { "epoch": 0.25, "learning_rate": 9.850202429149798e-06, "loss": 1.3011, "step": 370 }, { "epoch": 0.26, "learning_rate": 9.846153846153846e-06, "loss": 1.2421, "step": 380 }, { "epoch": 0.26, "learning_rate": 9.842105263157896e-06, "loss": 1.2071, "step": 390 }, { "epoch": 0.27, "learning_rate": 9.838056680161945e-06, "loss": 1.2838, "step": 400 }, { "epoch": 0.28, "learning_rate": 9.834008097165993e-06, "loss": 1.298, "step": 410 }, { "epoch": 0.28, "learning_rate": 9.829959514170041e-06, "loss": 1.2867, "step": 420 }, { "epoch": 0.29, "learning_rate": 9.825910931174091e-06, "loss": 1.1868, "step": 430 }, { "epoch": 0.3, "learning_rate": 9.821862348178139e-06, "loss": 1.2263, "step": 440 }, { "epoch": 0.3, "learning_rate": 9.817813765182187e-06, "loss": 1.2701, "step": 450 }, { "epoch": 0.31, "learning_rate": 9.813765182186236e-06, "loss": 1.2542, "step": 460 }, { "epoch": 0.32, "learning_rate": 9.809716599190284e-06, "loss": 1.2833, "step": 470 }, { "epoch": 0.32, "learning_rate": 9.805668016194332e-06, "loss": 1.2109, "step": 480 }, { "epoch": 0.33, "learning_rate": 9.801619433198382e-06, "loss": 1.2657, "step": 490 }, { "epoch": 0.34, "learning_rate": 9.79757085020243e-06, "loss": 1.2587, "step": 500 }, { "epoch": 0.34, "learning_rate": 9.79352226720648e-06, "loss": 1.2647, "step": 510 }, { "epoch": 0.35, "learning_rate": 9.789473684210527e-06, "loss": 1.1917, "step": 520 }, { "epoch": 0.36, "learning_rate": 9.785425101214575e-06, "loss": 1.2281, "step": 530 }, { "epoch": 0.36, "learning_rate": 9.781376518218625e-06, "loss": 1.2677, "step": 540 }, { "epoch": 0.37, "learning_rate": 9.777327935222673e-06, "loss": 1.2299, "step": 550 }, { "epoch": 0.38, "learning_rate": 9.77327935222672e-06, "loss": 1.2524, "step": 560 }, { "epoch": 0.38, "learning_rate": 9.76923076923077e-06, "loss": 1.2902, "step": 570 }, { "epoch": 0.39, "learning_rate": 9.765182186234818e-06, "loss": 1.2603, "step": 580 }, { "epoch": 0.4, "learning_rate": 9.761133603238866e-06, "loss": 1.2572, "step": 590 }, { "epoch": 0.4, "learning_rate": 9.757085020242916e-06, "loss": 1.2223, "step": 600 }, { "epoch": 0.41, "learning_rate": 9.753036437246964e-06, "loss": 1.2284, "step": 610 }, { "epoch": 0.42, "learning_rate": 9.748987854251012e-06, "loss": 1.2331, "step": 620 }, { "epoch": 0.42, "learning_rate": 9.744939271255061e-06, "loss": 1.2601, "step": 630 }, { "epoch": 0.43, "learning_rate": 9.74089068825911e-06, "loss": 1.195, "step": 640 }, { "epoch": 0.44, "learning_rate": 9.736842105263159e-06, "loss": 1.2176, "step": 650 }, { "epoch": 0.45, "learning_rate": 9.732793522267207e-06, "loss": 1.2684, "step": 660 }, { "epoch": 0.45, "learning_rate": 9.728744939271257e-06, "loss": 1.2575, "step": 670 }, { "epoch": 0.46, "learning_rate": 9.724696356275305e-06, "loss": 1.2187, "step": 680 }, { "epoch": 0.47, "learning_rate": 9.720647773279354e-06, "loss": 1.2184, "step": 690 }, { "epoch": 0.47, "learning_rate": 9.716599190283402e-06, "loss": 1.282, "step": 700 }, { "epoch": 0.48, "learning_rate": 9.71255060728745e-06, "loss": 1.2464, "step": 710 }, { "epoch": 0.49, "learning_rate": 9.7085020242915e-06, "loss": 1.2041, "step": 720 }, { "epoch": 0.49, "learning_rate": 9.704453441295548e-06, "loss": 1.26, "step": 730 }, { "epoch": 0.5, "learning_rate": 9.700404858299596e-06, "loss": 1.2401, "step": 740 }, { "epoch": 0.51, "learning_rate": 9.696356275303645e-06, "loss": 1.2289, "step": 750 }, { "epoch": 0.51, "learning_rate": 9.692307692307693e-06, "loss": 1.2587, "step": 760 }, { "epoch": 0.52, "learning_rate": 9.688259109311741e-06, "loss": 1.2078, "step": 770 }, { "epoch": 0.53, "learning_rate": 9.68421052631579e-06, "loss": 1.192, "step": 780 }, { "epoch": 0.53, "learning_rate": 9.680161943319839e-06, "loss": 1.2981, "step": 790 }, { "epoch": 0.54, "learning_rate": 9.676113360323888e-06, "loss": 1.1942, "step": 800 }, { "epoch": 0.55, "learning_rate": 9.672064777327936e-06, "loss": 1.247, "step": 810 }, { "epoch": 0.55, "learning_rate": 9.668016194331984e-06, "loss": 1.1902, "step": 820 }, { "epoch": 0.56, "learning_rate": 9.663967611336034e-06, "loss": 1.2271, "step": 830 }, { "epoch": 0.57, "learning_rate": 9.659919028340082e-06, "loss": 1.2407, "step": 840 }, { "epoch": 0.57, "learning_rate": 9.65587044534413e-06, "loss": 1.2067, "step": 850 }, { "epoch": 0.58, "learning_rate": 9.65182186234818e-06, "loss": 1.2284, "step": 860 }, { "epoch": 0.59, "learning_rate": 9.647773279352227e-06, "loss": 1.2676, "step": 870 }, { "epoch": 0.59, "learning_rate": 9.643724696356275e-06, "loss": 1.2542, "step": 880 }, { "epoch": 0.6, "learning_rate": 9.639676113360325e-06, "loss": 1.2364, "step": 890 }, { "epoch": 0.61, "learning_rate": 9.635627530364373e-06, "loss": 1.3608, "step": 900 }, { "epoch": 0.61, "learning_rate": 9.63157894736842e-06, "loss": 1.2739, "step": 910 }, { "epoch": 0.62, "learning_rate": 9.62753036437247e-06, "loss": 1.1984, "step": 920 }, { "epoch": 0.63, "learning_rate": 9.62348178137652e-06, "loss": 1.1875, "step": 930 }, { "epoch": 0.63, "learning_rate": 9.619433198380568e-06, "loss": 1.2447, "step": 940 }, { "epoch": 0.64, "learning_rate": 9.615384615384616e-06, "loss": 1.26, "step": 950 }, { "epoch": 0.65, "learning_rate": 9.611336032388665e-06, "loss": 1.2059, "step": 960 }, { "epoch": 0.65, "learning_rate": 9.607287449392713e-06, "loss": 1.233, "step": 970 }, { "epoch": 0.66, "learning_rate": 9.603238866396763e-06, "loss": 1.244, "step": 980 }, { "epoch": 0.67, "learning_rate": 9.59919028340081e-06, "loss": 1.2613, "step": 990 }, { "epoch": 0.67, "learning_rate": 9.595141700404859e-06, "loss": 1.2274, "step": 1000 }, { "epoch": 0.68, "learning_rate": 9.591093117408908e-06, "loss": 1.2727, "step": 1010 }, { "epoch": 0.69, "learning_rate": 9.587044534412956e-06, "loss": 1.2834, "step": 1020 }, { "epoch": 0.69, "learning_rate": 9.582995951417004e-06, "loss": 1.259, "step": 1030 }, { "epoch": 0.7, "learning_rate": 9.578947368421054e-06, "loss": 1.2742, "step": 1040 }, { "epoch": 0.71, "learning_rate": 9.574898785425102e-06, "loss": 1.2658, "step": 1050 }, { "epoch": 0.72, "learning_rate": 9.57085020242915e-06, "loss": 1.2203, "step": 1060 }, { "epoch": 0.72, "learning_rate": 9.5668016194332e-06, "loss": 1.1757, "step": 1070 }, { "epoch": 0.73, "learning_rate": 9.562753036437247e-06, "loss": 1.2296, "step": 1080 }, { "epoch": 0.74, "learning_rate": 9.558704453441297e-06, "loss": 1.2714, "step": 1090 }, { "epoch": 0.74, "learning_rate": 9.554655870445345e-06, "loss": 1.2265, "step": 1100 }, { "epoch": 0.75, "learning_rate": 9.550607287449393e-06, "loss": 1.1984, "step": 1110 }, { "epoch": 0.76, "learning_rate": 9.546558704453442e-06, "loss": 1.2706, "step": 1120 }, { "epoch": 0.76, "learning_rate": 9.54251012145749e-06, "loss": 1.2611, "step": 1130 }, { "epoch": 0.77, "learning_rate": 9.538461538461538e-06, "loss": 1.2584, "step": 1140 }, { "epoch": 0.78, "learning_rate": 9.534412955465588e-06, "loss": 1.2849, "step": 1150 }, { "epoch": 0.78, "learning_rate": 9.530364372469636e-06, "loss": 1.2254, "step": 1160 }, { "epoch": 0.79, "learning_rate": 9.526315789473686e-06, "loss": 1.2698, "step": 1170 }, { "epoch": 0.8, "learning_rate": 9.522267206477733e-06, "loss": 1.2037, "step": 1180 }, { "epoch": 0.8, "learning_rate": 9.518218623481783e-06, "loss": 1.2408, "step": 1190 }, { "epoch": 0.81, "learning_rate": 9.514170040485831e-06, "loss": 1.2116, "step": 1200 }, { "epoch": 0.82, "learning_rate": 9.510121457489879e-06, "loss": 1.2393, "step": 1210 }, { "epoch": 0.82, "learning_rate": 9.506072874493929e-06, "loss": 1.2177, "step": 1220 }, { "epoch": 0.83, "learning_rate": 9.502024291497977e-06, "loss": 1.23, "step": 1230 }, { "epoch": 0.84, "learning_rate": 9.497975708502026e-06, "loss": 1.2245, "step": 1240 }, { "epoch": 0.84, "learning_rate": 9.493927125506074e-06, "loss": 1.2602, "step": 1250 }, { "epoch": 0.85, "learning_rate": 9.489878542510122e-06, "loss": 1.2017, "step": 1260 }, { "epoch": 0.86, "learning_rate": 9.485829959514172e-06, "loss": 1.2281, "step": 1270 }, { "epoch": 0.86, "learning_rate": 9.48178137651822e-06, "loss": 1.2808, "step": 1280 }, { "epoch": 0.87, "learning_rate": 9.477732793522268e-06, "loss": 1.2017, "step": 1290 }, { "epoch": 0.88, "learning_rate": 9.473684210526317e-06, "loss": 1.2557, "step": 1300 }, { "epoch": 0.88, "learning_rate": 9.469635627530365e-06, "loss": 1.2952, "step": 1310 }, { "epoch": 0.89, "learning_rate": 9.465587044534413e-06, "loss": 1.2477, "step": 1320 }, { "epoch": 0.9, "learning_rate": 9.461538461538463e-06, "loss": 1.2397, "step": 1330 }, { "epoch": 0.9, "learning_rate": 9.45748987854251e-06, "loss": 1.1556, "step": 1340 }, { "epoch": 0.91, "learning_rate": 9.453441295546559e-06, "loss": 1.217, "step": 1350 }, { "epoch": 0.92, "learning_rate": 9.449392712550608e-06, "loss": 1.2725, "step": 1360 }, { "epoch": 0.92, "learning_rate": 9.445344129554656e-06, "loss": 1.2079, "step": 1370 }, { "epoch": 0.93, "learning_rate": 9.441295546558706e-06, "loss": 1.2646, "step": 1380 }, { "epoch": 0.94, "learning_rate": 9.437246963562754e-06, "loss": 1.2034, "step": 1390 }, { "epoch": 0.94, "learning_rate": 9.433198380566802e-06, "loss": 1.2442, "step": 1400 }, { "epoch": 0.95, "learning_rate": 9.429149797570851e-06, "loss": 1.2463, "step": 1410 }, { "epoch": 0.96, "learning_rate": 9.4251012145749e-06, "loss": 1.2257, "step": 1420 }, { "epoch": 0.96, "learning_rate": 9.421052631578947e-06, "loss": 1.2969, "step": 1430 }, { "epoch": 0.97, "learning_rate": 9.417004048582997e-06, "loss": 1.2288, "step": 1440 }, { "epoch": 0.98, "learning_rate": 9.412955465587045e-06, "loss": 1.2307, "step": 1450 }, { "epoch": 0.98, "learning_rate": 9.408906882591094e-06, "loss": 1.2101, "step": 1460 }, { "epoch": 0.99, "learning_rate": 9.404858299595142e-06, "loss": 1.2009, "step": 1470 }, { "epoch": 1.0, "learning_rate": 9.400809716599192e-06, "loss": 1.2698, "step": 1480 }, { "epoch": 1.0, "eval_loss": 1.1532880067825317, "eval_runtime": 11.4194, "eval_samples_per_second": 96.327, "eval_steps_per_second": 12.085, "step": 1482 }, { "epoch": 1.01, "learning_rate": 9.39676113360324e-06, "loss": 1.1808, "step": 1490 }, { "epoch": 1.01, "learning_rate": 9.392712550607288e-06, "loss": 1.1629, "step": 1500 }, { "epoch": 1.02, "learning_rate": 9.388663967611337e-06, "loss": 1.1656, "step": 1510 }, { "epoch": 1.03, "learning_rate": 9.384615384615385e-06, "loss": 1.1163, "step": 1520 }, { "epoch": 1.03, "learning_rate": 9.380566801619435e-06, "loss": 1.1403, "step": 1530 }, { "epoch": 1.04, "learning_rate": 9.376518218623483e-06, "loss": 1.122, "step": 1540 }, { "epoch": 1.05, "learning_rate": 9.37246963562753e-06, "loss": 1.1426, "step": 1550 }, { "epoch": 1.05, "learning_rate": 9.36842105263158e-06, "loss": 1.1142, "step": 1560 }, { "epoch": 1.06, "learning_rate": 9.364372469635628e-06, "loss": 1.1143, "step": 1570 }, { "epoch": 1.07, "learning_rate": 9.360323886639676e-06, "loss": 1.1798, "step": 1580 }, { "epoch": 1.07, "learning_rate": 9.356275303643726e-06, "loss": 1.1575, "step": 1590 }, { "epoch": 1.08, "learning_rate": 9.352226720647774e-06, "loss": 1.1459, "step": 1600 }, { "epoch": 1.09, "learning_rate": 9.348178137651822e-06, "loss": 1.199, "step": 1610 }, { "epoch": 1.09, "learning_rate": 9.344129554655871e-06, "loss": 1.1568, "step": 1620 }, { "epoch": 1.1, "learning_rate": 9.34008097165992e-06, "loss": 1.1577, "step": 1630 }, { "epoch": 1.11, "learning_rate": 9.336032388663967e-06, "loss": 1.0549, "step": 1640 }, { "epoch": 1.11, "learning_rate": 9.331983805668017e-06, "loss": 1.1889, "step": 1650 }, { "epoch": 1.12, "learning_rate": 9.327935222672065e-06, "loss": 1.1355, "step": 1660 }, { "epoch": 1.13, "learning_rate": 9.323886639676115e-06, "loss": 1.1725, "step": 1670 }, { "epoch": 1.13, "learning_rate": 9.319838056680162e-06, "loss": 1.1096, "step": 1680 }, { "epoch": 1.14, "learning_rate": 9.315789473684212e-06, "loss": 1.2392, "step": 1690 }, { "epoch": 1.15, "learning_rate": 9.31174089068826e-06, "loss": 1.1508, "step": 1700 }, { "epoch": 1.15, "learning_rate": 9.30769230769231e-06, "loss": 1.1437, "step": 1710 }, { "epoch": 1.16, "learning_rate": 9.303643724696358e-06, "loss": 1.204, "step": 1720 }, { "epoch": 1.17, "learning_rate": 9.299595141700406e-06, "loss": 1.1795, "step": 1730 }, { "epoch": 1.17, "learning_rate": 9.295546558704455e-06, "loss": 1.112, "step": 1740 }, { "epoch": 1.18, "learning_rate": 9.291497975708503e-06, "loss": 1.2072, "step": 1750 }, { "epoch": 1.19, "learning_rate": 9.287449392712551e-06, "loss": 1.1518, "step": 1760 }, { "epoch": 1.19, "learning_rate": 9.2834008097166e-06, "loss": 1.1479, "step": 1770 }, { "epoch": 1.2, "learning_rate": 9.279352226720649e-06, "loss": 1.1183, "step": 1780 }, { "epoch": 1.21, "learning_rate": 9.275303643724697e-06, "loss": 1.1493, "step": 1790 }, { "epoch": 1.21, "learning_rate": 9.271255060728746e-06, "loss": 1.1603, "step": 1800 }, { "epoch": 1.22, "learning_rate": 9.267206477732794e-06, "loss": 1.186, "step": 1810 }, { "epoch": 1.23, "learning_rate": 9.263157894736844e-06, "loss": 1.1857, "step": 1820 }, { "epoch": 1.23, "learning_rate": 9.259109311740892e-06, "loss": 1.1957, "step": 1830 }, { "epoch": 1.24, "learning_rate": 9.25506072874494e-06, "loss": 1.1102, "step": 1840 }, { "epoch": 1.25, "learning_rate": 9.25101214574899e-06, "loss": 1.1439, "step": 1850 }, { "epoch": 1.25, "learning_rate": 9.246963562753037e-06, "loss": 1.1252, "step": 1860 }, { "epoch": 1.26, "learning_rate": 9.242914979757085e-06, "loss": 1.1238, "step": 1870 }, { "epoch": 1.27, "learning_rate": 9.238866396761135e-06, "loss": 1.135, "step": 1880 }, { "epoch": 1.27, "learning_rate": 9.234817813765183e-06, "loss": 1.1542, "step": 1890 }, { "epoch": 1.28, "learning_rate": 9.23076923076923e-06, "loss": 1.1481, "step": 1900 }, { "epoch": 1.29, "learning_rate": 9.22672064777328e-06, "loss": 1.0846, "step": 1910 }, { "epoch": 1.3, "learning_rate": 9.222672064777328e-06, "loss": 1.1346, "step": 1920 }, { "epoch": 1.3, "learning_rate": 9.218623481781376e-06, "loss": 1.0831, "step": 1930 }, { "epoch": 1.31, "learning_rate": 9.214574898785426e-06, "loss": 1.2031, "step": 1940 }, { "epoch": 1.32, "learning_rate": 9.210526315789474e-06, "loss": 1.0651, "step": 1950 }, { "epoch": 1.32, "learning_rate": 9.206477732793523e-06, "loss": 1.1237, "step": 1960 }, { "epoch": 1.33, "learning_rate": 9.202429149797571e-06, "loss": 1.0612, "step": 1970 }, { "epoch": 1.34, "learning_rate": 9.198380566801621e-06, "loss": 1.1377, "step": 1980 }, { "epoch": 1.34, "learning_rate": 9.194331983805669e-06, "loss": 1.1692, "step": 1990 }, { "epoch": 1.35, "learning_rate": 9.190283400809718e-06, "loss": 1.1136, "step": 2000 }, { "epoch": 1.36, "learning_rate": 9.186234817813766e-06, "loss": 1.1361, "step": 2010 }, { "epoch": 1.36, "learning_rate": 9.182186234817814e-06, "loss": 1.1663, "step": 2020 }, { "epoch": 1.37, "learning_rate": 9.178137651821864e-06, "loss": 1.1054, "step": 2030 }, { "epoch": 1.38, "learning_rate": 9.174089068825912e-06, "loss": 1.1427, "step": 2040 }, { "epoch": 1.38, "learning_rate": 9.17004048582996e-06, "loss": 1.1392, "step": 2050 }, { "epoch": 1.39, "learning_rate": 9.16599190283401e-06, "loss": 1.191, "step": 2060 }, { "epoch": 1.4, "learning_rate": 9.161943319838057e-06, "loss": 1.1006, "step": 2070 }, { "epoch": 1.4, "learning_rate": 9.157894736842105e-06, "loss": 1.1372, "step": 2080 }, { "epoch": 1.41, "learning_rate": 9.153846153846155e-06, "loss": 1.1428, "step": 2090 }, { "epoch": 1.42, "learning_rate": 9.149797570850203e-06, "loss": 1.1433, "step": 2100 }, { "epoch": 1.42, "learning_rate": 9.145748987854253e-06, "loss": 1.0997, "step": 2110 }, { "epoch": 1.43, "learning_rate": 9.1417004048583e-06, "loss": 1.1478, "step": 2120 }, { "epoch": 1.44, "learning_rate": 9.137651821862348e-06, "loss": 1.0928, "step": 2130 }, { "epoch": 1.44, "learning_rate": 9.133603238866398e-06, "loss": 1.0993, "step": 2140 }, { "epoch": 1.45, "learning_rate": 9.129554655870446e-06, "loss": 1.1306, "step": 2150 }, { "epoch": 1.46, "learning_rate": 9.125506072874494e-06, "loss": 1.1527, "step": 2160 }, { "epoch": 1.46, "learning_rate": 9.121457489878544e-06, "loss": 1.1394, "step": 2170 }, { "epoch": 1.47, "learning_rate": 9.117408906882591e-06, "loss": 1.1787, "step": 2180 }, { "epoch": 1.48, "learning_rate": 9.11336032388664e-06, "loss": 1.1295, "step": 2190 }, { "epoch": 1.48, "learning_rate": 9.109311740890689e-06, "loss": 1.1843, "step": 2200 }, { "epoch": 1.49, "learning_rate": 9.105263157894739e-06, "loss": 1.1243, "step": 2210 }, { "epoch": 1.5, "learning_rate": 9.101214574898785e-06, "loss": 1.1748, "step": 2220 }, { "epoch": 1.5, "learning_rate": 9.097165991902835e-06, "loss": 1.1282, "step": 2230 }, { "epoch": 1.51, "learning_rate": 9.093117408906884e-06, "loss": 1.0969, "step": 2240 }, { "epoch": 1.52, "learning_rate": 9.089068825910932e-06, "loss": 1.158, "step": 2250 }, { "epoch": 1.52, "learning_rate": 9.085020242914982e-06, "loss": 1.1417, "step": 2260 }, { "epoch": 1.53, "learning_rate": 9.08097165991903e-06, "loss": 1.1397, "step": 2270 }, { "epoch": 1.54, "learning_rate": 9.076923076923078e-06, "loss": 1.1622, "step": 2280 }, { "epoch": 1.54, "learning_rate": 9.072874493927127e-06, "loss": 1.1217, "step": 2290 }, { "epoch": 1.55, "learning_rate": 9.068825910931175e-06, "loss": 1.1527, "step": 2300 }, { "epoch": 1.56, "learning_rate": 9.064777327935223e-06, "loss": 1.1927, "step": 2310 }, { "epoch": 1.56, "learning_rate": 9.060728744939273e-06, "loss": 1.1505, "step": 2320 }, { "epoch": 1.57, "learning_rate": 9.05668016194332e-06, "loss": 1.123, "step": 2330 }, { "epoch": 1.58, "learning_rate": 9.052631578947369e-06, "loss": 1.1453, "step": 2340 }, { "epoch": 1.59, "learning_rate": 9.048582995951418e-06, "loss": 1.1903, "step": 2350 }, { "epoch": 1.59, "learning_rate": 9.044534412955466e-06, "loss": 1.144, "step": 2360 }, { "epoch": 1.6, "learning_rate": 9.040485829959514e-06, "loss": 1.1469, "step": 2370 }, { "epoch": 1.61, "learning_rate": 9.036437246963564e-06, "loss": 1.1463, "step": 2380 }, { "epoch": 1.61, "learning_rate": 9.032388663967612e-06, "loss": 1.1702, "step": 2390 }, { "epoch": 1.62, "learning_rate": 9.028340080971661e-06, "loss": 1.078, "step": 2400 }, { "epoch": 1.63, "learning_rate": 9.02429149797571e-06, "loss": 1.0907, "step": 2410 }, { "epoch": 1.63, "learning_rate": 9.020242914979757e-06, "loss": 1.1621, "step": 2420 }, { "epoch": 1.64, "learning_rate": 9.016194331983807e-06, "loss": 1.0947, "step": 2430 }, { "epoch": 1.65, "learning_rate": 9.012145748987855e-06, "loss": 1.1301, "step": 2440 }, { "epoch": 1.65, "learning_rate": 9.008097165991903e-06, "loss": 1.1232, "step": 2450 }, { "epoch": 1.66, "learning_rate": 9.004048582995952e-06, "loss": 1.1737, "step": 2460 }, { "epoch": 1.67, "learning_rate": 9e-06, "loss": 1.1475, "step": 2470 }, { "epoch": 1.67, "learning_rate": 8.99595141700405e-06, "loss": 1.1242, "step": 2480 }, { "epoch": 1.68, "learning_rate": 8.991902834008098e-06, "loss": 1.1321, "step": 2490 }, { "epoch": 1.69, "learning_rate": 8.987854251012147e-06, "loss": 1.1131, "step": 2500 }, { "epoch": 1.69, "learning_rate": 8.983805668016195e-06, "loss": 1.1456, "step": 2510 }, { "epoch": 1.7, "learning_rate": 8.979757085020243e-06, "loss": 1.0983, "step": 2520 }, { "epoch": 1.71, "learning_rate": 8.975708502024293e-06, "loss": 1.1598, "step": 2530 }, { "epoch": 1.71, "learning_rate": 8.971659919028341e-06, "loss": 1.1861, "step": 2540 }, { "epoch": 1.72, "learning_rate": 8.96761133603239e-06, "loss": 1.1334, "step": 2550 }, { "epoch": 1.73, "learning_rate": 8.963562753036438e-06, "loss": 1.1605, "step": 2560 }, { "epoch": 1.73, "learning_rate": 8.959514170040486e-06, "loss": 1.136, "step": 2570 }, { "epoch": 1.74, "learning_rate": 8.955465587044536e-06, "loss": 1.1162, "step": 2580 }, { "epoch": 1.75, "learning_rate": 8.951417004048584e-06, "loss": 1.1115, "step": 2590 }, { "epoch": 1.75, "learning_rate": 8.947368421052632e-06, "loss": 1.1025, "step": 2600 }, { "epoch": 1.76, "learning_rate": 8.943319838056681e-06, "loss": 1.1616, "step": 2610 }, { "epoch": 1.77, "learning_rate": 8.93927125506073e-06, "loss": 1.1407, "step": 2620 }, { "epoch": 1.77, "learning_rate": 8.935222672064777e-06, "loss": 1.1016, "step": 2630 }, { "epoch": 1.78, "learning_rate": 8.931174089068827e-06, "loss": 1.111, "step": 2640 }, { "epoch": 1.79, "learning_rate": 8.927125506072875e-06, "loss": 1.1226, "step": 2650 }, { "epoch": 1.79, "learning_rate": 8.923076923076923e-06, "loss": 1.1105, "step": 2660 }, { "epoch": 1.8, "learning_rate": 8.919028340080972e-06, "loss": 1.1227, "step": 2670 }, { "epoch": 1.81, "learning_rate": 8.91497975708502e-06, "loss": 1.1462, "step": 2680 }, { "epoch": 1.81, "learning_rate": 8.91093117408907e-06, "loss": 1.1119, "step": 2690 }, { "epoch": 1.82, "learning_rate": 8.906882591093118e-06, "loss": 1.1567, "step": 2700 }, { "epoch": 1.83, "learning_rate": 8.902834008097166e-06, "loss": 1.1565, "step": 2710 }, { "epoch": 1.83, "learning_rate": 8.898785425101216e-06, "loss": 1.1116, "step": 2720 }, { "epoch": 1.84, "learning_rate": 8.894736842105265e-06, "loss": 1.1139, "step": 2730 }, { "epoch": 1.85, "learning_rate": 8.890688259109311e-06, "loss": 1.1454, "step": 2740 }, { "epoch": 1.85, "learning_rate": 8.886639676113361e-06, "loss": 1.1084, "step": 2750 }, { "epoch": 1.86, "learning_rate": 8.882591093117409e-06, "loss": 1.1829, "step": 2760 }, { "epoch": 1.87, "learning_rate": 8.878542510121459e-06, "loss": 1.1262, "step": 2770 }, { "epoch": 1.88, "learning_rate": 8.874493927125507e-06, "loss": 1.1754, "step": 2780 }, { "epoch": 1.88, "learning_rate": 8.870445344129556e-06, "loss": 1.1269, "step": 2790 }, { "epoch": 1.89, "learning_rate": 8.866396761133604e-06, "loss": 1.1663, "step": 2800 }, { "epoch": 1.9, "learning_rate": 8.862348178137652e-06, "loss": 1.1457, "step": 2810 }, { "epoch": 1.9, "learning_rate": 8.858299595141702e-06, "loss": 1.1143, "step": 2820 }, { "epoch": 1.91, "learning_rate": 8.85425101214575e-06, "loss": 1.1956, "step": 2830 }, { "epoch": 1.92, "learning_rate": 8.8502024291498e-06, "loss": 1.152, "step": 2840 }, { "epoch": 1.92, "learning_rate": 8.846153846153847e-06, "loss": 1.1365, "step": 2850 }, { "epoch": 1.93, "learning_rate": 8.842105263157895e-06, "loss": 1.1519, "step": 2860 }, { "epoch": 1.94, "learning_rate": 8.838056680161945e-06, "loss": 1.126, "step": 2870 }, { "epoch": 1.94, "learning_rate": 8.834008097165993e-06, "loss": 1.1408, "step": 2880 }, { "epoch": 1.95, "learning_rate": 8.82995951417004e-06, "loss": 1.1522, "step": 2890 }, { "epoch": 1.96, "learning_rate": 8.82591093117409e-06, "loss": 1.1236, "step": 2900 }, { "epoch": 1.96, "learning_rate": 8.821862348178138e-06, "loss": 1.0927, "step": 2910 }, { "epoch": 1.97, "learning_rate": 8.817813765182186e-06, "loss": 1.21, "step": 2920 }, { "epoch": 1.98, "learning_rate": 8.813765182186236e-06, "loss": 1.1119, "step": 2930 }, { "epoch": 1.98, "learning_rate": 8.809716599190284e-06, "loss": 1.1383, "step": 2940 }, { "epoch": 1.99, "learning_rate": 8.805668016194333e-06, "loss": 1.1438, "step": 2950 }, { "epoch": 2.0, "learning_rate": 8.801619433198381e-06, "loss": 1.1747, "step": 2960 }, { "epoch": 2.0, "eval_loss": 1.1016342639923096, "eval_runtime": 10.6294, "eval_samples_per_second": 103.487, "eval_steps_per_second": 12.983, "step": 2965 }, { "epoch": 2.0, "learning_rate": 8.79757085020243e-06, "loss": 1.083, "step": 2970 }, { "epoch": 2.01, "learning_rate": 8.793522267206479e-06, "loss": 1.0542, "step": 2980 }, { "epoch": 2.02, "learning_rate": 8.789473684210527e-06, "loss": 1.0974, "step": 2990 }, { "epoch": 2.02, "learning_rate": 8.785425101214576e-06, "loss": 1.0074, "step": 3000 }, { "epoch": 2.03, "learning_rate": 8.781376518218624e-06, "loss": 1.0825, "step": 3010 }, { "epoch": 2.04, "learning_rate": 8.777327935222674e-06, "loss": 1.065, "step": 3020 }, { "epoch": 2.04, "learning_rate": 8.77327935222672e-06, "loss": 1.0996, "step": 3030 }, { "epoch": 2.05, "learning_rate": 8.76923076923077e-06, "loss": 1.1079, "step": 3040 }, { "epoch": 2.06, "learning_rate": 8.76518218623482e-06, "loss": 1.0803, "step": 3050 }, { "epoch": 2.06, "learning_rate": 8.761133603238867e-06, "loss": 1.0923, "step": 3060 }, { "epoch": 2.07, "learning_rate": 8.757085020242915e-06, "loss": 1.1171, "step": 3070 }, { "epoch": 2.08, "learning_rate": 8.753036437246965e-06, "loss": 1.0404, "step": 3080 }, { "epoch": 2.08, "learning_rate": 8.748987854251013e-06, "loss": 1.0881, "step": 3090 }, { "epoch": 2.09, "learning_rate": 8.744939271255061e-06, "loss": 1.1536, "step": 3100 }, { "epoch": 2.1, "learning_rate": 8.74089068825911e-06, "loss": 1.0868, "step": 3110 }, { "epoch": 2.1, "learning_rate": 8.736842105263158e-06, "loss": 1.1422, "step": 3120 }, { "epoch": 2.11, "learning_rate": 8.732793522267208e-06, "loss": 1.1048, "step": 3130 }, { "epoch": 2.12, "learning_rate": 8.728744939271256e-06, "loss": 1.136, "step": 3140 }, { "epoch": 2.12, "learning_rate": 8.724696356275304e-06, "loss": 1.07, "step": 3150 }, { "epoch": 2.13, "learning_rate": 8.720647773279354e-06, "loss": 1.1336, "step": 3160 }, { "epoch": 2.14, "learning_rate": 8.716599190283401e-06, "loss": 1.0946, "step": 3170 }, { "epoch": 2.15, "learning_rate": 8.71255060728745e-06, "loss": 1.061, "step": 3180 }, { "epoch": 2.15, "learning_rate": 8.708502024291499e-06, "loss": 1.0275, "step": 3190 }, { "epoch": 2.16, "learning_rate": 8.704453441295547e-06, "loss": 1.0665, "step": 3200 }, { "epoch": 2.17, "learning_rate": 8.700404858299595e-06, "loss": 1.0563, "step": 3210 }, { "epoch": 2.17, "learning_rate": 8.696356275303645e-06, "loss": 1.0756, "step": 3220 }, { "epoch": 2.18, "learning_rate": 8.692307692307692e-06, "loss": 1.0981, "step": 3230 }, { "epoch": 2.19, "learning_rate": 8.688259109311742e-06, "loss": 1.1138, "step": 3240 }, { "epoch": 2.19, "learning_rate": 8.68421052631579e-06, "loss": 1.0838, "step": 3250 }, { "epoch": 2.2, "learning_rate": 8.680161943319838e-06, "loss": 1.1053, "step": 3260 }, { "epoch": 2.21, "learning_rate": 8.676113360323888e-06, "loss": 1.1099, "step": 3270 }, { "epoch": 2.21, "learning_rate": 8.672064777327936e-06, "loss": 1.0884, "step": 3280 }, { "epoch": 2.22, "learning_rate": 8.668016194331985e-06, "loss": 1.0857, "step": 3290 }, { "epoch": 2.23, "learning_rate": 8.663967611336033e-06, "loss": 1.0629, "step": 3300 }, { "epoch": 2.23, "learning_rate": 8.659919028340083e-06, "loss": 1.0753, "step": 3310 }, { "epoch": 2.24, "learning_rate": 8.65587044534413e-06, "loss": 1.1481, "step": 3320 }, { "epoch": 2.25, "learning_rate": 8.651821862348179e-06, "loss": 1.1127, "step": 3330 }, { "epoch": 2.25, "learning_rate": 8.647773279352228e-06, "loss": 1.1666, "step": 3340 }, { "epoch": 2.26, "learning_rate": 8.643724696356276e-06, "loss": 1.1086, "step": 3350 }, { "epoch": 2.27, "learning_rate": 8.639676113360324e-06, "loss": 1.1102, "step": 3360 }, { "epoch": 2.27, "learning_rate": 8.635627530364374e-06, "loss": 1.1194, "step": 3370 }, { "epoch": 2.28, "learning_rate": 8.631578947368422e-06, "loss": 1.145, "step": 3380 }, { "epoch": 2.29, "learning_rate": 8.62753036437247e-06, "loss": 1.1103, "step": 3390 }, { "epoch": 2.29, "learning_rate": 8.62348178137652e-06, "loss": 1.0998, "step": 3400 }, { "epoch": 2.3, "learning_rate": 8.619433198380567e-06, "loss": 1.1551, "step": 3410 }, { "epoch": 2.31, "learning_rate": 8.615384615384615e-06, "loss": 1.0687, "step": 3420 }, { "epoch": 2.31, "learning_rate": 8.611336032388665e-06, "loss": 1.0959, "step": 3430 }, { "epoch": 2.32, "learning_rate": 8.607287449392713e-06, "loss": 1.0799, "step": 3440 }, { "epoch": 2.33, "learning_rate": 8.60323886639676e-06, "loss": 1.0673, "step": 3450 }, { "epoch": 2.33, "learning_rate": 8.59919028340081e-06, "loss": 1.0873, "step": 3460 }, { "epoch": 2.34, "learning_rate": 8.595141700404858e-06, "loss": 1.1119, "step": 3470 }, { "epoch": 2.35, "learning_rate": 8.591093117408906e-06, "loss": 1.1356, "step": 3480 }, { "epoch": 2.35, "learning_rate": 8.587044534412956e-06, "loss": 1.0924, "step": 3490 }, { "epoch": 2.36, "learning_rate": 8.582995951417005e-06, "loss": 1.0818, "step": 3500 }, { "epoch": 2.37, "learning_rate": 8.578947368421053e-06, "loss": 1.1007, "step": 3510 }, { "epoch": 2.37, "learning_rate": 8.574898785425103e-06, "loss": 1.0591, "step": 3520 }, { "epoch": 2.38, "learning_rate": 8.57085020242915e-06, "loss": 1.1069, "step": 3530 }, { "epoch": 2.39, "learning_rate": 8.566801619433199e-06, "loss": 1.1191, "step": 3540 }, { "epoch": 2.39, "learning_rate": 8.562753036437247e-06, "loss": 1.077, "step": 3550 }, { "epoch": 2.4, "learning_rate": 8.558704453441296e-06, "loss": 1.104, "step": 3560 }, { "epoch": 2.41, "learning_rate": 8.554655870445344e-06, "loss": 1.0758, "step": 3570 }, { "epoch": 2.41, "learning_rate": 8.550607287449394e-06, "loss": 1.0885, "step": 3580 }, { "epoch": 2.42, "learning_rate": 8.546558704453442e-06, "loss": 1.084, "step": 3590 }, { "epoch": 2.43, "learning_rate": 8.54251012145749e-06, "loss": 1.0869, "step": 3600 }, { "epoch": 2.44, "learning_rate": 8.53846153846154e-06, "loss": 1.073, "step": 3610 }, { "epoch": 2.44, "learning_rate": 8.534412955465587e-06, "loss": 1.0766, "step": 3620 }, { "epoch": 2.45, "learning_rate": 8.530364372469635e-06, "loss": 1.1085, "step": 3630 }, { "epoch": 2.46, "learning_rate": 8.526315789473685e-06, "loss": 1.097, "step": 3640 }, { "epoch": 2.46, "learning_rate": 8.522267206477733e-06, "loss": 1.0968, "step": 3650 }, { "epoch": 2.47, "learning_rate": 8.518218623481783e-06, "loss": 1.0917, "step": 3660 }, { "epoch": 2.48, "learning_rate": 8.51417004048583e-06, "loss": 1.047, "step": 3670 }, { "epoch": 2.48, "learning_rate": 8.510121457489878e-06, "loss": 1.0469, "step": 3680 }, { "epoch": 2.49, "learning_rate": 8.506072874493928e-06, "loss": 1.1114, "step": 3690 }, { "epoch": 2.5, "learning_rate": 8.502024291497976e-06, "loss": 1.0319, "step": 3700 }, { "epoch": 2.5, "learning_rate": 8.497975708502024e-06, "loss": 1.0733, "step": 3710 }, { "epoch": 2.51, "learning_rate": 8.493927125506074e-06, "loss": 1.1237, "step": 3720 }, { "epoch": 2.52, "learning_rate": 8.489878542510121e-06, "loss": 1.0649, "step": 3730 }, { "epoch": 2.52, "learning_rate": 8.48582995951417e-06, "loss": 1.0903, "step": 3740 }, { "epoch": 2.53, "learning_rate": 8.481781376518219e-06, "loss": 1.0633, "step": 3750 }, { "epoch": 2.54, "learning_rate": 8.477732793522267e-06, "loss": 1.1259, "step": 3760 }, { "epoch": 2.54, "learning_rate": 8.473684210526317e-06, "loss": 1.0978, "step": 3770 }, { "epoch": 2.55, "learning_rate": 8.469635627530365e-06, "loss": 1.1362, "step": 3780 }, { "epoch": 2.56, "learning_rate": 8.465587044534414e-06, "loss": 1.1487, "step": 3790 }, { "epoch": 2.56, "learning_rate": 8.461538461538462e-06, "loss": 1.0887, "step": 3800 }, { "epoch": 2.57, "learning_rate": 8.457489878542512e-06, "loss": 1.1039, "step": 3810 }, { "epoch": 2.58, "learning_rate": 8.453441295546558e-06, "loss": 1.0906, "step": 3820 }, { "epoch": 2.58, "learning_rate": 8.449392712550608e-06, "loss": 1.1039, "step": 3830 }, { "epoch": 2.59, "learning_rate": 8.445344129554657e-06, "loss": 1.0432, "step": 3840 }, { "epoch": 2.6, "learning_rate": 8.441295546558705e-06, "loss": 1.1565, "step": 3850 }, { "epoch": 2.6, "learning_rate": 8.437246963562753e-06, "loss": 1.1786, "step": 3860 }, { "epoch": 2.61, "learning_rate": 8.433198380566803e-06, "loss": 1.0724, "step": 3870 }, { "epoch": 2.62, "learning_rate": 8.42914979757085e-06, "loss": 1.0863, "step": 3880 }, { "epoch": 2.62, "learning_rate": 8.425101214574899e-06, "loss": 1.0825, "step": 3890 }, { "epoch": 2.63, "learning_rate": 8.421052631578948e-06, "loss": 1.0831, "step": 3900 }, { "epoch": 2.64, "learning_rate": 8.417004048582996e-06, "loss": 1.0681, "step": 3910 }, { "epoch": 2.64, "learning_rate": 8.412955465587044e-06, "loss": 1.0871, "step": 3920 }, { "epoch": 2.65, "learning_rate": 8.408906882591094e-06, "loss": 1.0531, "step": 3930 }, { "epoch": 2.66, "learning_rate": 8.404858299595142e-06, "loss": 1.1174, "step": 3940 }, { "epoch": 2.66, "learning_rate": 8.400809716599191e-06, "loss": 1.117, "step": 3950 }, { "epoch": 2.67, "learning_rate": 8.39676113360324e-06, "loss": 1.0938, "step": 3960 }, { "epoch": 2.68, "learning_rate": 8.392712550607287e-06, "loss": 1.0736, "step": 3970 }, { "epoch": 2.68, "learning_rate": 8.388663967611337e-06, "loss": 1.0872, "step": 3980 }, { "epoch": 2.69, "learning_rate": 8.384615384615385e-06, "loss": 1.1, "step": 3990 }, { "epoch": 2.7, "learning_rate": 8.380566801619433e-06, "loss": 1.1201, "step": 4000 }, { "epoch": 2.7, "learning_rate": 8.376518218623482e-06, "loss": 1.0755, "step": 4010 }, { "epoch": 2.71, "learning_rate": 8.37246963562753e-06, "loss": 1.0917, "step": 4020 }, { "epoch": 2.72, "learning_rate": 8.368421052631578e-06, "loss": 1.0873, "step": 4030 }, { "epoch": 2.73, "learning_rate": 8.364372469635628e-06, "loss": 1.0858, "step": 4040 }, { "epoch": 2.73, "learning_rate": 8.360323886639676e-06, "loss": 1.0721, "step": 4050 }, { "epoch": 2.74, "learning_rate": 8.356275303643725e-06, "loss": 1.138, "step": 4060 }, { "epoch": 2.75, "learning_rate": 8.352226720647773e-06, "loss": 1.1194, "step": 4070 }, { "epoch": 2.75, "learning_rate": 8.348178137651823e-06, "loss": 1.1364, "step": 4080 }, { "epoch": 2.76, "learning_rate": 8.344129554655871e-06, "loss": 1.1201, "step": 4090 }, { "epoch": 2.77, "learning_rate": 8.34008097165992e-06, "loss": 1.0585, "step": 4100 }, { "epoch": 2.77, "learning_rate": 8.336032388663968e-06, "loss": 1.0613, "step": 4110 }, { "epoch": 2.78, "learning_rate": 8.331983805668016e-06, "loss": 1.1091, "step": 4120 }, { "epoch": 2.79, "learning_rate": 8.327935222672066e-06, "loss": 1.1446, "step": 4130 }, { "epoch": 2.79, "learning_rate": 8.323886639676114e-06, "loss": 1.0413, "step": 4140 }, { "epoch": 2.8, "learning_rate": 8.319838056680162e-06, "loss": 1.1225, "step": 4150 }, { "epoch": 2.81, "learning_rate": 8.315789473684212e-06, "loss": 1.1121, "step": 4160 }, { "epoch": 2.81, "learning_rate": 8.31174089068826e-06, "loss": 1.0534, "step": 4170 }, { "epoch": 2.82, "learning_rate": 8.307692307692307e-06, "loss": 1.1097, "step": 4180 }, { "epoch": 2.83, "learning_rate": 8.303643724696357e-06, "loss": 1.1147, "step": 4190 }, { "epoch": 2.83, "learning_rate": 8.299595141700405e-06, "loss": 1.1009, "step": 4200 }, { "epoch": 2.84, "learning_rate": 8.295546558704453e-06, "loss": 1.0584, "step": 4210 }, { "epoch": 2.85, "learning_rate": 8.291497975708503e-06, "loss": 1.1045, "step": 4220 }, { "epoch": 2.85, "learning_rate": 8.28744939271255e-06, "loss": 1.0559, "step": 4230 }, { "epoch": 2.86, "learning_rate": 8.2834008097166e-06, "loss": 1.063, "step": 4240 }, { "epoch": 2.87, "learning_rate": 8.279352226720648e-06, "loss": 1.1671, "step": 4250 }, { "epoch": 2.87, "learning_rate": 8.275303643724696e-06, "loss": 1.077, "step": 4260 }, { "epoch": 2.88, "learning_rate": 8.271255060728746e-06, "loss": 1.1119, "step": 4270 }, { "epoch": 2.89, "learning_rate": 8.267206477732794e-06, "loss": 1.0773, "step": 4280 }, { "epoch": 2.89, "learning_rate": 8.263157894736843e-06, "loss": 1.0874, "step": 4290 }, { "epoch": 2.9, "learning_rate": 8.259109311740891e-06, "loss": 1.0793, "step": 4300 }, { "epoch": 2.91, "learning_rate": 8.25506072874494e-06, "loss": 1.1011, "step": 4310 }, { "epoch": 2.91, "learning_rate": 8.251012145748987e-06, "loss": 1.1232, "step": 4320 }, { "epoch": 2.92, "learning_rate": 8.246963562753037e-06, "loss": 1.0953, "step": 4330 }, { "epoch": 2.93, "learning_rate": 8.242914979757085e-06, "loss": 1.0841, "step": 4340 }, { "epoch": 2.93, "learning_rate": 8.238866396761134e-06, "loss": 1.1036, "step": 4350 }, { "epoch": 2.94, "learning_rate": 8.234817813765182e-06, "loss": 1.1207, "step": 4360 }, { "epoch": 2.95, "learning_rate": 8.230769230769232e-06, "loss": 1.1033, "step": 4370 }, { "epoch": 2.95, "learning_rate": 8.22672064777328e-06, "loss": 1.0719, "step": 4380 }, { "epoch": 2.96, "learning_rate": 8.22267206477733e-06, "loss": 1.0539, "step": 4390 }, { "epoch": 2.97, "learning_rate": 8.218623481781377e-06, "loss": 1.0818, "step": 4400 }, { "epoch": 2.97, "learning_rate": 8.214574898785425e-06, "loss": 1.1149, "step": 4410 }, { "epoch": 2.98, "learning_rate": 8.210526315789475e-06, "loss": 1.0624, "step": 4420 }, { "epoch": 2.99, "learning_rate": 8.206477732793523e-06, "loss": 1.1197, "step": 4430 }, { "epoch": 2.99, "learning_rate": 8.20242914979757e-06, "loss": 1.0567, "step": 4440 }, { "epoch": 3.0, "eval_loss": 1.0876251459121704, "eval_runtime": 10.6357, "eval_samples_per_second": 103.425, "eval_steps_per_second": 12.975, "step": 4447 }, { "epoch": 3.0, "learning_rate": 8.19838056680162e-06, "loss": 1.0768, "step": 4450 }, { "epoch": 3.01, "learning_rate": 8.194331983805668e-06, "loss": 1.0687, "step": 4460 }, { "epoch": 3.02, "learning_rate": 8.190283400809716e-06, "loss": 1.1029, "step": 4470 }, { "epoch": 3.02, "learning_rate": 8.186234817813766e-06, "loss": 1.0569, "step": 4480 }, { "epoch": 3.03, "learning_rate": 8.182186234817814e-06, "loss": 1.0151, "step": 4490 }, { "epoch": 3.04, "learning_rate": 8.178137651821862e-06, "loss": 1.0405, "step": 4500 }, { "epoch": 3.04, "learning_rate": 8.174089068825911e-06, "loss": 1.07, "step": 4510 }, { "epoch": 3.05, "learning_rate": 8.17004048582996e-06, "loss": 1.0643, "step": 4520 }, { "epoch": 3.06, "learning_rate": 8.165991902834009e-06, "loss": 1.0859, "step": 4530 }, { "epoch": 3.06, "learning_rate": 8.161943319838057e-06, "loss": 1.0384, "step": 4540 }, { "epoch": 3.07, "learning_rate": 8.157894736842105e-06, "loss": 1.0873, "step": 4550 }, { "epoch": 3.08, "learning_rate": 8.153846153846154e-06, "loss": 1.0847, "step": 4560 }, { "epoch": 3.08, "learning_rate": 8.149797570850202e-06, "loss": 1.0105, "step": 4570 }, { "epoch": 3.09, "learning_rate": 8.145748987854252e-06, "loss": 1.0407, "step": 4580 }, { "epoch": 3.1, "learning_rate": 8.1417004048583e-06, "loss": 1.0437, "step": 4590 }, { "epoch": 3.1, "learning_rate": 8.13765182186235e-06, "loss": 1.0641, "step": 4600 }, { "epoch": 3.11, "learning_rate": 8.133603238866397e-06, "loss": 1.0662, "step": 4610 }, { "epoch": 3.12, "learning_rate": 8.129554655870445e-06, "loss": 1.0608, "step": 4620 }, { "epoch": 3.12, "learning_rate": 8.125506072874495e-06, "loss": 1.0728, "step": 4630 }, { "epoch": 3.13, "learning_rate": 8.121457489878543e-06, "loss": 1.039, "step": 4640 }, { "epoch": 3.14, "learning_rate": 8.117408906882591e-06, "loss": 1.0898, "step": 4650 }, { "epoch": 3.14, "learning_rate": 8.11336032388664e-06, "loss": 1.107, "step": 4660 }, { "epoch": 3.15, "learning_rate": 8.109311740890688e-06, "loss": 1.1001, "step": 4670 }, { "epoch": 3.16, "learning_rate": 8.105263157894738e-06, "loss": 1.0371, "step": 4680 }, { "epoch": 3.16, "learning_rate": 8.101214574898786e-06, "loss": 1.0945, "step": 4690 }, { "epoch": 3.17, "learning_rate": 8.097165991902834e-06, "loss": 1.0509, "step": 4700 }, { "epoch": 3.18, "learning_rate": 8.093117408906884e-06, "loss": 1.0694, "step": 4710 }, { "epoch": 3.18, "learning_rate": 8.089068825910931e-06, "loss": 1.0153, "step": 4720 }, { "epoch": 3.19, "learning_rate": 8.08502024291498e-06, "loss": 1.0373, "step": 4730 }, { "epoch": 3.2, "learning_rate": 8.080971659919029e-06, "loss": 1.0583, "step": 4740 }, { "epoch": 3.2, "learning_rate": 8.076923076923077e-06, "loss": 1.0213, "step": 4750 }, { "epoch": 3.21, "learning_rate": 8.072874493927125e-06, "loss": 1.0707, "step": 4760 }, { "epoch": 3.22, "learning_rate": 8.068825910931175e-06, "loss": 1.0801, "step": 4770 }, { "epoch": 3.22, "learning_rate": 8.064777327935222e-06, "loss": 1.0336, "step": 4780 }, { "epoch": 3.23, "learning_rate": 8.060728744939272e-06, "loss": 1.0829, "step": 4790 }, { "epoch": 3.24, "learning_rate": 8.05668016194332e-06, "loss": 1.0853, "step": 4800 }, { "epoch": 3.24, "learning_rate": 8.05263157894737e-06, "loss": 1.074, "step": 4810 }, { "epoch": 3.25, "learning_rate": 8.048582995951418e-06, "loss": 1.0805, "step": 4820 }, { "epoch": 3.26, "learning_rate": 8.044534412955467e-06, "loss": 1.0644, "step": 4830 }, { "epoch": 3.26, "learning_rate": 8.040485829959514e-06, "loss": 1.039, "step": 4840 }, { "epoch": 3.27, "learning_rate": 8.036437246963563e-06, "loss": 1.0326, "step": 4850 }, { "epoch": 3.28, "learning_rate": 8.032388663967611e-06, "loss": 1.0776, "step": 4860 }, { "epoch": 3.28, "learning_rate": 8.02834008097166e-06, "loss": 1.043, "step": 4870 }, { "epoch": 3.29, "learning_rate": 8.024291497975709e-06, "loss": 1.0784, "step": 4880 }, { "epoch": 3.3, "learning_rate": 8.020242914979758e-06, "loss": 1.0783, "step": 4890 }, { "epoch": 3.31, "learning_rate": 8.016194331983806e-06, "loss": 1.0987, "step": 4900 }, { "epoch": 3.31, "learning_rate": 8.012145748987854e-06, "loss": 1.0515, "step": 4910 }, { "epoch": 3.32, "learning_rate": 8.008097165991904e-06, "loss": 1.0667, "step": 4920 }, { "epoch": 3.33, "learning_rate": 8.004048582995952e-06, "loss": 1.1095, "step": 4930 }, { "epoch": 3.33, "learning_rate": 8e-06, "loss": 1.0245, "step": 4940 }, { "epoch": 3.34, "learning_rate": 7.99595141700405e-06, "loss": 1.0753, "step": 4950 }, { "epoch": 3.35, "learning_rate": 7.991902834008097e-06, "loss": 1.0522, "step": 4960 }, { "epoch": 3.35, "learning_rate": 7.987854251012147e-06, "loss": 1.0106, "step": 4970 }, { "epoch": 3.36, "learning_rate": 7.983805668016195e-06, "loss": 1.0948, "step": 4980 }, { "epoch": 3.37, "learning_rate": 7.979757085020243e-06, "loss": 1.0649, "step": 4990 }, { "epoch": 3.37, "learning_rate": 7.975708502024292e-06, "loss": 1.0096, "step": 5000 }, { "epoch": 3.38, "learning_rate": 7.97165991902834e-06, "loss": 1.0076, "step": 5010 }, { "epoch": 3.39, "learning_rate": 7.967611336032388e-06, "loss": 1.0605, "step": 5020 }, { "epoch": 3.39, "learning_rate": 7.963562753036438e-06, "loss": 1.0544, "step": 5030 }, { "epoch": 3.4, "learning_rate": 7.959514170040486e-06, "loss": 1.0638, "step": 5040 }, { "epoch": 3.41, "learning_rate": 7.955465587044534e-06, "loss": 1.021, "step": 5050 }, { "epoch": 3.41, "learning_rate": 7.951417004048583e-06, "loss": 1.0622, "step": 5060 }, { "epoch": 3.42, "learning_rate": 7.947368421052631e-06, "loss": 1.0452, "step": 5070 }, { "epoch": 3.43, "learning_rate": 7.943319838056681e-06, "loss": 1.0678, "step": 5080 }, { "epoch": 3.43, "learning_rate": 7.939271255060729e-06, "loss": 0.9841, "step": 5090 }, { "epoch": 3.44, "learning_rate": 7.935222672064778e-06, "loss": 1.0684, "step": 5100 }, { "epoch": 3.45, "learning_rate": 7.931174089068826e-06, "loss": 1.0393, "step": 5110 }, { "epoch": 3.45, "learning_rate": 7.927125506072876e-06, "loss": 1.0816, "step": 5120 }, { "epoch": 3.46, "learning_rate": 7.923076923076922e-06, "loss": 1.0744, "step": 5130 }, { "epoch": 3.47, "learning_rate": 7.919028340080972e-06, "loss": 1.0575, "step": 5140 }, { "epoch": 3.47, "learning_rate": 7.914979757085022e-06, "loss": 1.119, "step": 5150 }, { "epoch": 3.48, "learning_rate": 7.91093117408907e-06, "loss": 1.0652, "step": 5160 }, { "epoch": 3.49, "learning_rate": 7.906882591093117e-06, "loss": 0.9888, "step": 5170 }, { "epoch": 3.49, "learning_rate": 7.902834008097167e-06, "loss": 1.0415, "step": 5180 }, { "epoch": 3.5, "learning_rate": 7.898785425101215e-06, "loss": 1.0266, "step": 5190 }, { "epoch": 3.51, "learning_rate": 7.894736842105263e-06, "loss": 1.0204, "step": 5200 }, { "epoch": 3.51, "learning_rate": 7.890688259109313e-06, "loss": 1.0709, "step": 5210 }, { "epoch": 3.52, "learning_rate": 7.88663967611336e-06, "loss": 1.0342, "step": 5220 }, { "epoch": 3.53, "learning_rate": 7.882591093117408e-06, "loss": 1.0492, "step": 5230 }, { "epoch": 3.53, "learning_rate": 7.878542510121458e-06, "loss": 1.0375, "step": 5240 }, { "epoch": 3.54, "learning_rate": 7.874493927125506e-06, "loss": 1.0188, "step": 5250 }, { "epoch": 3.55, "learning_rate": 7.870445344129556e-06, "loss": 1.0951, "step": 5260 }, { "epoch": 3.55, "learning_rate": 7.866396761133604e-06, "loss": 1.0133, "step": 5270 }, { "epoch": 3.56, "learning_rate": 7.862348178137651e-06, "loss": 1.0414, "step": 5280 }, { "epoch": 3.57, "learning_rate": 7.858299595141701e-06, "loss": 1.0863, "step": 5290 }, { "epoch": 3.58, "learning_rate": 7.854251012145749e-06, "loss": 1.0998, "step": 5300 }, { "epoch": 3.58, "learning_rate": 7.850202429149797e-06, "loss": 1.0854, "step": 5310 }, { "epoch": 3.59, "learning_rate": 7.846153846153847e-06, "loss": 1.0594, "step": 5320 }, { "epoch": 3.6, "learning_rate": 7.842105263157896e-06, "loss": 1.0225, "step": 5330 }, { "epoch": 3.6, "learning_rate": 7.838056680161942e-06, "loss": 1.0728, "step": 5340 }, { "epoch": 3.61, "learning_rate": 7.834008097165992e-06, "loss": 1.0417, "step": 5350 }, { "epoch": 3.62, "learning_rate": 7.82995951417004e-06, "loss": 1.0226, "step": 5360 }, { "epoch": 3.62, "learning_rate": 7.82591093117409e-06, "loss": 1.0953, "step": 5370 }, { "epoch": 3.63, "learning_rate": 7.821862348178138e-06, "loss": 1.053, "step": 5380 }, { "epoch": 3.64, "learning_rate": 7.817813765182187e-06, "loss": 1.0523, "step": 5390 }, { "epoch": 3.64, "learning_rate": 7.813765182186235e-06, "loss": 1.1017, "step": 5400 }, { "epoch": 3.65, "learning_rate": 7.809716599190285e-06, "loss": 1.0032, "step": 5410 }, { "epoch": 3.66, "learning_rate": 7.805668016194333e-06, "loss": 1.0331, "step": 5420 }, { "epoch": 3.66, "learning_rate": 7.80161943319838e-06, "loss": 1.0483, "step": 5430 }, { "epoch": 3.67, "learning_rate": 7.79757085020243e-06, "loss": 1.0291, "step": 5440 }, { "epoch": 3.68, "learning_rate": 7.793522267206478e-06, "loss": 1.0113, "step": 5450 }, { "epoch": 3.68, "learning_rate": 7.789473684210526e-06, "loss": 1.0407, "step": 5460 }, { "epoch": 3.69, "learning_rate": 7.785425101214576e-06, "loss": 1.077, "step": 5470 }, { "epoch": 3.7, "learning_rate": 7.781376518218624e-06, "loss": 1.1063, "step": 5480 }, { "epoch": 3.7, "learning_rate": 7.777327935222672e-06, "loss": 1.0377, "step": 5490 }, { "epoch": 3.71, "learning_rate": 7.773279352226721e-06, "loss": 1.0841, "step": 5500 }, { "epoch": 3.72, "learning_rate": 7.76923076923077e-06, "loss": 1.0638, "step": 5510 }, { "epoch": 3.72, "learning_rate": 7.765182186234819e-06, "loss": 1.0525, "step": 5520 }, { "epoch": 3.73, "learning_rate": 7.761133603238867e-06, "loss": 1.0638, "step": 5530 }, { "epoch": 3.74, "learning_rate": 7.757085020242915e-06, "loss": 1.0478, "step": 5540 }, { "epoch": 3.74, "learning_rate": 7.753036437246964e-06, "loss": 1.0429, "step": 5550 }, { "epoch": 3.75, "learning_rate": 7.748987854251014e-06, "loss": 1.0615, "step": 5560 }, { "epoch": 3.76, "learning_rate": 7.74493927125506e-06, "loss": 1.0653, "step": 5570 }, { "epoch": 3.76, "learning_rate": 7.74089068825911e-06, "loss": 1.0755, "step": 5580 }, { "epoch": 3.77, "learning_rate": 7.736842105263158e-06, "loss": 1.0447, "step": 5590 }, { "epoch": 3.78, "learning_rate": 7.732793522267207e-06, "loss": 1.0372, "step": 5600 }, { "epoch": 3.78, "learning_rate": 7.728744939271255e-06, "loss": 1.1158, "step": 5610 }, { "epoch": 3.79, "learning_rate": 7.724696356275305e-06, "loss": 1.1195, "step": 5620 }, { "epoch": 3.8, "learning_rate": 7.720647773279351e-06, "loss": 1.0246, "step": 5630 }, { "epoch": 3.8, "learning_rate": 7.716599190283401e-06, "loss": 1.0339, "step": 5640 }, { "epoch": 3.81, "learning_rate": 7.712550607287449e-06, "loss": 1.0578, "step": 5650 }, { "epoch": 3.82, "learning_rate": 7.708502024291498e-06, "loss": 1.0747, "step": 5660 }, { "epoch": 3.82, "learning_rate": 7.704453441295546e-06, "loss": 1.0301, "step": 5670 }, { "epoch": 3.83, "learning_rate": 7.700404858299596e-06, "loss": 0.9873, "step": 5680 }, { "epoch": 3.84, "learning_rate": 7.696356275303644e-06, "loss": 1.0217, "step": 5690 }, { "epoch": 3.84, "learning_rate": 7.692307692307694e-06, "loss": 1.0941, "step": 5700 }, { "epoch": 3.85, "learning_rate": 7.688259109311742e-06, "loss": 1.0842, "step": 5710 }, { "epoch": 3.86, "learning_rate": 7.68421052631579e-06, "loss": 1.0163, "step": 5720 }, { "epoch": 3.87, "learning_rate": 7.680161943319839e-06, "loss": 1.0387, "step": 5730 }, { "epoch": 3.87, "learning_rate": 7.676113360323887e-06, "loss": 1.0719, "step": 5740 }, { "epoch": 3.88, "learning_rate": 7.672064777327935e-06, "loss": 1.0495, "step": 5750 }, { "epoch": 3.89, "learning_rate": 7.668016194331985e-06, "loss": 1.0669, "step": 5760 }, { "epoch": 3.89, "learning_rate": 7.663967611336033e-06, "loss": 1.1226, "step": 5770 }, { "epoch": 3.9, "learning_rate": 7.65991902834008e-06, "loss": 1.0147, "step": 5780 }, { "epoch": 3.91, "learning_rate": 7.65587044534413e-06, "loss": 1.0244, "step": 5790 }, { "epoch": 3.91, "learning_rate": 7.651821862348178e-06, "loss": 1.0134, "step": 5800 }, { "epoch": 3.92, "learning_rate": 7.647773279352228e-06, "loss": 1.0215, "step": 5810 }, { "epoch": 3.93, "learning_rate": 7.643724696356276e-06, "loss": 1.1177, "step": 5820 }, { "epoch": 3.93, "learning_rate": 7.639676113360325e-06, "loss": 1.0472, "step": 5830 }, { "epoch": 3.94, "learning_rate": 7.635627530364373e-06, "loss": 1.0335, "step": 5840 }, { "epoch": 3.95, "learning_rate": 7.631578947368423e-06, "loss": 1.0346, "step": 5850 }, { "epoch": 3.95, "learning_rate": 7.627530364372468e-06, "loss": 1.0354, "step": 5860 }, { "epoch": 3.96, "learning_rate": 7.623481781376519e-06, "loss": 1.0663, "step": 5870 }, { "epoch": 3.97, "learning_rate": 7.619433198380567e-06, "loss": 1.0602, "step": 5880 }, { "epoch": 3.97, "learning_rate": 7.615384615384615e-06, "loss": 1.0458, "step": 5890 }, { "epoch": 3.98, "learning_rate": 7.611336032388663e-06, "loss": 1.079, "step": 5900 }, { "epoch": 3.99, "learning_rate": 7.607287449392713e-06, "loss": 1.0888, "step": 5910 }, { "epoch": 3.99, "learning_rate": 7.60323886639676e-06, "loss": 1.0395, "step": 5920 }, { "epoch": 4.0, "learning_rate": 7.5991902834008105e-06, "loss": 1.1178, "step": 5930 }, { "epoch": 4.0, "eval_loss": 1.0923843383789062, "eval_runtime": 10.6309, "eval_samples_per_second": 103.472, "eval_steps_per_second": 12.981, "step": 5930 } ], "max_steps": 22230, "num_train_epochs": 15, "total_flos": 6.084121621168128e+18, "trial_name": null, "trial_params": null }