{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.2754030638590854, "eval_steps": 500, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.0833333333333334e-09, "loss": 0.6931, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.0416666666666666e-08, "loss": 0.6931, "step": 5 }, { "epoch": 0.0, "learning_rate": 2.083333333333333e-08, "loss": 0.6932, "step": 10 }, { "epoch": 0.0, "learning_rate": 3.125e-08, "loss": 0.6931, "step": 15 }, { "epoch": 0.0, "learning_rate": 4.166666666666666e-08, "loss": 0.6931, "step": 20 }, { "epoch": 0.0, "learning_rate": 5.208333333333333e-08, "loss": 0.6929, "step": 25 }, { "epoch": 0.0, "learning_rate": 6.25e-08, "loss": 0.6928, "step": 30 }, { "epoch": 0.0, "learning_rate": 7.291666666666667e-08, "loss": 0.6923, "step": 35 }, { "epoch": 0.0, "learning_rate": 8.333333333333333e-08, "loss": 0.6919, "step": 40 }, { "epoch": 0.01, "learning_rate": 9.375e-08, "loss": 0.6914, "step": 45 }, { "epoch": 0.01, "learning_rate": 1.0416666666666667e-07, "loss": 0.6907, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.1458333333333332e-07, "loss": 0.6898, "step": 55 }, { "epoch": 0.01, "learning_rate": 1.25e-07, "loss": 0.6889, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.3541666666666666e-07, "loss": 0.6878, "step": 65 }, { "epoch": 0.01, "learning_rate": 1.4583333333333335e-07, "loss": 0.6865, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.5624999999999999e-07, "loss": 0.685, "step": 75 }, { "epoch": 0.01, "learning_rate": 1.6666666666666665e-07, "loss": 0.6835, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.7708333333333334e-07, "loss": 0.6811, "step": 85 }, { "epoch": 0.01, "learning_rate": 1.875e-07, "loss": 0.6788, "step": 90 }, { "epoch": 0.01, "learning_rate": 1.9791666666666664e-07, "loss": 0.6767, "step": 95 }, { "epoch": 0.01, "learning_rate": 2.0833333333333333e-07, "loss": 0.6733, "step": 100 }, { "epoch": 0.01, "learning_rate": 2.1875e-07, "loss": 0.67, "step": 105 }, { "epoch": 0.01, "learning_rate": 2.2916666666666663e-07, "loss": 0.6669, "step": 110 }, { "epoch": 0.01, "learning_rate": 2.3958333333333335e-07, "loss": 0.6629, "step": 115 }, { "epoch": 0.01, "learning_rate": 2.5e-07, "loss": 0.6596, "step": 120 }, { "epoch": 0.01, "learning_rate": 2.604166666666667e-07, "loss": 0.6543, "step": 125 }, { "epoch": 0.01, "learning_rate": 2.708333333333333e-07, "loss": 0.6501, "step": 130 }, { "epoch": 0.02, "learning_rate": 2.8125e-07, "loss": 0.6456, "step": 135 }, { "epoch": 0.02, "learning_rate": 2.916666666666667e-07, "loss": 0.6391, "step": 140 }, { "epoch": 0.02, "learning_rate": 3.020833333333333e-07, "loss": 0.6346, "step": 145 }, { "epoch": 0.02, "learning_rate": 3.1249999999999997e-07, "loss": 0.6286, "step": 150 }, { "epoch": 0.02, "learning_rate": 3.2291666666666666e-07, "loss": 0.6219, "step": 155 }, { "epoch": 0.02, "learning_rate": 3.333333333333333e-07, "loss": 0.6141, "step": 160 }, { "epoch": 0.02, "learning_rate": 3.4166666666666664e-07, "loss": 0.6126, "step": 165 }, { "epoch": 0.02, "learning_rate": 3.5208333333333333e-07, "loss": 0.6033, "step": 170 }, { "epoch": 0.02, "learning_rate": 3.6249999999999997e-07, "loss": 0.5979, "step": 175 }, { "epoch": 0.02, "learning_rate": 3.7291666666666666e-07, "loss": 0.5881, "step": 180 }, { "epoch": 0.02, "learning_rate": 3.8333333333333335e-07, "loss": 0.5776, "step": 185 }, { "epoch": 0.02, "learning_rate": 3.9375e-07, "loss": 0.5714, "step": 190 }, { "epoch": 0.02, "learning_rate": 4.041666666666667e-07, "loss": 0.562, "step": 195 }, { "epoch": 0.02, "learning_rate": 4.145833333333333e-07, "loss": 0.5572, "step": 200 }, { "epoch": 0.02, "learning_rate": 4.2499999999999995e-07, "loss": 0.5453, "step": 205 }, { "epoch": 0.02, "learning_rate": 4.3541666666666664e-07, "loss": 0.5399, "step": 210 }, { "epoch": 0.02, "learning_rate": 4.4583333333333334e-07, "loss": 0.5318, "step": 215 }, { "epoch": 0.03, "learning_rate": 4.5624999999999997e-07, "loss": 0.517, "step": 220 }, { "epoch": 0.03, "learning_rate": 4.6666666666666666e-07, "loss": 0.5073, "step": 225 }, { "epoch": 0.03, "learning_rate": 4.770833333333334e-07, "loss": 0.4968, "step": 230 }, { "epoch": 0.03, "learning_rate": 4.875e-07, "loss": 0.4831, "step": 235 }, { "epoch": 0.03, "learning_rate": 4.979166666666666e-07, "loss": 0.4778, "step": 240 }, { "epoch": 0.03, "learning_rate": 4.99995769214436e-07, "loss": 0.4682, "step": 245 }, { "epoch": 0.03, "learning_rate": 4.999785818935017e-07, "loss": 0.4524, "step": 250 }, { "epoch": 0.03, "learning_rate": 4.999481745213471e-07, "loss": 0.4422, "step": 255 }, { "epoch": 0.03, "learning_rate": 4.999045487060579e-07, "loss": 0.4382, "step": 260 }, { "epoch": 0.03, "learning_rate": 4.998477067547739e-07, "loss": 0.4222, "step": 265 }, { "epoch": 0.03, "learning_rate": 4.997776516735666e-07, "loss": 0.4109, "step": 270 }, { "epoch": 0.03, "learning_rate": 4.996943871672807e-07, "loss": 0.4046, "step": 275 }, { "epoch": 0.03, "learning_rate": 4.995979176393372e-07, "loss": 0.384, "step": 280 }, { "epoch": 0.03, "learning_rate": 4.994882481915018e-07, "loss": 0.3831, "step": 285 }, { "epoch": 0.03, "learning_rate": 4.993653846236143e-07, "loss": 0.3644, "step": 290 }, { "epoch": 0.03, "learning_rate": 4.99229333433282e-07, "loss": 0.3668, "step": 295 }, { "epoch": 0.03, "learning_rate": 4.99080101815536e-07, "loss": 0.3467, "step": 300 }, { "epoch": 0.03, "learning_rate": 4.989176976624511e-07, "loss": 0.3406, "step": 305 }, { "epoch": 0.04, "learning_rate": 4.987421295627278e-07, "loss": 0.3281, "step": 310 }, { "epoch": 0.04, "learning_rate": 4.98553406801239e-07, "loss": 0.3128, "step": 315 }, { "epoch": 0.04, "learning_rate": 4.983515393585378e-07, "loss": 0.3051, "step": 320 }, { "epoch": 0.04, "learning_rate": 4.981365379103305e-07, "loss": 0.3054, "step": 325 }, { "epoch": 0.04, "learning_rate": 4.97908413826912e-07, "loss": 0.2855, "step": 330 }, { "epoch": 0.04, "learning_rate": 4.97667179172564e-07, "loss": 0.2793, "step": 335 }, { "epoch": 0.04, "learning_rate": 4.974128467049176e-07, "loss": 0.2664, "step": 340 }, { "epoch": 0.04, "learning_rate": 4.971454298742779e-07, "loss": 0.2559, "step": 345 }, { "epoch": 0.04, "learning_rate": 4.968649428229134e-07, "loss": 0.2464, "step": 350 }, { "epoch": 0.04, "learning_rate": 4.965714003843078e-07, "loss": 0.2435, "step": 355 }, { "epoch": 0.04, "learning_rate": 4.962648180823752e-07, "loss": 0.2322, "step": 360 }, { "epoch": 0.04, "learning_rate": 4.959452121306397e-07, "loss": 0.22, "step": 365 }, { "epoch": 0.04, "learning_rate": 4.956125994313774e-07, "loss": 0.2156, "step": 370 }, { "epoch": 0.04, "learning_rate": 4.952669975747232e-07, "loss": 0.2027, "step": 375 }, { "epoch": 0.04, "learning_rate": 4.949084248377396e-07, "loss": 0.197, "step": 380 }, { "epoch": 0.04, "learning_rate": 4.945369001834514e-07, "loss": 0.1852, "step": 385 }, { "epoch": 0.04, "learning_rate": 4.941524432598415e-07, "loss": 0.1776, "step": 390 }, { "epoch": 0.05, "learning_rate": 4.937550743988127e-07, "loss": 0.1731, "step": 395 }, { "epoch": 0.05, "learning_rate": 4.933448146151121e-07, "loss": 0.1715, "step": 400 }, { "epoch": 0.05, "learning_rate": 4.9292168560522e-07, "loss": 0.1641, "step": 405 }, { "epoch": 0.05, "learning_rate": 4.924857097462023e-07, "loss": 0.1499, "step": 410 }, { "epoch": 0.05, "learning_rate": 4.920369100945269e-07, "loss": 0.1555, "step": 415 }, { "epoch": 0.05, "learning_rate": 4.915753103848449e-07, "loss": 0.139, "step": 420 }, { "epoch": 0.05, "learning_rate": 4.911009350287347e-07, "loss": 0.132, "step": 425 }, { "epoch": 0.05, "learning_rate": 4.906138091134118e-07, "loss": 0.1293, "step": 430 }, { "epoch": 0.05, "learning_rate": 4.901139584004014e-07, "loss": 0.1181, "step": 435 }, { "epoch": 0.05, "learning_rate": 4.896014093241763e-07, "loss": 0.1132, "step": 440 }, { "epoch": 0.05, "learning_rate": 4.890761889907589e-07, "loss": 0.1085, "step": 445 }, { "epoch": 0.05, "learning_rate": 4.885383251762876e-07, "loss": 0.1039, "step": 450 }, { "epoch": 0.05, "learning_rate": 4.879878463255482e-07, "loss": 0.1046, "step": 455 }, { "epoch": 0.05, "learning_rate": 4.874247815504692e-07, "loss": 0.101, "step": 460 }, { "epoch": 0.05, "learning_rate": 4.868491606285823e-07, "loss": 0.0934, "step": 465 }, { "epoch": 0.05, "learning_rate": 4.862610140014478e-07, "loss": 0.089, "step": 470 }, { "epoch": 0.05, "learning_rate": 4.856603727730446e-07, "loss": 0.0836, "step": 475 }, { "epoch": 0.06, "learning_rate": 4.850472687081252e-07, "loss": 0.08, "step": 480 }, { "epoch": 0.06, "learning_rate": 4.844217342305363e-07, "loss": 0.0765, "step": 485 }, { "epoch": 0.06, "learning_rate": 4.837838024215029e-07, "loss": 0.0708, "step": 490 }, { "epoch": 0.06, "learning_rate": 4.831335070178805e-07, "loss": 0.0693, "step": 495 }, { "epoch": 0.06, "learning_rate": 4.824708824103693e-07, "loss": 0.0651, "step": 500 }, { "epoch": 0.06, "learning_rate": 4.817959636416969e-07, "loss": 0.0638, "step": 505 }, { "epoch": 0.06, "learning_rate": 4.811087864047635e-07, "loss": 0.0582, "step": 510 }, { "epoch": 0.06, "learning_rate": 4.80409387040756e-07, "loss": 0.0535, "step": 515 }, { "epoch": 0.06, "learning_rate": 4.796978025372246e-07, "loss": 0.0574, "step": 520 }, { "epoch": 0.06, "learning_rate": 4.789740705261278e-07, "loss": 0.048, "step": 525 }, { "epoch": 0.06, "learning_rate": 4.782382292818416e-07, "loss": 0.0533, "step": 530 }, { "epoch": 0.06, "learning_rate": 4.774903177191357e-07, "loss": 0.0474, "step": 535 }, { "epoch": 0.06, "learning_rate": 4.767303753911156e-07, "loss": 0.0474, "step": 540 }, { "epoch": 0.06, "learning_rate": 4.759584424871301e-07, "loss": 0.0442, "step": 545 }, { "epoch": 0.06, "learning_rate": 4.751745598306469e-07, "loss": 0.0461, "step": 550 }, { "epoch": 0.06, "learning_rate": 4.7437876887709316e-07, "loss": 0.0433, "step": 555 }, { "epoch": 0.06, "learning_rate": 4.735711117116629e-07, "loss": 0.0392, "step": 560 }, { "epoch": 0.06, "learning_rate": 4.7275163104709194e-07, "loss": 0.039, "step": 565 }, { "epoch": 0.07, "learning_rate": 4.719203702213985e-07, "loss": 0.0368, "step": 570 }, { "epoch": 0.07, "learning_rate": 4.710773731955917e-07, "loss": 0.0355, "step": 575 }, { "epoch": 0.07, "learning_rate": 4.702226845513464e-07, "loss": 0.0321, "step": 580 }, { "epoch": 0.07, "learning_rate": 4.693563494886454e-07, "loss": 0.0306, "step": 585 }, { "epoch": 0.07, "learning_rate": 4.684784138233898e-07, "loss": 0.0345, "step": 590 }, { "epoch": 0.07, "learning_rate": 4.675889239849749e-07, "loss": 0.0276, "step": 595 }, { "epoch": 0.07, "learning_rate": 4.6668792701383576e-07, "loss": 0.0264, "step": 600 }, { "epoch": 0.07, "learning_rate": 4.6577547055895906e-07, "loss": 0.0322, "step": 605 }, { "epoch": 0.07, "learning_rate": 4.648516028753632e-07, "loss": 0.025, "step": 610 }, { "epoch": 0.07, "learning_rate": 4.6391637282154626e-07, "loss": 0.0277, "step": 615 }, { "epoch": 0.07, "learning_rate": 4.6296982985690256e-07, "loss": 0.0276, "step": 620 }, { "epoch": 0.07, "learning_rate": 4.6201202403910643e-07, "loss": 0.0252, "step": 625 }, { "epoch": 0.07, "learning_rate": 4.610430060214655e-07, "loss": 0.0256, "step": 630 }, { "epoch": 0.07, "learning_rate": 4.6006282705024143e-07, "loss": 0.0223, "step": 635 }, { "epoch": 0.07, "learning_rate": 4.5907153896193986e-07, "loss": 0.0252, "step": 640 }, { "epoch": 0.07, "learning_rate": 4.5806919418056944e-07, "loss": 0.022, "step": 645 }, { "epoch": 0.07, "learning_rate": 4.5705584571486887e-07, "loss": 0.02, "step": 650 }, { "epoch": 0.08, "learning_rate": 4.5603154715550386e-07, "loss": 0.0183, "step": 655 }, { "epoch": 0.08, "learning_rate": 4.549963526722331e-07, "loss": 0.0195, "step": 660 }, { "epoch": 0.08, "learning_rate": 4.5395031701104303e-07, "loss": 0.0169, "step": 665 }, { "epoch": 0.08, "learning_rate": 4.5289349549125306e-07, "loss": 0.0185, "step": 670 }, { "epoch": 0.08, "learning_rate": 4.5182594400259e-07, "loss": 0.0181, "step": 675 }, { "epoch": 0.08, "learning_rate": 4.5074771900223195e-07, "loss": 0.018, "step": 680 }, { "epoch": 0.08, "learning_rate": 4.4965887751182317e-07, "loss": 0.017, "step": 685 }, { "epoch": 0.08, "learning_rate": 4.48559477114458e-07, "loss": 0.017, "step": 690 }, { "epoch": 0.08, "learning_rate": 4.474495759516358e-07, "loss": 0.0164, "step": 695 }, { "epoch": 0.08, "learning_rate": 4.463292327201862e-07, "loss": 0.0171, "step": 700 }, { "epoch": 0.08, "learning_rate": 4.451985066691648e-07, "loss": 0.016, "step": 705 }, { "epoch": 0.08, "learning_rate": 4.4405745759671984e-07, "loss": 0.0154, "step": 710 }, { "epoch": 0.08, "learning_rate": 4.4290614584692996e-07, "loss": 0.0135, "step": 715 }, { "epoch": 0.08, "learning_rate": 4.4174463230661264e-07, "loss": 0.0154, "step": 720 }, { "epoch": 0.08, "learning_rate": 4.405729784021045e-07, "loss": 0.0137, "step": 725 }, { "epoch": 0.08, "learning_rate": 4.393912460960124e-07, "loss": 0.0124, "step": 730 }, { "epoch": 0.08, "learning_rate": 4.381994978839371e-07, "loss": 0.0122, "step": 735 }, { "epoch": 0.08, "learning_rate": 4.3699779679116753e-07, "loss": 0.0127, "step": 740 }, { "epoch": 0.09, "learning_rate": 4.357862063693485e-07, "loss": 0.0127, "step": 745 }, { "epoch": 0.09, "learning_rate": 4.3456479069311923e-07, "loss": 0.0135, "step": 750 }, { "epoch": 0.09, "learning_rate": 4.3333361435672465e-07, "loss": 0.0133, "step": 755 }, { "epoch": 0.09, "learning_rate": 4.320927424706e-07, "loss": 0.0138, "step": 760 }, { "epoch": 0.09, "learning_rate": 4.3084224065792695e-07, "loss": 0.0114, "step": 765 }, { "epoch": 0.09, "learning_rate": 4.2958217505116324e-07, "loss": 0.0122, "step": 770 }, { "epoch": 0.09, "learning_rate": 4.283126122885454e-07, "loss": 0.0126, "step": 775 }, { "epoch": 0.09, "learning_rate": 4.2703361951056444e-07, "loss": 0.0121, "step": 780 }, { "epoch": 0.09, "learning_rate": 4.2574526435641546e-07, "loss": 0.0099, "step": 785 }, { "epoch": 0.09, "learning_rate": 4.2444761496042005e-07, "loss": 0.0104, "step": 790 }, { "epoch": 0.09, "learning_rate": 4.2314073994842353e-07, "loss": 0.0095, "step": 795 }, { "epoch": 0.09, "learning_rate": 4.218247084341655e-07, "loss": 0.0108, "step": 800 }, { "epoch": 0.09, "learning_rate": 4.204995900156246e-07, "loss": 0.01, "step": 805 }, { "epoch": 0.09, "learning_rate": 4.1916545477133815e-07, "loss": 0.0102, "step": 810 }, { "epoch": 0.09, "learning_rate": 4.1782237325669587e-07, "loss": 0.0121, "step": 815 }, { "epoch": 0.09, "learning_rate": 4.1647041650020853e-07, "loss": 0.0075, "step": 820 }, { "epoch": 0.09, "learning_rate": 4.151096559997519e-07, "loss": 0.0089, "step": 825 }, { "epoch": 0.1, "learning_rate": 4.137401637187853e-07, "loss": 0.0078, "step": 830 }, { "epoch": 0.1, "learning_rate": 4.1236201208254586e-07, "loss": 0.0095, "step": 835 }, { "epoch": 0.1, "learning_rate": 4.109752739742187e-07, "loss": 0.0081, "step": 840 }, { "epoch": 0.1, "learning_rate": 4.0958002273108204e-07, "loss": 0.0082, "step": 845 }, { "epoch": 0.1, "learning_rate": 4.0817633214062904e-07, "loss": 0.0079, "step": 850 }, { "epoch": 0.1, "learning_rate": 4.067642764366654e-07, "loss": 0.0079, "step": 855 }, { "epoch": 0.1, "learning_rate": 4.053439302953838e-07, "loss": 0.0068, "step": 860 }, { "epoch": 0.1, "learning_rate": 4.039153688314145e-07, "loss": 0.008, "step": 865 }, { "epoch": 0.1, "learning_rate": 4.024786675938529e-07, "loss": 0.0076, "step": 870 }, { "epoch": 0.1, "learning_rate": 4.01033902562264e-07, "loss": 0.0072, "step": 875 }, { "epoch": 0.1, "learning_rate": 3.995811501426647e-07, "loss": 0.0073, "step": 880 }, { "epoch": 0.1, "learning_rate": 3.981204871634827e-07, "loss": 0.0083, "step": 885 }, { "epoch": 0.1, "learning_rate": 3.966519908714933e-07, "loss": 0.0067, "step": 890 }, { "epoch": 0.1, "learning_rate": 3.9517573892773493e-07, "loss": 0.0067, "step": 895 }, { "epoch": 0.1, "learning_rate": 3.936918094034013e-07, "loss": 0.0066, "step": 900 }, { "epoch": 0.1, "learning_rate": 3.922002807757129e-07, "loss": 0.0069, "step": 905 }, { "epoch": 0.1, "learning_rate": 3.9070123192376713e-07, "loss": 0.0072, "step": 910 }, { "epoch": 0.1, "learning_rate": 3.891947421243661e-07, "loss": 0.0056, "step": 915 }, { "epoch": 0.11, "learning_rate": 3.8768089104782464e-07, "loss": 0.0058, "step": 920 }, { "epoch": 0.11, "learning_rate": 3.8615975875375676e-07, "loss": 0.0075, "step": 925 }, { "epoch": 0.11, "learning_rate": 3.846314256868417e-07, "loss": 0.0063, "step": 930 }, { "epoch": 0.11, "learning_rate": 3.8309597267256966e-07, "loss": 0.0061, "step": 935 }, { "epoch": 0.11, "learning_rate": 3.8155348091296736e-07, "loss": 0.0057, "step": 940 }, { "epoch": 0.11, "learning_rate": 3.800040319823038e-07, "loss": 0.0058, "step": 945 }, { "epoch": 0.11, "learning_rate": 3.784477078227762e-07, "loss": 0.0052, "step": 950 }, { "epoch": 0.11, "learning_rate": 3.7688459074017603e-07, "loss": 0.006, "step": 955 }, { "epoch": 0.11, "learning_rate": 3.7531476339953714e-07, "loss": 0.0069, "step": 960 }, { "epoch": 0.11, "learning_rate": 3.737383088207635e-07, "loss": 0.0058, "step": 965 }, { "epoch": 0.11, "learning_rate": 3.7215531037423874e-07, "loss": 0.0053, "step": 970 }, { "epoch": 0.11, "learning_rate": 3.705658517764172e-07, "loss": 0.0053, "step": 975 }, { "epoch": 0.11, "learning_rate": 3.6897001708539655e-07, "loss": 0.0056, "step": 980 }, { "epoch": 0.11, "learning_rate": 3.673678906964727e-07, "loss": 0.0064, "step": 985 }, { "epoch": 0.11, "learning_rate": 3.657595573376761e-07, "loss": 0.0053, "step": 990 }, { "epoch": 0.11, "learning_rate": 3.6414510206529137e-07, "loss": 0.0053, "step": 995 }, { "epoch": 0.11, "learning_rate": 3.625246102593588e-07, "loss": 0.0058, "step": 1000 }, { "epoch": 0.12, "learning_rate": 3.6089816761915904e-07, "loss": 0.0049, "step": 1005 }, { "epoch": 0.12, "learning_rate": 3.592658601586811e-07, "loss": 0.0049, "step": 1010 }, { "epoch": 0.12, "learning_rate": 3.5762777420207377e-07, "loss": 0.0049, "step": 1015 }, { "epoch": 0.12, "learning_rate": 3.559839963790797e-07, "loss": 0.0056, "step": 1020 }, { "epoch": 0.12, "learning_rate": 3.5433461362045447e-07, "loss": 0.0046, "step": 1025 }, { "epoch": 0.12, "learning_rate": 3.526797131533693e-07, "loss": 0.0042, "step": 1030 }, { "epoch": 0.12, "learning_rate": 3.510193824967979e-07, "loss": 0.0039, "step": 1035 }, { "epoch": 0.12, "learning_rate": 3.493537094568882e-07, "loss": 0.0045, "step": 1040 }, { "epoch": 0.12, "learning_rate": 3.4768278212231837e-07, "loss": 0.0055, "step": 1045 }, { "epoch": 0.12, "learning_rate": 3.460066888596391e-07, "loss": 0.0046, "step": 1050 }, { "epoch": 0.12, "learning_rate": 3.4432551830859926e-07, "loss": 0.0047, "step": 1055 }, { "epoch": 0.12, "learning_rate": 3.4263935937745906e-07, "loss": 0.0035, "step": 1060 }, { "epoch": 0.12, "learning_rate": 3.4094830123828786e-07, "loss": 0.0043, "step": 1065 }, { "epoch": 0.12, "learning_rate": 3.3925243332224835e-07, "loss": 0.0036, "step": 1070 }, { "epoch": 0.12, "learning_rate": 3.3755184531486684e-07, "loss": 0.0042, "step": 1075 }, { "epoch": 0.12, "learning_rate": 3.3584662715129065e-07, "loss": 0.0044, "step": 1080 }, { "epoch": 0.12, "learning_rate": 3.341368690115316e-07, "loss": 0.0038, "step": 1085 }, { "epoch": 0.13, "learning_rate": 3.324226613156968e-07, "loss": 0.0039, "step": 1090 }, { "epoch": 0.13, "learning_rate": 3.3070409471920725e-07, "loss": 0.0037, "step": 1095 }, { "epoch": 0.13, "learning_rate": 3.289812601080029e-07, "loss": 0.004, "step": 1100 }, { "epoch": 0.13, "learning_rate": 3.272542485937368e-07, "loss": 0.0044, "step": 1105 }, { "epoch": 0.13, "learning_rate": 3.255231515089565e-07, "loss": 0.0033, "step": 1110 }, { "epoch": 0.13, "learning_rate": 3.237880604022735e-07, "loss": 0.0038, "step": 1115 }, { "epoch": 0.13, "learning_rate": 3.220490670335223e-07, "loss": 0.0042, "step": 1120 }, { "epoch": 0.13, "learning_rate": 3.2030626336890766e-07, "loss": 0.0039, "step": 1125 }, { "epoch": 0.13, "learning_rate": 3.185597415761405e-07, "loss": 0.0033, "step": 1130 }, { "epoch": 0.13, "learning_rate": 3.168095940195642e-07, "loss": 0.0036, "step": 1135 }, { "epoch": 0.13, "learning_rate": 3.150559132552697e-07, "loss": 0.0029, "step": 1140 }, { "epoch": 0.13, "learning_rate": 3.1329879202620047e-07, "loss": 0.0036, "step": 1145 }, { "epoch": 0.13, "learning_rate": 3.1153832325724825e-07, "loss": 0.004, "step": 1150 }, { "epoch": 0.13, "learning_rate": 3.0977460005033854e-07, "loss": 0.0037, "step": 1155 }, { "epoch": 0.13, "learning_rate": 3.0800771567950697e-07, "loss": 0.0027, "step": 1160 }, { "epoch": 0.13, "learning_rate": 3.062377635859663e-07, "loss": 0.0031, "step": 1165 }, { "epoch": 0.13, "learning_rate": 3.04464837373165e-07, "loss": 0.0036, "step": 1170 }, { "epoch": 0.13, "learning_rate": 3.026890308018374e-07, "loss": 0.0028, "step": 1175 }, { "epoch": 0.14, "learning_rate": 3.0091043778504433e-07, "loss": 0.0034, "step": 1180 }, { "epoch": 0.14, "learning_rate": 2.991291523832075e-07, "loss": 0.0035, "step": 1185 }, { "epoch": 0.14, "learning_rate": 2.9734526879913444e-07, "loss": 0.0032, "step": 1190 }, { "epoch": 0.14, "learning_rate": 2.955588813730369e-07, "loss": 0.0031, "step": 1195 }, { "epoch": 0.14, "learning_rate": 2.937700845775416e-07, "loss": 0.0032, "step": 1200 }, { "epoch": 0.14, "learning_rate": 2.919789730126943e-07, "loss": 0.004, "step": 1205 }, { "epoch": 0.14, "learning_rate": 2.9018564140095654e-07, "loss": 0.003, "step": 1210 }, { "epoch": 0.14, "learning_rate": 2.883901845821965e-07, "loss": 0.0026, "step": 1215 }, { "epoch": 0.14, "learning_rate": 2.8659269750867364e-07, "loss": 0.0033, "step": 1220 }, { "epoch": 0.14, "learning_rate": 2.8479327524001633e-07, "loss": 0.0026, "step": 1225 }, { "epoch": 0.14, "learning_rate": 2.8299201293819584e-07, "loss": 0.0034, "step": 1230 }, { "epoch": 0.14, "learning_rate": 2.811890058624926e-07, "loss": 0.0026, "step": 1235 }, { "epoch": 0.14, "learning_rate": 2.7938434936445943e-07, "loss": 0.003, "step": 1240 }, { "epoch": 0.14, "learning_rate": 2.7757813888287795e-07, "loss": 0.003, "step": 1245 }, { "epoch": 0.14, "learning_rate": 2.75770469938712e-07, "loss": 0.0026, "step": 1250 }, { "epoch": 0.14, "learning_rate": 2.73961438130056e-07, "loss": 0.002, "step": 1255 }, { "epoch": 0.14, "learning_rate": 2.721511391270788e-07, "loss": 0.0026, "step": 1260 }, { "epoch": 0.15, "learning_rate": 2.703396686669646e-07, "loss": 0.003, "step": 1265 }, { "epoch": 0.15, "learning_rate": 2.6852712254884985e-07, "loss": 0.0025, "step": 1270 }, { "epoch": 0.15, "learning_rate": 2.667135966287568e-07, "loss": 0.0021, "step": 1275 }, { "epoch": 0.15, "learning_rate": 2.648991868145244e-07, "loss": 0.0019, "step": 1280 }, { "epoch": 0.15, "learning_rate": 2.63083989060736e-07, "loss": 0.0027, "step": 1285 }, { "epoch": 0.15, "learning_rate": 2.6126809936364485e-07, "loss": 0.0027, "step": 1290 }, { "epoch": 0.15, "learning_rate": 2.5945161375609775e-07, "loss": 0.0024, "step": 1295 }, { "epoch": 0.15, "learning_rate": 2.576346283024557e-07, "loss": 0.0027, "step": 1300 }, { "epoch": 0.15, "learning_rate": 2.5581723909351404e-07, "loss": 0.0025, "step": 1305 }, { "epoch": 0.15, "learning_rate": 2.5399954224142086e-07, "loss": 0.0025, "step": 1310 }, { "epoch": 0.15, "learning_rate": 2.5218163387459346e-07, "loss": 0.0025, "step": 1315 }, { "epoch": 0.15, "learning_rate": 2.503636101326354e-07, "loss": 0.0024, "step": 1320 }, { "epoch": 0.15, "learning_rate": 2.485455671612515e-07, "loss": 0.0021, "step": 1325 }, { "epoch": 0.15, "learning_rate": 2.4672760110716393e-07, "loss": 0.0024, "step": 1330 }, { "epoch": 0.15, "learning_rate": 2.4490980811302657e-07, "loss": 0.0028, "step": 1335 }, { "epoch": 0.15, "learning_rate": 2.4309228431234167e-07, "loss": 0.0024, "step": 1340 }, { "epoch": 0.15, "learning_rate": 2.412751258243748e-07, "loss": 0.0021, "step": 1345 }, { "epoch": 0.15, "learning_rate": 2.394584287490721e-07, "loss": 0.0021, "step": 1350 }, { "epoch": 0.16, "learning_rate": 2.376422891619785e-07, "loss": 0.0019, "step": 1355 }, { "epoch": 0.16, "learning_rate": 2.3582680310915556e-07, "loss": 0.0024, "step": 1360 }, { "epoch": 0.16, "learning_rate": 2.340120666021036e-07, "loss": 0.0024, "step": 1365 }, { "epoch": 0.16, "learning_rate": 2.3219817561268286e-07, "loss": 0.0023, "step": 1370 }, { "epoch": 0.16, "learning_rate": 2.3038522606803878e-07, "loss": 0.002, "step": 1375 }, { "epoch": 0.16, "learning_rate": 2.2857331384552887e-07, "loss": 0.0025, "step": 1380 }, { "epoch": 0.16, "learning_rate": 2.2676253476765194e-07, "loss": 0.0018, "step": 1385 }, { "epoch": 0.16, "learning_rate": 2.2495298459698094e-07, "loss": 0.0024, "step": 1390 }, { "epoch": 0.16, "learning_rate": 2.2314475903109824e-07, "loss": 0.0021, "step": 1395 }, { "epoch": 0.16, "learning_rate": 2.2133795369753476e-07, "loss": 0.0019, "step": 1400 }, { "epoch": 0.16, "learning_rate": 2.1953266414871316e-07, "loss": 0.002, "step": 1405 }, { "epoch": 0.16, "learning_rate": 2.1772898585689376e-07, "loss": 0.0021, "step": 1410 }, { "epoch": 0.16, "learning_rate": 2.159270142091264e-07, "loss": 0.0016, "step": 1415 }, { "epoch": 0.16, "learning_rate": 2.1412684450220518e-07, "loss": 0.0018, "step": 1420 }, { "epoch": 0.16, "learning_rate": 2.123285719376292e-07, "loss": 0.002, "step": 1425 }, { "epoch": 0.16, "learning_rate": 2.1053229161656772e-07, "loss": 0.002, "step": 1430 }, { "epoch": 0.16, "learning_rate": 2.0873809853483056e-07, "loss": 0.0017, "step": 1435 }, { "epoch": 0.17, "learning_rate": 2.0694608757784466e-07, "loss": 0.0024, "step": 1440 }, { "epoch": 0.17, "learning_rate": 2.0515635351563562e-07, "loss": 0.002, "step": 1445 }, { "epoch": 0.17, "learning_rate": 2.0336899099781632e-07, "loss": 0.0021, "step": 1450 }, { "epoch": 0.17, "learning_rate": 2.0158409454858103e-07, "loss": 0.0018, "step": 1455 }, { "epoch": 0.17, "learning_rate": 1.9980175856170638e-07, "loss": 0.0023, "step": 1460 }, { "epoch": 0.17, "learning_rate": 1.980220772955602e-07, "loss": 0.0015, "step": 1465 }, { "epoch": 0.17, "learning_rate": 1.9624514486811548e-07, "loss": 0.0017, "step": 1470 }, { "epoch": 0.17, "learning_rate": 1.944710552519742e-07, "loss": 0.0021, "step": 1475 }, { "epoch": 0.17, "learning_rate": 1.926999022693965e-07, "loss": 0.002, "step": 1480 }, { "epoch": 0.17, "learning_rate": 1.9093177958733963e-07, "loss": 0.0022, "step": 1485 }, { "epoch": 0.17, "learning_rate": 1.8916678071250446e-07, "loss": 0.0027, "step": 1490 }, { "epoch": 0.17, "learning_rate": 1.8740499898638958e-07, "loss": 0.0019, "step": 1495 }, { "epoch": 0.17, "learning_rate": 1.8564652758035622e-07, "loss": 0.002, "step": 1500 }, { "epoch": 0.17, "learning_rate": 1.8389145949069951e-07, "loss": 0.0015, "step": 1505 }, { "epoch": 0.17, "learning_rate": 1.8213988753373145e-07, "loss": 0.002, "step": 1510 }, { "epoch": 0.17, "learning_rate": 1.803919043408721e-07, "loss": 0.0019, "step": 1515 }, { "epoch": 0.17, "learning_rate": 1.7864760235375035e-07, "loss": 0.0022, "step": 1520 }, { "epoch": 0.17, "learning_rate": 1.7690707381931582e-07, "loss": 0.0021, "step": 1525 }, { "epoch": 0.18, "learning_rate": 1.7517041078495992e-07, "loss": 0.0013, "step": 1530 }, { "epoch": 0.18, "learning_rate": 1.73437705093648e-07, "loss": 0.0015, "step": 1535 }, { "epoch": 0.18, "learning_rate": 1.7170904837906263e-07, "loss": 0.0017, "step": 1540 }, { "epoch": 0.18, "learning_rate": 1.6998453206075708e-07, "loss": 0.0017, "step": 1545 }, { "epoch": 0.18, "learning_rate": 1.682642473393211e-07, "loss": 0.0015, "step": 1550 }, { "epoch": 0.18, "learning_rate": 1.665482851915573e-07, "loss": 0.0011, "step": 1555 }, { "epoch": 0.18, "learning_rate": 1.6483673636567022e-07, "loss": 0.0015, "step": 1560 }, { "epoch": 0.18, "learning_rate": 1.6312969137646715e-07, "loss": 0.0014, "step": 1565 }, { "epoch": 0.18, "learning_rate": 1.61427240500571e-07, "loss": 0.0016, "step": 1570 }, { "epoch": 0.18, "learning_rate": 1.5972947377164642e-07, "loss": 0.0013, "step": 1575 }, { "epoch": 0.18, "learning_rate": 1.5803648097563787e-07, "loss": 0.0015, "step": 1580 }, { "epoch": 0.18, "learning_rate": 1.5634835164602196e-07, "loss": 0.0015, "step": 1585 }, { "epoch": 0.18, "learning_rate": 1.5466517505907207e-07, "loss": 0.0016, "step": 1590 }, { "epoch": 0.18, "learning_rate": 1.5298704022913676e-07, "loss": 0.0014, "step": 1595 }, { "epoch": 0.18, "learning_rate": 1.513140359039332e-07, "loss": 0.0017, "step": 1600 }, { "epoch": 0.18, "learning_rate": 1.4964625055985264e-07, "loss": 0.0015, "step": 1605 }, { "epoch": 0.18, "learning_rate": 1.4798377239728236e-07, "loss": 0.0016, "step": 1610 }, { "epoch": 0.19, "learning_rate": 1.4632668933594028e-07, "loss": 0.002, "step": 1615 }, { "epoch": 0.19, "learning_rate": 1.44675089010226e-07, "loss": 0.0015, "step": 1620 }, { "epoch": 0.19, "learning_rate": 1.430290587645865e-07, "loss": 0.0012, "step": 1625 }, { "epoch": 0.19, "learning_rate": 1.4138868564889573e-07, "loss": 0.0014, "step": 1630 }, { "epoch": 0.19, "learning_rate": 1.3975405641385253e-07, "loss": 0.0016, "step": 1635 }, { "epoch": 0.19, "learning_rate": 1.381252575063919e-07, "loss": 0.0016, "step": 1640 }, { "epoch": 0.19, "learning_rate": 1.365023750651133e-07, "loss": 0.0015, "step": 1645 }, { "epoch": 0.19, "learning_rate": 1.3488549491572576e-07, "loss": 0.0015, "step": 1650 }, { "epoch": 0.19, "learning_rate": 1.3327470256650846e-07, "loss": 0.0016, "step": 1655 }, { "epoch": 0.19, "learning_rate": 1.3167008320378916e-07, "loss": 0.0014, "step": 1660 }, { "epoch": 0.19, "learning_rate": 1.3007172168743852e-07, "loss": 0.002, "step": 1665 }, { "epoch": 0.19, "learning_rate": 1.2847970254638263e-07, "loss": 0.0012, "step": 1670 }, { "epoch": 0.19, "learning_rate": 1.2689410997413325e-07, "loss": 0.0014, "step": 1675 }, { "epoch": 0.19, "learning_rate": 1.2531502782433416e-07, "loss": 0.0018, "step": 1680 }, { "epoch": 0.19, "learning_rate": 1.2374253960632754e-07, "loss": 0.0012, "step": 1685 }, { "epoch": 0.19, "learning_rate": 1.22176728480737e-07, "loss": 0.002, "step": 1690 }, { "epoch": 0.19, "learning_rate": 1.2061767725507004e-07, "loss": 0.0014, "step": 1695 }, { "epoch": 0.2, "learning_rate": 1.1906546837933867e-07, "loss": 0.0015, "step": 1700 }, { "epoch": 0.2, "learning_rate": 1.175201839416988e-07, "loss": 0.0019, "step": 1705 }, { "epoch": 0.2, "learning_rate": 1.1598190566410946e-07, "loss": 0.0015, "step": 1710 }, { "epoch": 0.2, "learning_rate": 1.1445071489801073e-07, "loss": 0.0015, "step": 1715 }, { "epoch": 0.2, "learning_rate": 1.1292669262002158e-07, "loss": 0.0016, "step": 1720 }, { "epoch": 0.2, "learning_rate": 1.1140991942765713e-07, "loss": 0.0014, "step": 1725 }, { "epoch": 0.2, "learning_rate": 1.0990047553506676e-07, "loss": 0.0017, "step": 1730 }, { "epoch": 0.2, "learning_rate": 1.0839844076879185e-07, "loss": 0.0012, "step": 1735 }, { "epoch": 0.2, "learning_rate": 1.0690389456354367e-07, "loss": 0.0012, "step": 1740 }, { "epoch": 0.2, "learning_rate": 1.0541691595800336e-07, "loss": 0.0015, "step": 1745 }, { "epoch": 0.2, "learning_rate": 1.0393758359064144e-07, "loss": 0.0014, "step": 1750 }, { "epoch": 0.2, "learning_rate": 1.0246597569555892e-07, "loss": 0.0017, "step": 1755 }, { "epoch": 0.2, "learning_rate": 1.0100217009835039e-07, "loss": 0.002, "step": 1760 }, { "epoch": 0.2, "learning_rate": 9.95462442119879e-08, "loss": 0.0016, "step": 1765 }, { "epoch": 0.2, "learning_rate": 9.809827503272713e-08, "loss": 0.0011, "step": 1770 }, { "epoch": 0.2, "learning_rate": 9.665833913603522e-08, "loss": 0.0016, "step": 1775 }, { "epoch": 0.2, "learning_rate": 9.522651267254147e-08, "loss": 0.0018, "step": 1780 }, { "epoch": 0.2, "learning_rate": 9.380287136400999e-08, "loss": 0.0014, "step": 1785 }, { "epoch": 0.21, "learning_rate": 9.238749049933484e-08, "loss": 0.0012, "step": 1790 }, { "epoch": 0.21, "learning_rate": 9.098044493055898e-08, "loss": 0.0014, "step": 1795 }, { "epoch": 0.21, "learning_rate": 8.958180906891547e-08, "loss": 0.0014, "step": 1800 }, { "epoch": 0.21, "learning_rate": 8.819165688089192e-08, "loss": 0.0012, "step": 1805 }, { "epoch": 0.21, "learning_rate": 8.681006188431945e-08, "loss": 0.0017, "step": 1810 }, { "epoch": 0.21, "learning_rate": 8.543709714448402e-08, "loss": 0.0012, "step": 1815 }, { "epoch": 0.21, "learning_rate": 8.407283527026324e-08, "loss": 0.0017, "step": 1820 }, { "epoch": 0.21, "learning_rate": 8.271734841028552e-08, "loss": 0.0011, "step": 1825 }, { "epoch": 0.21, "learning_rate": 8.137070824911504e-08, "loss": 0.0014, "step": 1830 }, { "epoch": 0.21, "learning_rate": 8.003298600346085e-08, "loss": 0.0011, "step": 1835 }, { "epoch": 0.21, "learning_rate": 7.870425241841019e-08, "loss": 0.0016, "step": 1840 }, { "epoch": 0.21, "learning_rate": 7.738457776368765e-08, "loss": 0.0013, "step": 1845 }, { "epoch": 0.21, "learning_rate": 7.607403182993821e-08, "loss": 0.0018, "step": 1850 }, { "epoch": 0.21, "learning_rate": 7.477268392503728e-08, "loss": 0.0015, "step": 1855 }, { "epoch": 0.21, "learning_rate": 7.348060287042481e-08, "loss": 0.0011, "step": 1860 }, { "epoch": 0.21, "learning_rate": 7.219785699746572e-08, "loss": 0.0016, "step": 1865 }, { "epoch": 0.21, "learning_rate": 7.092451414383643e-08, "loss": 0.0013, "step": 1870 }, { "epoch": 0.22, "learning_rate": 6.966064164993715e-08, "loss": 0.0015, "step": 1875 }, { "epoch": 0.22, "learning_rate": 6.84063063553307e-08, "loss": 0.0011, "step": 1880 }, { "epoch": 0.22, "learning_rate": 6.716157459520738e-08, "loss": 0.0015, "step": 1885 }, { "epoch": 0.22, "learning_rate": 6.592651219687733e-08, "loss": 0.0014, "step": 1890 }, { "epoch": 0.22, "learning_rate": 6.470118447628911e-08, "loss": 0.0015, "step": 1895 }, { "epoch": 0.22, "learning_rate": 6.348565623457513e-08, "loss": 0.0011, "step": 1900 }, { "epoch": 0.22, "learning_rate": 6.22799917546252e-08, "loss": 0.0014, "step": 1905 }, { "epoch": 0.22, "learning_rate": 6.108425479768668e-08, "loss": 0.0013, "step": 1910 }, { "epoch": 0.22, "learning_rate": 5.989850859999227e-08, "loss": 0.0012, "step": 1915 }, { "epoch": 0.22, "learning_rate": 5.872281586941633e-08, "loss": 0.0013, "step": 1920 }, { "epoch": 0.22, "learning_rate": 5.755723878215801e-08, "loss": 0.0015, "step": 1925 }, { "epoch": 0.22, "learning_rate": 5.640183897945361e-08, "loss": 0.0013, "step": 1930 }, { "epoch": 0.22, "learning_rate": 5.525667756431615e-08, "loss": 0.0014, "step": 1935 }, { "epoch": 0.22, "learning_rate": 5.4121815098304186e-08, "loss": 0.0013, "step": 1940 }, { "epoch": 0.22, "learning_rate": 5.299731159831952e-08, "loss": 0.0014, "step": 1945 }, { "epoch": 0.22, "learning_rate": 5.18832265334323e-08, "loss": 0.0015, "step": 1950 }, { "epoch": 0.22, "learning_rate": 5.077961882173676e-08, "loss": 0.0015, "step": 1955 }, { "epoch": 0.22, "learning_rate": 4.968654682723486e-08, "loss": 0.0014, "step": 1960 }, { "epoch": 0.23, "learning_rate": 4.860406835675016e-08, "loss": 0.0018, "step": 1965 }, { "epoch": 0.23, "learning_rate": 4.753224065687047e-08, "loss": 0.0013, "step": 1970 }, { "epoch": 0.23, "learning_rate": 4.647112041092022e-08, "loss": 0.0013, "step": 1975 }, { "epoch": 0.23, "learning_rate": 4.542076373596318e-08, "loss": 0.0012, "step": 1980 }, { "epoch": 0.23, "learning_rate": 4.438122617983442e-08, "loss": 0.0011, "step": 1985 }, { "epoch": 0.23, "learning_rate": 4.3352562718202866e-08, "loss": 0.0019, "step": 1990 }, { "epoch": 0.23, "learning_rate": 4.233482775166364e-08, "loss": 0.0013, "step": 1995 }, { "epoch": 0.23, "learning_rate": 4.132807510286143e-08, "loss": 0.001, "step": 2000 }, { "epoch": 0.23, "learning_rate": 4.0332358013644015e-08, "loss": 0.0014, "step": 2005 }, { "epoch": 0.23, "learning_rate": 3.9347729142246325e-08, "loss": 0.0011, "step": 2010 }, { "epoch": 0.23, "learning_rate": 3.8374240560505974e-08, "loss": 0.0013, "step": 2015 }, { "epoch": 0.23, "learning_rate": 3.7411943751109314e-08, "loss": 0.0012, "step": 2020 }, { "epoch": 0.23, "learning_rate": 3.646088960486862e-08, "loss": 0.0011, "step": 2025 }, { "epoch": 0.23, "learning_rate": 3.552112841803104e-08, "loss": 0.0016, "step": 2030 }, { "epoch": 0.23, "learning_rate": 3.459270988961854e-08, "loss": 0.0014, "step": 2035 }, { "epoch": 0.23, "learning_rate": 3.367568311879959e-08, "loss": 0.0013, "step": 2040 }, { "epoch": 0.23, "learning_rate": 3.2770096602292464e-08, "loss": 0.0013, "step": 2045 }, { "epoch": 0.24, "learning_rate": 3.1875998231800704e-08, "loss": 0.0012, "step": 2050 }, { "epoch": 0.24, "learning_rate": 3.099343529148035e-08, "loss": 0.0012, "step": 2055 }, { "epoch": 0.24, "learning_rate": 3.0122454455439096e-08, "loss": 0.001, "step": 2060 }, { "epoch": 0.24, "learning_rate": 2.9263101785268252e-08, "loss": 0.001, "step": 2065 }, { "epoch": 0.24, "learning_rate": 2.8415422727606638e-08, "loss": 0.002, "step": 2070 }, { "epoch": 0.24, "learning_rate": 2.7579462111737063e-08, "loss": 0.0011, "step": 2075 }, { "epoch": 0.24, "learning_rate": 2.6755264147215794e-08, "loss": 0.0011, "step": 2080 }, { "epoch": 0.24, "learning_rate": 2.5942872421534147e-08, "loss": 0.0012, "step": 2085 }, { "epoch": 0.24, "learning_rate": 2.5142329897813952e-08, "loss": 0.0013, "step": 2090 }, { "epoch": 0.24, "learning_rate": 2.4353678912534896e-08, "loss": 0.0012, "step": 2095 }, { "epoch": 0.24, "learning_rate": 2.3576961173295772e-08, "loss": 0.0011, "step": 2100 }, { "epoch": 0.24, "learning_rate": 2.2812217756608937e-08, "loss": 0.0014, "step": 2105 }, { "epoch": 0.24, "learning_rate": 2.2059489105727856e-08, "loss": 0.0013, "step": 2110 }, { "epoch": 0.24, "learning_rate": 2.131881502850824e-08, "loss": 0.0014, "step": 2115 }, { "epoch": 0.24, "learning_rate": 2.0590234695302827e-08, "loss": 0.0009, "step": 2120 }, { "epoch": 0.24, "learning_rate": 1.9873786636889904e-08, "loss": 0.0011, "step": 2125 }, { "epoch": 0.24, "learning_rate": 1.916950874243575e-08, "loss": 0.0011, "step": 2130 }, { "epoch": 0.24, "learning_rate": 1.847743825749054e-08, "loss": 0.0013, "step": 2135 }, { "epoch": 0.25, "learning_rate": 1.779761178201894e-08, "loss": 0.0013, "step": 2140 }, { "epoch": 0.25, "learning_rate": 1.713006526846439e-08, "loss": 0.0013, "step": 2145 }, { "epoch": 0.25, "learning_rate": 1.6474834019847865e-08, "loss": 0.0015, "step": 2150 }, { "epoch": 0.25, "learning_rate": 1.5831952687900606e-08, "loss": 0.0012, "step": 2155 }, { "epoch": 0.25, "learning_rate": 1.5201455271231955e-08, "loss": 0.0012, "step": 2160 }, { "epoch": 0.25, "learning_rate": 1.4583375113531194e-08, "loss": 0.0012, "step": 2165 }, { "epoch": 0.25, "learning_rate": 1.3977744901803951e-08, "loss": 0.0011, "step": 2170 }, { "epoch": 0.25, "learning_rate": 1.3384596664643921e-08, "loss": 0.001, "step": 2175 }, { "epoch": 0.25, "learning_rate": 1.2803961770538885e-08, "loss": 0.0012, "step": 2180 }, { "epoch": 0.25, "learning_rate": 1.2235870926211616e-08, "loss": 0.0011, "step": 2185 }, { "epoch": 0.25, "learning_rate": 1.1680354174996299e-08, "loss": 0.0015, "step": 2190 }, { "epoch": 0.25, "learning_rate": 1.1137440895249456e-08, "loss": 0.0012, "step": 2195 }, { "epoch": 0.25, "learning_rate": 1.0607159798796394e-08, "loss": 0.0016, "step": 2200 }, { "epoch": 0.25, "learning_rate": 1.0089538929412723e-08, "loss": 0.0014, "step": 2205 }, { "epoch": 0.25, "learning_rate": 9.584605661341144e-09, "loss": 0.001, "step": 2210 }, { "epoch": 0.25, "learning_rate": 9.092386697844262e-09, "loss": 0.0011, "step": 2215 }, { "epoch": 0.25, "learning_rate": 8.612908069791703e-09, "loss": 0.0014, "step": 2220 }, { "epoch": 0.26, "learning_rate": 8.14619513428405e-09, "loss": 0.0013, "step": 2225 }, { "epoch": 0.26, "learning_rate": 7.692272573311426e-09, "loss": 0.0014, "step": 2230 }, { "epoch": 0.26, "learning_rate": 7.251164392448495e-09, "loss": 0.0011, "step": 2235 }, { "epoch": 0.26, "learning_rate": 6.822893919584877e-09, "loss": 0.0011, "step": 2240 }, { "epoch": 0.26, "learning_rate": 6.407483803691216e-09, "loss": 0.0012, "step": 2245 }, { "epoch": 0.26, "learning_rate": 6.0049560136216926e-09, "loss": 0.0012, "step": 2250 }, { "epoch": 0.26, "learning_rate": 5.615331836952119e-09, "loss": 0.001, "step": 2255 }, { "epoch": 0.26, "learning_rate": 5.238631878854038e-09, "loss": 0.0013, "step": 2260 }, { "epoch": 0.26, "learning_rate": 4.874876061005173e-09, "loss": 0.0011, "step": 2265 }, { "epoch": 0.26, "learning_rate": 4.5240836205357735e-09, "loss": 0.001, "step": 2270 }, { "epoch": 0.26, "learning_rate": 4.186273109011373e-09, "loss": 0.0012, "step": 2275 }, { "epoch": 0.26, "learning_rate": 3.861462391451492e-09, "loss": 0.0013, "step": 2280 }, { "epoch": 0.26, "learning_rate": 3.5496686453850843e-09, "loss": 0.0011, "step": 2285 }, { "epoch": 0.26, "learning_rate": 3.250908359942045e-09, "loss": 0.001, "step": 2290 }, { "epoch": 0.26, "learning_rate": 2.9651973349810177e-09, "loss": 0.0012, "step": 2295 }, { "epoch": 0.26, "learning_rate": 2.6925506802540353e-09, "loss": 0.0012, "step": 2300 }, { "epoch": 0.26, "learning_rate": 2.4329828146074096e-09, "loss": 0.0011, "step": 2305 }, { "epoch": 0.27, "learning_rate": 2.1865074652190894e-09, "loss": 0.0013, "step": 2310 }, { "epoch": 0.27, "learning_rate": 1.9531376668727174e-09, "loss": 0.0021, "step": 2315 }, { "epoch": 0.27, "learning_rate": 1.7328857612684266e-09, "loss": 0.0013, "step": 2320 }, { "epoch": 0.27, "learning_rate": 1.5257633963700055e-09, "loss": 0.0011, "step": 2325 }, { "epoch": 0.27, "learning_rate": 1.3317815257889998e-09, "loss": 0.0016, "step": 2330 }, { "epoch": 0.27, "learning_rate": 1.1509504082052867e-09, "loss": 0.0016, "step": 2335 }, { "epoch": 0.27, "learning_rate": 9.832796068247883e-10, "loss": 0.0015, "step": 2340 }, { "epoch": 0.27, "learning_rate": 8.287779888734858e-10, "loss": 0.0015, "step": 2345 }, { "epoch": 0.27, "learning_rate": 6.874537251286005e-10, "loss": 0.001, "step": 2350 }, { "epoch": 0.27, "learning_rate": 5.593142894864122e-10, "loss": 0.0011, "step": 2355 }, { "epoch": 0.27, "learning_rate": 4.443664585671858e-10, "loss": 0.0015, "step": 2360 }, { "epoch": 0.27, "learning_rate": 3.4261631135654167e-10, "loss": 0.0012, "step": 2365 }, { "epoch": 0.27, "learning_rate": 2.54069228884074e-10, "loss": 0.0012, "step": 2370 }, { "epoch": 0.27, "learning_rate": 1.7872989393888372e-10, "loss": 0.0014, "step": 2375 }, { "epoch": 0.27, "learning_rate": 1.1660229082177675e-10, "loss": 0.0012, "step": 2380 }, { "epoch": 0.27, "learning_rate": 6.768970513457151e-11, "loss": 0.001, "step": 2385 }, { "epoch": 0.27, "learning_rate": 3.19947236064877e-11, "loss": 0.0011, "step": 2390 }, { "epoch": 0.27, "learning_rate": 9.519233957172579e-12, "loss": 0.0011, "step": 2395 }, { "epoch": 0.28, "learning_rate": 2.644247969474378e-13, "loss": 0.0013, "step": 2400 }, { "epoch": 0.28, "step": 2400, "total_flos": 3.3948129315510026e+18, "train_loss": 0.09296217595246466, "train_runtime": 26220.1068, "train_samples_per_second": 1.465, "train_steps_per_second": 0.092 } ], "logging_steps": 5, "max_steps": 2400, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 3.3948129315510026e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }