{ "best_metric": 1.0445035696029663, "best_model_checkpoint": "/home/stephan/code/molreactgen/checkpoints/2023-05-09_21-49-53_experiment/checkpoint-248650", "epoch": 49.99748655305886, "global_step": 248650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 1.0054293183189222e-05, "loss": 4.306, "step": 100 }, { "epoch": 0.04, "learning_rate": 2.0108586366378444e-05, "loss": 3.8922, "step": 200 }, { "epoch": 0.06, "learning_rate": 3.016287954956767e-05, "loss": 3.6512, "step": 300 }, { "epoch": 0.08, "learning_rate": 4.021717273275689e-05, "loss": 3.44, "step": 400 }, { "epoch": 0.1, "learning_rate": 5.027146591594611e-05, "loss": 3.2164, "step": 500 }, { "epoch": 0.12, "learning_rate": 6.032575909913534e-05, "loss": 3.0072, "step": 600 }, { "epoch": 0.14, "learning_rate": 7.038005228232456e-05, "loss": 2.8143, "step": 700 }, { "epoch": 0.16, "learning_rate": 8.043434546551378e-05, "loss": 2.6472, "step": 800 }, { "epoch": 0.18, "learning_rate": 9.048863864870299e-05, "loss": 2.51, "step": 900 }, { "epoch": 0.2, "learning_rate": 0.00010054293183189222, "loss": 2.3891, "step": 1000 }, { "epoch": 0.22, "learning_rate": 0.00011059722501508144, "loss": 2.2905, "step": 1100 }, { "epoch": 0.24, "learning_rate": 0.00012065151819827068, "loss": 2.2056, "step": 1200 }, { "epoch": 0.26, "learning_rate": 0.00013070581138145988, "loss": 2.137, "step": 1300 }, { "epoch": 0.28, "learning_rate": 0.00014076010456464912, "loss": 2.0792, "step": 1400 }, { "epoch": 0.3, "learning_rate": 0.00015081439774783834, "loss": 2.0301, "step": 1500 }, { "epoch": 0.32, "learning_rate": 0.00016086869093102755, "loss": 1.9879, "step": 1600 }, { "epoch": 0.34, "learning_rate": 0.00017092298411421677, "loss": 1.9499, "step": 1700 }, { "epoch": 0.36, "learning_rate": 0.00018097727729740598, "loss": 1.9279, "step": 1800 }, { "epoch": 0.38, "learning_rate": 0.0001910315704805952, "loss": 1.8925, "step": 1900 }, { "epoch": 0.4, "learning_rate": 0.00020108586366378444, "loss": 1.8673, "step": 2000 }, { "epoch": 0.42, "learning_rate": 0.00021114015684697366, "loss": 1.8431, "step": 2100 }, { "epoch": 0.44, "learning_rate": 0.00022119445003016287, "loss": 1.8236, "step": 2200 }, { "epoch": 0.46, "learning_rate": 0.0002312487432133521, "loss": 1.8023, "step": 2300 }, { "epoch": 0.48, "learning_rate": 0.00024130303639654136, "loss": 1.7889, "step": 2400 }, { "epoch": 0.5, "learning_rate": 0.00025135732957973054, "loss": 1.7671, "step": 2500 }, { "epoch": 0.52, "learning_rate": 0.00026141162276291976, "loss": 1.7506, "step": 2600 }, { "epoch": 0.54, "learning_rate": 0.00027146591594610903, "loss": 1.737, "step": 2700 }, { "epoch": 0.56, "learning_rate": 0.00028152020912929824, "loss": 1.7272, "step": 2800 }, { "epoch": 0.58, "learning_rate": 0.00029157450231248746, "loss": 1.7112, "step": 2900 }, { "epoch": 0.6, "learning_rate": 0.0003016287954956767, "loss": 1.699, "step": 3000 }, { "epoch": 0.62, "learning_rate": 0.0003116830886788659, "loss": 1.6933, "step": 3100 }, { "epoch": 0.64, "learning_rate": 0.0003217373818620551, "loss": 1.6788, "step": 3200 }, { "epoch": 0.66, "learning_rate": 0.0003317916750452443, "loss": 1.6694, "step": 3300 }, { "epoch": 0.68, "learning_rate": 0.00034184596822843354, "loss": 1.6607, "step": 3400 }, { "epoch": 0.7, "learning_rate": 0.00035190026141162275, "loss": 1.6534, "step": 3500 }, { "epoch": 0.72, "learning_rate": 0.00036195455459481197, "loss": 1.645, "step": 3600 }, { "epoch": 0.74, "learning_rate": 0.0003720088477780012, "loss": 1.632, "step": 3700 }, { "epoch": 0.76, "learning_rate": 0.0003820631409611904, "loss": 1.6229, "step": 3800 }, { "epoch": 0.78, "learning_rate": 0.00039211743414437967, "loss": 1.6161, "step": 3900 }, { "epoch": 0.8, "learning_rate": 0.0004021717273275689, "loss": 1.6061, "step": 4000 }, { "epoch": 0.82, "learning_rate": 0.0004122260205107581, "loss": 1.6006, "step": 4100 }, { "epoch": 0.84, "learning_rate": 0.0004222803136939473, "loss": 1.5875, "step": 4200 }, { "epoch": 0.86, "learning_rate": 0.00043233460687713653, "loss": 1.5828, "step": 4300 }, { "epoch": 0.88, "learning_rate": 0.00044238890006032574, "loss": 1.5849, "step": 4400 }, { "epoch": 0.9, "learning_rate": 0.00045244319324351496, "loss": 1.572, "step": 4500 }, { "epoch": 0.92, "learning_rate": 0.0004624974864267042, "loss": 1.565, "step": 4600 }, { "epoch": 0.95, "learning_rate": 0.0004725517796098934, "loss": 1.5589, "step": 4700 }, { "epoch": 0.97, "learning_rate": 0.0004826060727930827, "loss": 1.5519, "step": 4800 }, { "epoch": 0.99, "learning_rate": 0.0004926603659762719, "loss": 1.5489, "step": 4900 }, { "epoch": 1.0, "eval_accuracy": 0.38639571407117423, "eval_loss": 1.490514874458313, "eval_runtime": 19.6663, "eval_samples_per_second": 4045.908, "eval_steps_per_second": 15.814, "step": 4973 }, { "epoch": 1.01, "learning_rate": 0.0005027146591594611, "loss": 1.5386, "step": 5000 }, { "epoch": 1.03, "learning_rate": 0.0005127689523426503, "loss": 1.5361, "step": 5100 }, { "epoch": 1.05, "learning_rate": 0.0005228232455258395, "loss": 1.5279, "step": 5200 }, { "epoch": 1.07, "learning_rate": 0.0005328775387090288, "loss": 1.5273, "step": 5300 }, { "epoch": 1.09, "learning_rate": 0.0005429318318922181, "loss": 1.5192, "step": 5400 }, { "epoch": 1.11, "learning_rate": 0.0005529861250754073, "loss": 1.5175, "step": 5500 }, { "epoch": 1.13, "learning_rate": 0.0005630404182585965, "loss": 1.5165, "step": 5600 }, { "epoch": 1.15, "learning_rate": 0.0005730947114417857, "loss": 1.5102, "step": 5700 }, { "epoch": 1.17, "learning_rate": 0.0005831490046249749, "loss": 1.5006, "step": 5800 }, { "epoch": 1.19, "learning_rate": 0.0005932032978081641, "loss": 1.4952, "step": 5900 }, { "epoch": 1.21, "learning_rate": 0.0006032575909913533, "loss": 1.4956, "step": 6000 }, { "epoch": 1.23, "learning_rate": 0.0006133118841745426, "loss": 1.4906, "step": 6100 }, { "epoch": 1.25, "learning_rate": 0.0006233661773577318, "loss": 1.4845, "step": 6200 }, { "epoch": 1.27, "learning_rate": 0.0006334204705409209, "loss": 1.4826, "step": 6300 }, { "epoch": 1.29, "learning_rate": 0.0006434747637241102, "loss": 1.4844, "step": 6400 }, { "epoch": 1.31, "learning_rate": 0.0006535290569072995, "loss": 1.4789, "step": 6500 }, { "epoch": 1.33, "learning_rate": 0.0006635833500904886, "loss": 1.474, "step": 6600 }, { "epoch": 1.35, "learning_rate": 0.000673637643273678, "loss": 1.4716, "step": 6700 }, { "epoch": 1.37, "learning_rate": 0.0006836919364568671, "loss": 1.471, "step": 6800 }, { "epoch": 1.39, "learning_rate": 0.0006937462296400564, "loss": 1.4675, "step": 6900 }, { "epoch": 1.41, "learning_rate": 0.0007038005228232455, "loss": 1.4627, "step": 7000 }, { "epoch": 1.43, "learning_rate": 0.0007138548160064348, "loss": 1.4575, "step": 7100 }, { "epoch": 1.45, "learning_rate": 0.0007239091091896239, "loss": 1.4615, "step": 7200 }, { "epoch": 1.47, "learning_rate": 0.0007339634023728133, "loss": 1.451, "step": 7300 }, { "epoch": 1.49, "learning_rate": 0.0007440176955560024, "loss": 1.4533, "step": 7400 }, { "epoch": 1.51, "learning_rate": 0.0007540719887391917, "loss": 1.4469, "step": 7500 }, { "epoch": 1.53, "learning_rate": 0.0007641262819223808, "loss": 1.4471, "step": 7600 }, { "epoch": 1.55, "learning_rate": 0.0007741805751055701, "loss": 1.4433, "step": 7700 }, { "epoch": 1.57, "learning_rate": 0.0007842348682887593, "loss": 1.4447, "step": 7800 }, { "epoch": 1.59, "learning_rate": 0.0007942891614719485, "loss": 1.4389, "step": 7900 }, { "epoch": 1.61, "learning_rate": 0.0008043434546551378, "loss": 1.4352, "step": 8000 }, { "epoch": 1.63, "learning_rate": 0.000814397747838327, "loss": 1.432, "step": 8100 }, { "epoch": 1.65, "learning_rate": 0.0008244520410215162, "loss": 1.4366, "step": 8200 }, { "epoch": 1.67, "learning_rate": 0.0008345063342047054, "loss": 1.4331, "step": 8300 }, { "epoch": 1.69, "learning_rate": 0.0008445606273878946, "loss": 1.4331, "step": 8400 }, { "epoch": 1.71, "learning_rate": 0.0008546149205710838, "loss": 1.4229, "step": 8500 }, { "epoch": 1.73, "learning_rate": 0.0008646692137542731, "loss": 1.4255, "step": 8600 }, { "epoch": 1.75, "learning_rate": 0.0008747235069374624, "loss": 1.4223, "step": 8700 }, { "epoch": 1.77, "learning_rate": 0.0008847778001206515, "loss": 1.4239, "step": 8800 }, { "epoch": 1.79, "learning_rate": 0.0008948320933038408, "loss": 1.4214, "step": 8900 }, { "epoch": 1.81, "learning_rate": 0.0009048863864870299, "loss": 1.4187, "step": 9000 }, { "epoch": 1.83, "learning_rate": 0.0009149406796702192, "loss": 1.4181, "step": 9100 }, { "epoch": 1.85, "learning_rate": 0.0009249949728534083, "loss": 1.4188, "step": 9200 }, { "epoch": 1.87, "learning_rate": 0.0009350492660365977, "loss": 1.4166, "step": 9300 }, { "epoch": 1.89, "learning_rate": 0.0009451035592197868, "loss": 1.4089, "step": 9400 }, { "epoch": 1.91, "learning_rate": 0.0009551578524029761, "loss": 1.4085, "step": 9500 }, { "epoch": 1.93, "learning_rate": 0.0009652121455861654, "loss": 1.4111, "step": 9600 }, { "epoch": 1.95, "learning_rate": 0.0009752664387693545, "loss": 1.4065, "step": 9700 }, { "epoch": 1.97, "learning_rate": 0.0009853207319525437, "loss": 1.4065, "step": 9800 }, { "epoch": 1.99, "learning_rate": 0.000995375025135733, "loss": 1.4079, "step": 9900 }, { "epoch": 2.0, "eval_accuracy": 0.39677088556687123, "eval_loss": 1.357865333557129, "eval_runtime": 19.5955, "eval_samples_per_second": 4060.518, "eval_steps_per_second": 15.871, "step": 9946 }, { "epoch": 2.01, "learning_rate": 0.0010054293183189222, "loss": 1.4024, "step": 10000 }, { "epoch": 2.03, "learning_rate": 0.0010154836115021115, "loss": 1.3983, "step": 10100 }, { "epoch": 2.05, "learning_rate": 0.0010255379046853006, "loss": 1.3965, "step": 10200 }, { "epoch": 2.07, "learning_rate": 0.00103559219786849, "loss": 1.3945, "step": 10300 }, { "epoch": 2.09, "learning_rate": 0.0010455459481198471, "loss": 1.3989, "step": 10400 }, { "epoch": 2.11, "learning_rate": 0.0010556002413030364, "loss": 1.3975, "step": 10500 }, { "epoch": 2.13, "learning_rate": 0.0010655539915543938, "loss": 1.3924, "step": 10600 }, { "epoch": 2.15, "learning_rate": 0.001075608284737583, "loss": 1.3933, "step": 10700 }, { "epoch": 2.17, "learning_rate": 0.0010856625779207723, "loss": 1.394, "step": 10800 }, { "epoch": 2.19, "learning_rate": 0.0010957168711039614, "loss": 1.3945, "step": 10900 }, { "epoch": 2.21, "learning_rate": 0.0011057711642871507, "loss": 1.3929, "step": 11000 }, { "epoch": 2.23, "learning_rate": 0.0011158254574703398, "loss": 1.3943, "step": 11100 }, { "epoch": 2.25, "learning_rate": 0.0011258797506535291, "loss": 1.3893, "step": 11200 }, { "epoch": 2.27, "learning_rate": 0.0011359340438367182, "loss": 1.3908, "step": 11300 }, { "epoch": 2.29, "learning_rate": 0.0011459883370199076, "loss": 1.3887, "step": 11400 }, { "epoch": 2.31, "learning_rate": 0.0011560426302030967, "loss": 1.3902, "step": 11500 }, { "epoch": 2.33, "learning_rate": 0.001166096923386286, "loss": 1.3934, "step": 11600 }, { "epoch": 2.35, "learning_rate": 0.001176151216569475, "loss": 1.3857, "step": 11700 }, { "epoch": 2.37, "learning_rate": 0.0011862055097526644, "loss": 1.3885, "step": 11800 }, { "epoch": 2.39, "learning_rate": 0.0011962598029358535, "loss": 1.3847, "step": 11900 }, { "epoch": 2.41, "learning_rate": 0.0012063140961190428, "loss": 1.3863, "step": 12000 }, { "epoch": 2.43, "learning_rate": 0.001216368389302232, "loss": 1.3846, "step": 12100 }, { "epoch": 2.45, "learning_rate": 0.0012264226824854213, "loss": 1.3815, "step": 12200 }, { "epoch": 2.47, "learning_rate": 0.0012364769756686104, "loss": 1.3876, "step": 12300 }, { "epoch": 2.49, "learning_rate": 0.0012465312688517997, "loss": 1.3812, "step": 12400 }, { "epoch": 2.51, "learning_rate": 0.0012565855620349888, "loss": 1.3838, "step": 12500 }, { "epoch": 2.53, "learning_rate": 0.0012666398552181781, "loss": 1.3891, "step": 12600 }, { "epoch": 2.55, "learning_rate": 0.0012766941484013675, "loss": 1.3852, "step": 12700 }, { "epoch": 2.57, "learning_rate": 0.0012867484415845566, "loss": 1.3856, "step": 12800 }, { "epoch": 2.59, "learning_rate": 0.0012968027347677459, "loss": 1.3816, "step": 12900 }, { "epoch": 2.61, "learning_rate": 0.001306857027950935, "loss": 1.3808, "step": 13000 }, { "epoch": 2.63, "learning_rate": 0.0013169113211341243, "loss": 1.3808, "step": 13100 }, { "epoch": 2.65, "learning_rate": 0.0013269656143173134, "loss": 1.3802, "step": 13200 }, { "epoch": 2.67, "learning_rate": 0.0013370199075005028, "loss": 1.3781, "step": 13300 }, { "epoch": 2.69, "learning_rate": 0.0013470742006836919, "loss": 1.3813, "step": 13400 }, { "epoch": 2.71, "learning_rate": 0.0013571284938668812, "loss": 1.3789, "step": 13500 }, { "epoch": 2.73, "learning_rate": 0.0013671827870500705, "loss": 1.3808, "step": 13600 }, { "epoch": 2.75, "learning_rate": 0.0013772370802332596, "loss": 1.3793, "step": 13700 }, { "epoch": 2.77, "learning_rate": 0.001387291373416449, "loss": 1.3839, "step": 13800 }, { "epoch": 2.79, "learning_rate": 0.001397345666599638, "loss": 1.3746, "step": 13900 }, { "epoch": 2.82, "learning_rate": 0.0014072994168509952, "loss": 1.3719, "step": 14000 }, { "epoch": 2.84, "learning_rate": 0.0014173537100341845, "loss": 1.3761, "step": 14100 }, { "epoch": 2.86, "learning_rate": 0.0014274080032173739, "loss": 1.3775, "step": 14200 }, { "epoch": 2.88, "learning_rate": 0.0014374622964005632, "loss": 1.3781, "step": 14300 }, { "epoch": 2.9, "learning_rate": 0.0014475165895837523, "loss": 1.376, "step": 14400 }, { "epoch": 2.92, "learning_rate": 0.0014575708827669414, "loss": 1.3759, "step": 14500 }, { "epoch": 2.94, "learning_rate": 0.0014676251759501307, "loss": 1.3709, "step": 14600 }, { "epoch": 2.96, "learning_rate": 0.00147767946913332, "loss": 1.3724, "step": 14700 }, { "epoch": 2.98, "learning_rate": 0.0014877337623165092, "loss": 1.3792, "step": 14800 }, { "epoch": 3.0, "learning_rate": 0.0014977880554996985, "loss": 1.3705, "step": 14900 }, { "epoch": 3.0, "eval_accuracy": 0.3990531464846882, "eval_loss": 1.3305182456970215, "eval_runtime": 19.8107, "eval_samples_per_second": 4016.413, "eval_steps_per_second": 15.699, "step": 14919 }, { "epoch": 3.02, "learning_rate": 0.0015078423486828876, "loss": 1.3671, "step": 15000 }, { "epoch": 3.04, "learning_rate": 0.001517896641866077, "loss": 1.3701, "step": 15100 }, { "epoch": 3.06, "learning_rate": 0.001527950935049266, "loss": 1.3686, "step": 15200 }, { "epoch": 3.08, "learning_rate": 0.0015380052282324553, "loss": 1.3682, "step": 15300 }, { "epoch": 3.1, "learning_rate": 0.0015480595214156445, "loss": 1.3661, "step": 15400 }, { "epoch": 3.12, "learning_rate": 0.0015581138145988338, "loss": 1.3605, "step": 15500 }, { "epoch": 3.14, "learning_rate": 0.0015681681077820229, "loss": 1.3689, "step": 15600 }, { "epoch": 3.16, "learning_rate": 0.0015782224009652122, "loss": 1.3741, "step": 15700 }, { "epoch": 3.18, "learning_rate": 0.0015882766941484015, "loss": 1.3674, "step": 15800 }, { "epoch": 3.2, "learning_rate": 0.0015983309873315906, "loss": 1.367, "step": 15900 }, { "epoch": 3.22, "learning_rate": 0.0016083852805147797, "loss": 1.3694, "step": 16000 }, { "epoch": 3.24, "learning_rate": 0.001618439573697969, "loss": 1.3683, "step": 16100 }, { "epoch": 3.26, "learning_rate": 0.0016284938668811584, "loss": 1.3669, "step": 16200 }, { "epoch": 3.28, "learning_rate": 0.0016385481600643475, "loss": 1.3713, "step": 16300 }, { "epoch": 3.3, "learning_rate": 0.0016486024532475366, "loss": 1.3663, "step": 16400 }, { "epoch": 3.32, "learning_rate": 0.001658656746430726, "loss": 1.3708, "step": 16500 }, { "epoch": 3.34, "learning_rate": 0.0016687110396139153, "loss": 1.3693, "step": 16600 }, { "epoch": 3.36, "learning_rate": 0.0016787653327971046, "loss": 1.3685, "step": 16700 }, { "epoch": 3.38, "learning_rate": 0.0016888196259802935, "loss": 1.3692, "step": 16800 }, { "epoch": 3.4, "learning_rate": 0.0016988739191634828, "loss": 1.3682, "step": 16900 }, { "epoch": 3.42, "learning_rate": 0.0017089282123466721, "loss": 1.3657, "step": 17000 }, { "epoch": 3.44, "learning_rate": 0.0017189825055298614, "loss": 1.3685, "step": 17100 }, { "epoch": 3.46, "learning_rate": 0.0017290367987130505, "loss": 1.3683, "step": 17200 }, { "epoch": 3.48, "learning_rate": 0.0017390910918962397, "loss": 1.368, "step": 17300 }, { "epoch": 3.5, "learning_rate": 0.001749145385079429, "loss": 1.368, "step": 17400 }, { "epoch": 3.52, "learning_rate": 0.0017591996782626183, "loss": 1.3668, "step": 17500 }, { "epoch": 3.54, "learning_rate": 0.0017692539714458076, "loss": 1.363, "step": 17600 }, { "epoch": 3.56, "learning_rate": 0.0017793082646289965, "loss": 1.3651, "step": 17700 }, { "epoch": 3.58, "learning_rate": 0.0017893625578121858, "loss": 1.363, "step": 17800 }, { "epoch": 3.6, "learning_rate": 0.0017994168509953752, "loss": 1.3652, "step": 17900 }, { "epoch": 3.62, "learning_rate": 0.0018094711441785645, "loss": 1.3666, "step": 18000 }, { "epoch": 3.64, "learning_rate": 0.0018194248944299217, "loss": 1.3653, "step": 18100 }, { "epoch": 3.66, "learning_rate": 0.001829479187613111, "loss": 1.3685, "step": 18200 }, { "epoch": 3.68, "learning_rate": 0.0018395334807963, "loss": 1.3641, "step": 18300 }, { "epoch": 3.7, "learning_rate": 0.0018495877739794892, "loss": 1.3642, "step": 18400 }, { "epoch": 3.72, "learning_rate": 0.0018596420671626785, "loss": 1.3668, "step": 18500 }, { "epoch": 3.74, "learning_rate": 0.0018696963603458678, "loss": 1.3662, "step": 18600 }, { "epoch": 3.76, "learning_rate": 0.001879650110597225, "loss": 1.3659, "step": 18700 }, { "epoch": 3.78, "learning_rate": 0.0018897044037804143, "loss": 1.368, "step": 18800 }, { "epoch": 3.8, "learning_rate": 0.0018997586969636035, "loss": 1.3631, "step": 18900 }, { "epoch": 3.82, "learning_rate": 0.0019098129901467928, "loss": 1.3629, "step": 19000 }, { "epoch": 3.84, "learning_rate": 0.0019198672833299819, "loss": 1.3609, "step": 19100 }, { "epoch": 3.86, "learning_rate": 0.0019299215765131712, "loss": 1.368, "step": 19200 }, { "epoch": 3.88, "learning_rate": 0.0019399758696963603, "loss": 1.3663, "step": 19300 }, { "epoch": 3.9, "learning_rate": 0.0019500301628795496, "loss": 1.3684, "step": 19400 }, { "epoch": 3.92, "learning_rate": 0.0019600844560627387, "loss": 1.3653, "step": 19500 }, { "epoch": 3.94, "learning_rate": 0.001970138749245928, "loss": 1.3657, "step": 19600 }, { "epoch": 3.96, "learning_rate": 0.0019801930424291174, "loss": 1.3619, "step": 19700 }, { "epoch": 3.98, "learning_rate": 0.0019902473356123067, "loss": 1.3648, "step": 19800 }, { "epoch": 4.0, "eval_accuracy": 0.39993003357370305, "eval_loss": 1.3229814767837524, "eval_runtime": 19.7901, "eval_samples_per_second": 4020.592, "eval_steps_per_second": 15.715, "step": 19893 }, { "epoch": 4.0, "learning_rate": 0.0020003016287954956, "loss": 1.3611, "step": 19900 }, { "epoch": 4.02, "learning_rate": 0.002010355921978685, "loss": 1.3592, "step": 20000 }, { "epoch": 4.04, "learning_rate": 0.0020204102151618743, "loss": 1.3543, "step": 20100 }, { "epoch": 4.06, "learning_rate": 0.0020304645083450636, "loss": 1.3601, "step": 20200 }, { "epoch": 4.08, "learning_rate": 0.0020405188015282525, "loss": 1.3539, "step": 20300 }, { "epoch": 4.1, "learning_rate": 0.002050573094711442, "loss": 1.3579, "step": 20400 }, { "epoch": 4.12, "learning_rate": 0.002060627387894631, "loss": 1.3596, "step": 20500 }, { "epoch": 4.14, "learning_rate": 0.0020706816810778204, "loss": 1.3603, "step": 20600 }, { "epoch": 4.16, "learning_rate": 0.0020807359742610093, "loss": 1.3594, "step": 20700 }, { "epoch": 4.18, "learning_rate": 0.0020907902674441987, "loss": 1.3609, "step": 20800 }, { "epoch": 4.2, "learning_rate": 0.002100844560627388, "loss": 1.3648, "step": 20900 }, { "epoch": 4.22, "learning_rate": 0.0021108988538105773, "loss": 1.3616, "step": 21000 }, { "epoch": 4.24, "learning_rate": 0.0021209531469937666, "loss": 1.3631, "step": 21100 }, { "epoch": 4.26, "learning_rate": 0.0021310074401769555, "loss": 1.3639, "step": 21200 }, { "epoch": 4.28, "learning_rate": 0.002140961190428313, "loss": 1.3646, "step": 21300 }, { "epoch": 4.3, "learning_rate": 0.0021510154836115025, "loss": 1.3593, "step": 21400 }, { "epoch": 4.32, "learning_rate": 0.0021610697767946913, "loss": 1.3604, "step": 21500 }, { "epoch": 4.34, "learning_rate": 0.0021711240699778807, "loss": 1.362, "step": 21600 }, { "epoch": 4.36, "learning_rate": 0.00218117836316107, "loss": 1.3583, "step": 21700 }, { "epoch": 4.38, "learning_rate": 0.0021912326563442593, "loss": 1.364, "step": 21800 }, { "epoch": 4.4, "learning_rate": 0.002201286949527448, "loss": 1.3625, "step": 21900 }, { "epoch": 4.42, "learning_rate": 0.0022113412427106375, "loss": 1.367, "step": 22000 }, { "epoch": 4.44, "learning_rate": 0.002221395535893827, "loss": 1.3646, "step": 22100 }, { "epoch": 4.46, "learning_rate": 0.002231449829077016, "loss": 1.3625, "step": 22200 }, { "epoch": 4.48, "learning_rate": 0.002241504122260205, "loss": 1.3617, "step": 22300 }, { "epoch": 4.5, "learning_rate": 0.0022515584154433944, "loss": 1.3616, "step": 22400 }, { "epoch": 4.52, "learning_rate": 0.0022616127086265837, "loss": 1.3624, "step": 22500 }, { "epoch": 4.54, "learning_rate": 0.002271667001809773, "loss": 1.3635, "step": 22600 }, { "epoch": 4.56, "learning_rate": 0.002281721294992962, "loss": 1.36, "step": 22700 }, { "epoch": 4.58, "learning_rate": 0.0022917755881761512, "loss": 1.3599, "step": 22800 }, { "epoch": 4.6, "learning_rate": 0.0023018298813593406, "loss": 1.3674, "step": 22900 }, { "epoch": 4.62, "learning_rate": 0.00231188417454253, "loss": 1.3667, "step": 23000 }, { "epoch": 4.64, "learning_rate": 0.002321938467725719, "loss": 1.3555, "step": 23100 }, { "epoch": 4.66, "learning_rate": 0.002331992760908908, "loss": 1.3594, "step": 23200 }, { "epoch": 4.69, "learning_rate": 0.0023420470540920974, "loss": 1.3629, "step": 23300 }, { "epoch": 4.71, "learning_rate": 0.0023521013472752868, "loss": 1.3641, "step": 23400 }, { "epoch": 4.73, "learning_rate": 0.0023621556404584756, "loss": 1.3594, "step": 23500 }, { "epoch": 4.75, "learning_rate": 0.002372209933641665, "loss": 1.3594, "step": 23600 }, { "epoch": 4.77, "learning_rate": 0.0023821636838930226, "loss": 1.3639, "step": 23700 }, { "epoch": 4.79, "learning_rate": 0.0023922179770762115, "loss": 1.3609, "step": 23800 }, { "epoch": 4.81, "learning_rate": 0.002402272270259401, "loss": 1.3605, "step": 23900 }, { "epoch": 4.83, "learning_rate": 0.00241232656344259, "loss": 1.3608, "step": 24000 }, { "epoch": 4.85, "learning_rate": 0.0024223808566257794, "loss": 1.3646, "step": 24100 }, { "epoch": 4.87, "learning_rate": 0.0024324351498089683, "loss": 1.3599, "step": 24200 }, { "epoch": 4.89, "learning_rate": 0.0024424894429921577, "loss": 1.3586, "step": 24300 }, { "epoch": 4.91, "learning_rate": 0.002452543736175347, "loss": 1.3608, "step": 24400 }, { "epoch": 4.93, "learning_rate": 0.0024625980293585363, "loss": 1.3615, "step": 24500 }, { "epoch": 4.95, "learning_rate": 0.002472652322541725, "loss": 1.3629, "step": 24600 }, { "epoch": 4.97, "learning_rate": 0.0024827066157249145, "loss": 1.3638, "step": 24700 }, { "epoch": 4.99, "learning_rate": 0.002492760908908104, "loss": 1.3652, "step": 24800 }, { "epoch": 5.0, "eval_accuracy": 0.39963747273250233, "eval_loss": 1.3215824365615845, "eval_runtime": 19.5099, "eval_samples_per_second": 4078.343, "eval_steps_per_second": 15.941, "step": 24866 }, { "epoch": 5.01, "learning_rate": 0.002499999903431858, "loss": 1.3594, "step": 24900 }, { "epoch": 5.03, "learning_rate": 0.0024999979819234353, "loss": 1.3548, "step": 25000 }, { "epoch": 5.05, "learning_rate": 0.0024999935969463615, "loss": 1.3572, "step": 25100 }, { "epoch": 5.07, "learning_rate": 0.0024999867485092793, "loss": 1.3579, "step": 25200 }, { "epoch": 5.09, "learning_rate": 0.002499977436625685, "loss": 1.3553, "step": 25300 }, { "epoch": 5.11, "learning_rate": 0.0024999656613139305, "loss": 1.3558, "step": 25400 }, { "epoch": 5.13, "learning_rate": 0.0024999514225972227, "loss": 1.3556, "step": 25500 }, { "epoch": 5.15, "learning_rate": 0.002499934720503622, "loss": 1.356, "step": 25600 }, { "epoch": 5.17, "learning_rate": 0.0024999155550660458, "loss": 1.3537, "step": 25700 }, { "epoch": 5.19, "learning_rate": 0.002499893926322264, "loss": 1.3582, "step": 25800 }, { "epoch": 5.21, "learning_rate": 0.0024998698343149024, "loss": 1.3528, "step": 25900 }, { "epoch": 5.23, "learning_rate": 0.0024998432790914404, "loss": 1.3531, "step": 26000 }, { "epoch": 5.25, "learning_rate": 0.0024998145630805626, "loss": 1.3578, "step": 26100 }, { "epoch": 5.27, "learning_rate": 0.002499783106217526, "loss": 1.3548, "step": 26200 }, { "epoch": 5.29, "learning_rate": 0.0024997491863093103, "loss": 1.3516, "step": 26300 }, { "epoch": 5.31, "learning_rate": 0.002499712803422766, "loss": 1.3549, "step": 26400 }, { "epoch": 5.33, "learning_rate": 0.0024996739576295945, "loss": 1.3539, "step": 26500 }, { "epoch": 5.35, "learning_rate": 0.0024996326490063525, "loss": 1.356, "step": 26600 }, { "epoch": 5.37, "learning_rate": 0.0024995888776344504, "loss": 1.3513, "step": 26700 }, { "epoch": 5.39, "learning_rate": 0.002499542643600152, "loss": 1.354, "step": 26800 }, { "epoch": 5.41, "learning_rate": 0.0024994939469945737, "loss": 1.3535, "step": 26900 }, { "epoch": 5.43, "learning_rate": 0.0024994427879136854, "loss": 1.3483, "step": 27000 }, { "epoch": 5.45, "learning_rate": 0.002499389166458312, "loss": 1.354, "step": 27100 }, { "epoch": 5.47, "learning_rate": 0.0024993330827341276, "loss": 1.3512, "step": 27200 }, { "epoch": 5.49, "learning_rate": 0.0024992745368516618, "loss": 1.3492, "step": 27300 }, { "epoch": 5.51, "learning_rate": 0.0024992135289262953, "loss": 1.3521, "step": 27400 }, { "epoch": 5.53, "learning_rate": 0.002499150059078261, "loss": 1.3494, "step": 27500 }, { "epoch": 5.55, "learning_rate": 0.0024990841274326442, "loss": 1.3497, "step": 27600 }, { "epoch": 5.57, "learning_rate": 0.0024990157341193814, "loss": 1.3494, "step": 27700 }, { "epoch": 5.59, "learning_rate": 0.0024989448792732604, "loss": 1.3487, "step": 27800 }, { "epoch": 5.61, "learning_rate": 0.0024988715630339213, "loss": 1.3468, "step": 27900 }, { "epoch": 5.63, "learning_rate": 0.002498795785545853, "loss": 1.3465, "step": 28000 }, { "epoch": 5.65, "learning_rate": 0.002498718341526211, "loss": 1.3505, "step": 28100 }, { "epoch": 5.67, "learning_rate": 0.00249863766660223, "loss": 1.3512, "step": 28200 }, { "epoch": 5.69, "learning_rate": 0.0024985545308904788, "loss": 1.3497, "step": 28300 }, { "epoch": 5.71, "learning_rate": 0.0024984689345547983, "loss": 1.3495, "step": 28400 }, { "epoch": 5.73, "learning_rate": 0.002498380877763881, "loss": 1.3484, "step": 28500 }, { "epoch": 5.75, "learning_rate": 0.0024982903606912666, "loss": 1.3465, "step": 28600 }, { "epoch": 5.77, "learning_rate": 0.0024981973835153442, "loss": 1.3494, "step": 28700 }, { "epoch": 5.79, "learning_rate": 0.0024981019464193513, "loss": 1.3448, "step": 28800 }, { "epoch": 5.81, "learning_rate": 0.002498004049591373, "loss": 1.3447, "step": 28900 }, { "epoch": 5.83, "learning_rate": 0.002497903693224343, "loss": 1.3489, "step": 29000 }, { "epoch": 5.85, "learning_rate": 0.0024978008775160404, "loss": 1.3491, "step": 29100 }, { "epoch": 5.87, "learning_rate": 0.002497695602669093, "loss": 1.3511, "step": 29200 }, { "epoch": 5.89, "learning_rate": 0.002497587868890974, "loss": 1.344, "step": 29300 }, { "epoch": 5.91, "learning_rate": 0.002497477676394002, "loss": 1.3442, "step": 29400 }, { "epoch": 5.93, "learning_rate": 0.002497365025395343, "loss": 1.3518, "step": 29500 }, { "epoch": 5.95, "learning_rate": 0.0024972499161170065, "loss": 1.3396, "step": 29600 }, { "epoch": 5.97, "learning_rate": 0.0024971323487858474, "loss": 1.3427, "step": 29700 }, { "epoch": 5.99, "learning_rate": 0.002497012323633565, "loss": 1.3434, "step": 29800 }, { "epoch": 6.0, "eval_accuracy": 0.4013078853657854, "eval_loss": 1.3029085397720337, "eval_runtime": 20.0428, "eval_samples_per_second": 3969.911, "eval_steps_per_second": 15.517, "step": 29839 }, { "epoch": 6.01, "learning_rate": 0.002496889840896702, "loss": 1.3447, "step": 29900 }, { "epoch": 6.03, "learning_rate": 0.0024967649008166455, "loss": 1.3366, "step": 30000 }, { "epoch": 6.05, "learning_rate": 0.0024966375036396234, "loss": 1.3399, "step": 30100 }, { "epoch": 6.07, "learning_rate": 0.0024965076496167083, "loss": 1.3375, "step": 30200 }, { "epoch": 6.09, "learning_rate": 0.00249637667426921, "loss": 1.3349, "step": 30300 }, { "epoch": 6.11, "learning_rate": 0.0024962419318890756, "loss": 1.3401, "step": 30400 }, { "epoch": 6.13, "learning_rate": 0.0024961047334426316, "loss": 1.339, "step": 30500 }, { "epoch": 6.15, "learning_rate": 0.0024959650792002663, "loss": 1.3457, "step": 30600 }, { "epoch": 6.17, "learning_rate": 0.0024958229694372063, "loss": 1.34, "step": 30700 }, { "epoch": 6.19, "learning_rate": 0.00249567840443352, "loss": 1.3389, "step": 30800 }, { "epoch": 6.21, "learning_rate": 0.0024955313844741115, "loss": 1.3385, "step": 30900 }, { "epoch": 6.23, "learning_rate": 0.002495381909848725, "loss": 1.3409, "step": 31000 }, { "epoch": 6.25, "learning_rate": 0.002495229980851942, "loss": 1.3394, "step": 31100 }, { "epoch": 6.27, "learning_rate": 0.002495075597783181, "loss": 1.3392, "step": 31200 }, { "epoch": 6.29, "learning_rate": 0.0024949187609466963, "loss": 1.3401, "step": 31300 }, { "epoch": 6.31, "learning_rate": 0.002494759470651578, "loss": 1.3375, "step": 31400 }, { "epoch": 6.33, "learning_rate": 0.0024945977272117534, "loss": 1.342, "step": 31500 }, { "epoch": 6.35, "learning_rate": 0.002494433530945982, "loss": 1.335, "step": 31600 }, { "epoch": 6.37, "learning_rate": 0.002494266882177858, "loss": 1.3347, "step": 31700 }, { "epoch": 6.39, "learning_rate": 0.0024940977812358094, "loss": 1.3392, "step": 31800 }, { "epoch": 6.41, "learning_rate": 0.002493926228453097, "loss": 1.3365, "step": 31900 }, { "epoch": 6.43, "learning_rate": 0.0024937522241678133, "loss": 1.3374, "step": 32000 }, { "epoch": 6.45, "learning_rate": 0.0024935757687228814, "loss": 1.3372, "step": 32100 }, { "epoch": 6.47, "learning_rate": 0.002493396862466056, "loss": 1.3375, "step": 32200 }, { "epoch": 6.49, "learning_rate": 0.0024932173314456876, "loss": 1.3388, "step": 32300 }, { "epoch": 6.51, "learning_rate": 0.002493033549126893, "loss": 1.3394, "step": 32400 }, { "epoch": 6.53, "learning_rate": 0.002492847317064799, "loss": 1.3353, "step": 32500 }, { "epoch": 6.56, "learning_rate": 0.002492658635626429, "loss": 1.334, "step": 32600 }, { "epoch": 6.58, "learning_rate": 0.0024924675051836314, "loss": 1.3385, "step": 32700 }, { "epoch": 6.6, "learning_rate": 0.0024922739261130818, "loss": 1.3378, "step": 32800 }, { "epoch": 6.62, "learning_rate": 0.002492077898796282, "loss": 1.3403, "step": 32900 }, { "epoch": 6.64, "learning_rate": 0.002491879423619558, "loss": 1.3408, "step": 33000 }, { "epoch": 6.66, "learning_rate": 0.002491678500974061, "loss": 1.3345, "step": 33100 }, { "epoch": 6.68, "learning_rate": 0.002491475131255764, "loss": 1.3331, "step": 33200 }, { "epoch": 6.7, "learning_rate": 0.002491269314865464, "loss": 1.3382, "step": 33300 }, { "epoch": 6.72, "learning_rate": 0.00249106105220878, "loss": 1.3372, "step": 33400 }, { "epoch": 6.74, "learning_rate": 0.0024908503436961503, "loss": 1.3308, "step": 33500 }, { "epoch": 6.76, "learning_rate": 0.002490637189742836, "loss": 1.3346, "step": 33600 }, { "epoch": 6.78, "learning_rate": 0.002490421590768915, "loss": 1.3356, "step": 33700 }, { "epoch": 6.8, "learning_rate": 0.0024902035471992857, "loss": 1.3314, "step": 33800 }, { "epoch": 6.82, "learning_rate": 0.002489983059463664, "loss": 1.3342, "step": 33900 }, { "epoch": 6.84, "learning_rate": 0.002489760127996581, "loss": 1.3349, "step": 34000 }, { "epoch": 6.86, "learning_rate": 0.0024895347532373864, "loss": 1.336, "step": 34100 }, { "epoch": 6.88, "learning_rate": 0.002489306935630243, "loss": 1.3345, "step": 34200 }, { "epoch": 6.9, "learning_rate": 0.0024890766756241293, "loss": 1.3351, "step": 34300 }, { "epoch": 6.92, "learning_rate": 0.0024888463127784766, "loss": 1.3308, "step": 34400 }, { "epoch": 6.94, "learning_rate": 0.0024886111937531884, "loss": 1.3369, "step": 34500 }, { "epoch": 6.96, "learning_rate": 0.0024883736337000827, "loss": 1.3351, "step": 34600 }, { "epoch": 6.98, "learning_rate": 0.0024881336330873373, "loss": 1.333, "step": 34700 }, { "epoch": 7.0, "learning_rate": 0.0024878911923879405, "loss": 1.3314, "step": 34800 }, { "epoch": 7.0, "eval_accuracy": 0.4019466193393693, "eval_loss": 1.2937954664230347, "eval_runtime": 19.8606, "eval_samples_per_second": 4006.315, "eval_steps_per_second": 15.659, "step": 34812 }, { "epoch": 7.02, "learning_rate": 0.0024876463120796894, "loss": 1.3277, "step": 34900 }, { "epoch": 7.04, "learning_rate": 0.002487398992645188, "loss": 1.3304, "step": 35000 }, { "epoch": 7.06, "learning_rate": 0.002487149234571848, "loss": 1.327, "step": 35100 }, { "epoch": 7.08, "learning_rate": 0.002486897038351888, "loss": 1.3317, "step": 35200 }, { "epoch": 7.1, "learning_rate": 0.0024866424044823303, "loss": 1.3264, "step": 35300 }, { "epoch": 7.12, "learning_rate": 0.0024863853334650015, "loss": 1.3287, "step": 35400 }, { "epoch": 7.14, "learning_rate": 0.002486125825806532, "loss": 1.3266, "step": 35500 }, { "epoch": 7.16, "learning_rate": 0.002485863882018354, "loss": 1.3311, "step": 35600 }, { "epoch": 7.18, "learning_rate": 0.0024855995026166994, "loss": 1.3303, "step": 35700 }, { "epoch": 7.2, "learning_rate": 0.0024853326881226026, "loss": 1.328, "step": 35800 }, { "epoch": 7.22, "learning_rate": 0.002485063439061895, "loss": 1.3339, "step": 35900 }, { "epoch": 7.24, "learning_rate": 0.0024847917559652067, "loss": 1.3296, "step": 36000 }, { "epoch": 7.26, "learning_rate": 0.002484517639367966, "loss": 1.3294, "step": 36100 }, { "epoch": 7.28, "learning_rate": 0.0024842410898103947, "loss": 1.3311, "step": 36200 }, { "epoch": 7.3, "learning_rate": 0.0024839621078375103, "loss": 1.3282, "step": 36300 }, { "epoch": 7.32, "learning_rate": 0.002483680693999126, "loss": 1.3248, "step": 36400 }, { "epoch": 7.34, "learning_rate": 0.0024833996993344934, "loss": 1.3279, "step": 36500 }, { "epoch": 7.36, "learning_rate": 0.002483113447738441, "loss": 1.3267, "step": 36600 }, { "epoch": 7.38, "learning_rate": 0.0024828247659494087, "loss": 1.3265, "step": 36700 }, { "epoch": 7.4, "learning_rate": 0.0024825336545363243, "loss": 1.3287, "step": 36800 }, { "epoch": 7.42, "learning_rate": 0.0024822401140729027, "loss": 1.3274, "step": 36900 }, { "epoch": 7.44, "learning_rate": 0.0024819441451376482, "loss": 1.3317, "step": 37000 }, { "epoch": 7.46, "learning_rate": 0.00248164574831385, "loss": 1.3274, "step": 37100 }, { "epoch": 7.48, "learning_rate": 0.002481344924189581, "loss": 1.3275, "step": 37200 }, { "epoch": 7.5, "learning_rate": 0.0024810416733576997, "loss": 1.3314, "step": 37300 }, { "epoch": 7.52, "learning_rate": 0.002480735996415845, "loss": 1.3308, "step": 37400 }, { "epoch": 7.54, "learning_rate": 0.00248042789396644, "loss": 1.3268, "step": 37500 }, { "epoch": 7.56, "learning_rate": 0.002480117366616685, "loss": 1.3238, "step": 37600 }, { "epoch": 7.58, "learning_rate": 0.002479804414978561, "loss": 1.3264, "step": 37700 }, { "epoch": 7.6, "learning_rate": 0.0024794890396688256, "loss": 1.3302, "step": 37800 }, { "epoch": 7.62, "learning_rate": 0.002479171241309015, "loss": 1.3292, "step": 37900 }, { "epoch": 7.64, "learning_rate": 0.002478851020525438, "loss": 1.3292, "step": 38000 }, { "epoch": 7.66, "learning_rate": 0.0024785283779491787, "loss": 1.3286, "step": 38100 }, { "epoch": 7.68, "learning_rate": 0.0024782033142160946, "loss": 1.3253, "step": 38200 }, { "epoch": 7.7, "learning_rate": 0.002477875829966814, "loss": 1.3284, "step": 38300 }, { "epoch": 7.72, "learning_rate": 0.0024775459258467355, "loss": 1.3302, "step": 38400 }, { "epoch": 7.74, "learning_rate": 0.00247721693771243, "loss": 1.3224, "step": 38500 }, { "epoch": 7.76, "learning_rate": 0.0024768822199884265, "loss": 1.3232, "step": 38600 }, { "epoch": 7.78, "learning_rate": 0.00247654508435181, "loss": 1.3267, "step": 38700 }, { "epoch": 7.8, "learning_rate": 0.002476205531467, "loss": 1.3268, "step": 38800 }, { "epoch": 7.82, "learning_rate": 0.0024758635620031806, "loss": 1.319, "step": 38900 }, { "epoch": 7.84, "learning_rate": 0.0024755191766342964, "loss": 1.3272, "step": 39000 }, { "epoch": 7.86, "learning_rate": 0.0024751723760390552, "loss": 1.3269, "step": 39100 }, { "epoch": 7.88, "learning_rate": 0.0024748231609009247, "loss": 1.3272, "step": 39200 }, { "epoch": 7.9, "learning_rate": 0.0024744715319081293, "loss": 1.3293, "step": 39300 }, { "epoch": 7.92, "learning_rate": 0.002474117489753652, "loss": 1.3235, "step": 39400 }, { "epoch": 7.94, "learning_rate": 0.002473761035135232, "loss": 1.3279, "step": 39500 }, { "epoch": 7.96, "learning_rate": 0.0024734021687553617, "loss": 1.3226, "step": 39600 }, { "epoch": 7.98, "learning_rate": 0.002473040891321287, "loss": 1.3244, "step": 39700 }, { "epoch": 8.0, "eval_accuracy": 0.4026586923819002, "eval_loss": 1.2859399318695068, "eval_runtime": 19.8804, "eval_samples_per_second": 4002.332, "eval_steps_per_second": 15.644, "step": 39786 }, { "epoch": 8.0, "learning_rate": 0.002472677203545006, "loss": 1.3265, "step": 39800 }, { "epoch": 8.02, "learning_rate": 0.002472311106143266, "loss": 1.3222, "step": 39900 }, { "epoch": 8.04, "learning_rate": 0.0024719425998375646, "loss": 1.3229, "step": 40000 }, { "epoch": 8.06, "learning_rate": 0.002471571685354145, "loss": 1.3203, "step": 40100 }, { "epoch": 8.08, "learning_rate": 0.002471198363423998, "loss": 1.324, "step": 40200 }, { "epoch": 8.1, "learning_rate": 0.002470822634782858, "loss": 1.3205, "step": 40300 }, { "epoch": 8.12, "learning_rate": 0.0024704445001712027, "loss": 1.3261, "step": 40400 }, { "epoch": 8.14, "learning_rate": 0.002470067777636028, "loss": 1.3198, "step": 40500 }, { "epoch": 8.16, "learning_rate": 0.0024696848573647666, "loss": 1.319, "step": 40600 }, { "epoch": 8.18, "learning_rate": 0.0024692995333652967, "loss": 1.3251, "step": 40700 }, { "epoch": 8.2, "learning_rate": 0.0024689118063970067, "loss": 1.3224, "step": 40800 }, { "epoch": 8.22, "learning_rate": 0.0024685216772240203, "loss": 1.3196, "step": 40900 }, { "epoch": 8.24, "learning_rate": 0.0024681291466151956, "loss": 1.3172, "step": 41000 }, { "epoch": 8.26, "learning_rate": 0.0024677342153441232, "loss": 1.3211, "step": 41100 }, { "epoch": 8.28, "learning_rate": 0.002467336884189125, "loss": 1.3166, "step": 41200 }, { "epoch": 8.3, "learning_rate": 0.0024669371539332526, "loss": 1.3225, "step": 41300 }, { "epoch": 8.32, "learning_rate": 0.0024665350253642855, "loss": 1.3187, "step": 41400 }, { "epoch": 8.34, "learning_rate": 0.00246613049927473, "loss": 1.3188, "step": 41500 }, { "epoch": 8.36, "learning_rate": 0.002465723576461818, "loss": 1.3194, "step": 41600 }, { "epoch": 8.38, "learning_rate": 0.0024653142577275022, "loss": 1.322, "step": 41700 }, { "epoch": 8.4, "learning_rate": 0.002464902543878461, "loss": 1.3192, "step": 41800 }, { "epoch": 8.43, "learning_rate": 0.00246448843572609, "loss": 1.3229, "step": 41900 }, { "epoch": 8.45, "learning_rate": 0.0024640719340865043, "loss": 1.3235, "step": 42000 }, { "epoch": 8.47, "learning_rate": 0.0024636530397805366, "loss": 1.3213, "step": 42100 }, { "epoch": 8.49, "learning_rate": 0.002463231753633735, "loss": 1.3187, "step": 42200 }, { "epoch": 8.51, "learning_rate": 0.0024628080764763603, "loss": 1.3226, "step": 42300 }, { "epoch": 8.53, "learning_rate": 0.002462382009143387, "loss": 1.3192, "step": 42400 }, { "epoch": 8.55, "learning_rate": 0.002461953552474498, "loss": 1.3222, "step": 42500 }, { "epoch": 8.57, "learning_rate": 0.0024615270275859403, "loss": 1.3172, "step": 42600 }, { "epoch": 8.59, "learning_rate": 0.002461093818655314, "loss": 1.3191, "step": 42700 }, { "epoch": 8.61, "learning_rate": 0.0024606582229275095, "loss": 1.3203, "step": 42800 }, { "epoch": 8.63, "learning_rate": 0.0024602202412609907, "loss": 1.3171, "step": 42900 }, { "epoch": 8.65, "learning_rate": 0.0024597798745189217, "loss": 1.3239, "step": 43000 }, { "epoch": 8.67, "learning_rate": 0.0024593371235691673, "loss": 1.3189, "step": 43100 }, { "epoch": 8.69, "learning_rate": 0.0024588919892842924, "loss": 1.3168, "step": 43200 }, { "epoch": 8.71, "learning_rate": 0.002458444472541557, "loss": 1.3214, "step": 43300 }, { "epoch": 8.73, "learning_rate": 0.0024579945742229177, "loss": 1.321, "step": 43400 }, { "epoch": 8.75, "learning_rate": 0.0024575422952150235, "loss": 1.3202, "step": 43500 }, { "epoch": 8.77, "learning_rate": 0.002457087636409217, "loss": 1.3188, "step": 43600 }, { "epoch": 8.79, "learning_rate": 0.0024566305987015298, "loss": 1.3166, "step": 43700 }, { "epoch": 8.81, "learning_rate": 0.002456171182992681, "loss": 1.3244, "step": 43800 }, { "epoch": 8.83, "learning_rate": 0.002455709390188077, "loss": 1.3186, "step": 43900 }, { "epoch": 8.85, "learning_rate": 0.00245524522119781, "loss": 1.3191, "step": 44000 }, { "epoch": 8.87, "learning_rate": 0.002454778676936653, "loss": 1.3218, "step": 44100 }, { "epoch": 8.89, "learning_rate": 0.0024543097583240615, "loss": 1.3217, "step": 44200 }, { "epoch": 8.91, "learning_rate": 0.0024538384662841704, "loss": 1.3169, "step": 44300 }, { "epoch": 8.93, "learning_rate": 0.0024533648017457917, "loss": 1.3201, "step": 44400 }, { "epoch": 8.95, "learning_rate": 0.002452888765642413, "loss": 1.3189, "step": 44500 }, { "epoch": 8.97, "learning_rate": 0.0024524103589121955, "loss": 1.3206, "step": 44600 }, { "epoch": 8.99, "learning_rate": 0.002451934401988944, "loss": 1.3163, "step": 44700 }, { "epoch": 9.0, "eval_accuracy": 0.4035565789440192, "eval_loss": 1.2766544818878174, "eval_runtime": 19.7806, "eval_samples_per_second": 4022.534, "eval_steps_per_second": 15.723, "step": 44759 }, { "epoch": 9.01, "learning_rate": 0.0024514512805208794, "loss": 1.3158, "step": 44800 }, { "epoch": 9.03, "learning_rate": 0.0024509657912589406, "loss": 1.3109, "step": 44900 }, { "epoch": 9.05, "learning_rate": 0.0024504779351599195, "loss": 1.312, "step": 45000 }, { "epoch": 9.07, "learning_rate": 0.002449987713185271, "loss": 1.3134, "step": 45100 }, { "epoch": 9.09, "learning_rate": 0.002449495126301115, "loss": 1.3156, "step": 45200 }, { "epoch": 9.11, "learning_rate": 0.002449000175478231, "loss": 1.3188, "step": 45300 }, { "epoch": 9.13, "learning_rate": 0.0024485028616920562, "loss": 1.3159, "step": 45400 }, { "epoch": 9.15, "learning_rate": 0.0024480031859226863, "loss": 1.3154, "step": 45500 }, { "epoch": 9.17, "learning_rate": 0.0024475011491548715, "loss": 1.3151, "step": 45600 }, { "epoch": 9.19, "learning_rate": 0.002446996752378015, "loss": 1.3152, "step": 45700 }, { "epoch": 9.21, "learning_rate": 0.0024464899965861704, "loss": 1.3159, "step": 45800 }, { "epoch": 9.23, "learning_rate": 0.002445980882778041, "loss": 1.3189, "step": 45900 }, { "epoch": 9.25, "learning_rate": 0.0024454694119569777, "loss": 1.3132, "step": 46000 }, { "epoch": 9.27, "learning_rate": 0.0024449555851309753, "loss": 1.3136, "step": 46100 }, { "epoch": 9.29, "learning_rate": 0.0024444394033126733, "loss": 1.3154, "step": 46200 }, { "epoch": 9.31, "learning_rate": 0.0024439208675193502, "loss": 1.3148, "step": 46300 }, { "epoch": 9.33, "learning_rate": 0.0024433999787729266, "loss": 1.3119, "step": 46400 }, { "epoch": 9.35, "learning_rate": 0.002442876738099957, "loss": 1.3143, "step": 46500 }, { "epoch": 9.37, "learning_rate": 0.002442351146531634, "loss": 1.316, "step": 46600 }, { "epoch": 9.39, "learning_rate": 0.0024418284961464525, "loss": 1.3157, "step": 46700 }, { "epoch": 9.41, "learning_rate": 0.002441298229382551, "loss": 1.3142, "step": 46800 }, { "epoch": 9.43, "learning_rate": 0.002440765614834186, "loss": 1.3125, "step": 46900 }, { "epoch": 9.45, "learning_rate": 0.0024402306535510226, "loss": 1.3187, "step": 47000 }, { "epoch": 9.47, "learning_rate": 0.0024396933465873506, "loss": 1.316, "step": 47100 }, { "epoch": 9.49, "learning_rate": 0.002439153695002083, "loss": 1.3129, "step": 47200 }, { "epoch": 9.51, "learning_rate": 0.0024386116998587537, "loss": 1.3164, "step": 47300 }, { "epoch": 9.53, "learning_rate": 0.002438067362225514, "loss": 1.3156, "step": 47400 }, { "epoch": 9.55, "learning_rate": 0.0024375206831751335, "loss": 1.3151, "step": 47500 }, { "epoch": 9.57, "learning_rate": 0.0024369716637849944, "loss": 1.3131, "step": 47600 }, { "epoch": 9.59, "learning_rate": 0.0024364203051370926, "loss": 1.3154, "step": 47700 }, { "epoch": 9.61, "learning_rate": 0.0024358666083180336, "loss": 1.3141, "step": 47800 }, { "epoch": 9.63, "learning_rate": 0.0024353105744190314, "loss": 1.3131, "step": 47900 }, { "epoch": 9.65, "learning_rate": 0.0024347522045359044, "loss": 1.3147, "step": 48000 }, { "epoch": 9.67, "learning_rate": 0.0024341914997690764, "loss": 1.3159, "step": 48100 }, { "epoch": 9.69, "learning_rate": 0.002433628461223572, "loss": 1.3135, "step": 48200 }, { "epoch": 9.71, "learning_rate": 0.0024330630900090155, "loss": 1.3133, "step": 48300 }, { "epoch": 9.73, "learning_rate": 0.0024324953872396277, "loss": 1.3126, "step": 48400 }, { "epoch": 9.75, "learning_rate": 0.0024319253540342253, "loss": 1.3144, "step": 48500 }, { "epoch": 9.77, "learning_rate": 0.0024313529915162163, "loss": 1.315, "step": 48600 }, { "epoch": 9.79, "learning_rate": 0.002430784059241426, "loss": 1.315, "step": 48700 }, { "epoch": 9.81, "learning_rate": 0.0024302070647516908, "loss": 1.3129, "step": 48800 }, { "epoch": 9.83, "learning_rate": 0.002429627744335717, "loss": 1.3163, "step": 48900 }, { "epoch": 9.85, "learning_rate": 0.0024290460991352155, "loss": 1.3101, "step": 49000 }, { "epoch": 9.87, "learning_rate": 0.0024284621302964804, "loss": 1.3152, "step": 49100 }, { "epoch": 9.89, "learning_rate": 0.0024278758389703853, "loss": 1.3163, "step": 49200 }, { "epoch": 9.91, "learning_rate": 0.002427287226312379, "loss": 1.3089, "step": 49300 }, { "epoch": 9.93, "learning_rate": 0.002426696293482488, "loss": 1.3141, "step": 49400 }, { "epoch": 9.95, "learning_rate": 0.002426103041645309, "loss": 1.3113, "step": 49500 }, { "epoch": 9.97, "learning_rate": 0.0024255074719700094, "loss": 1.3098, "step": 49600 }, { "epoch": 9.99, "learning_rate": 0.0024249095856303253, "loss": 1.3112, "step": 49700 }, { "epoch": 10.0, "eval_accuracy": 0.4034808535713104, "eval_loss": 1.2766634225845337, "eval_runtime": 19.8931, "eval_samples_per_second": 3999.773, "eval_steps_per_second": 15.634, "step": 49732 }, { "epoch": 10.01, "learning_rate": 0.0024243093838045584, "loss": 1.3131, "step": 49800 }, { "epoch": 10.03, "learning_rate": 0.002423706867675572, "loss": 1.3102, "step": 49900 }, { "epoch": 10.05, "learning_rate": 0.002423102038430793, "loss": 1.3043, "step": 50000 }, { "epoch": 10.07, "learning_rate": 0.0024224948972622054, "loss": 1.3073, "step": 50100 }, { "epoch": 10.09, "learning_rate": 0.002421885445366349, "loss": 1.3097, "step": 50200 }, { "epoch": 10.11, "learning_rate": 0.002421273683944319, "loss": 1.3065, "step": 50300 }, { "epoch": 10.13, "learning_rate": 0.002420659614201761, "loss": 1.3096, "step": 50400 }, { "epoch": 10.15, "learning_rate": 0.002420043237348871, "loss": 1.3118, "step": 50500 }, { "epoch": 10.17, "learning_rate": 0.0024194245546003894, "loss": 1.3081, "step": 50600 }, { "epoch": 10.19, "learning_rate": 0.002418803567175604, "loss": 1.3087, "step": 50700 }, { "epoch": 10.21, "learning_rate": 0.002418186520605176, "loss": 1.3084, "step": 50800 }, { "epoch": 10.23, "learning_rate": 0.0024175609505199523, "loss": 1.3071, "step": 50900 }, { "epoch": 10.25, "learning_rate": 0.002416933079431175, "loss": 1.3071, "step": 51000 }, { "epoch": 10.27, "learning_rate": 0.0024163029085762376, "loss": 1.3126, "step": 51100 }, { "epoch": 10.3, "learning_rate": 0.0024156704391970684, "loss": 1.3117, "step": 51200 }, { "epoch": 10.32, "learning_rate": 0.0024150356725401233, "loss": 1.3088, "step": 51300 }, { "epoch": 10.34, "learning_rate": 0.002414404991844438, "loss": 1.3101, "step": 51400 }, { "epoch": 10.36, "learning_rate": 0.0024137656573309015, "loss": 1.3126, "step": 51500 }, { "epoch": 10.38, "learning_rate": 0.002413124029293492, "loss": 1.3098, "step": 51600 }, { "epoch": 10.4, "learning_rate": 0.0024124801089967156, "loss": 1.3148, "step": 51700 }, { "epoch": 10.42, "learning_rate": 0.0024118338977095963, "loss": 1.3099, "step": 51800 }, { "epoch": 10.44, "learning_rate": 0.0024111853967056732, "loss": 1.3077, "step": 51900 }, { "epoch": 10.46, "learning_rate": 0.002410534607262998, "loss": 1.3099, "step": 52000 }, { "epoch": 10.48, "learning_rate": 0.0024098815306641316, "loss": 1.3082, "step": 52100 }, { "epoch": 10.5, "learning_rate": 0.002409226168196144, "loss": 1.3068, "step": 52200 }, { "epoch": 10.52, "learning_rate": 0.0024085685211506086, "loss": 1.3094, "step": 52300 }, { "epoch": 10.54, "learning_rate": 0.002407908590823602, "loss": 1.3076, "step": 52400 }, { "epoch": 10.56, "learning_rate": 0.0024072463785157006, "loss": 1.3075, "step": 52500 }, { "epoch": 10.58, "learning_rate": 0.0024065818855319773, "loss": 1.3118, "step": 52600 }, { "epoch": 10.6, "learning_rate": 0.0024059151131820013, "loss": 1.3101, "step": 52700 }, { "epoch": 10.62, "learning_rate": 0.002405246062779832, "loss": 1.3058, "step": 52800 }, { "epoch": 10.64, "learning_rate": 0.0024045747356440197, "loss": 1.3079, "step": 52900 }, { "epoch": 10.66, "learning_rate": 0.0024039011330976015, "loss": 1.309, "step": 53000 }, { "epoch": 10.68, "learning_rate": 0.0024032252564680978, "loss": 1.3076, "step": 53100 }, { "epoch": 10.7, "learning_rate": 0.0024025471070875117, "loss": 1.3102, "step": 53200 }, { "epoch": 10.72, "learning_rate": 0.002401866686292326, "loss": 1.3088, "step": 53300 }, { "epoch": 10.74, "learning_rate": 0.0024011839954234983, "loss": 1.3089, "step": 53400 }, { "epoch": 10.76, "learning_rate": 0.0024004990358264607, "loss": 1.3063, "step": 53500 }, { "epoch": 10.78, "learning_rate": 0.0023998118088511167, "loss": 1.3066, "step": 53600 }, { "epoch": 10.8, "learning_rate": 0.0023991223158518387, "loss": 1.3071, "step": 53700 }, { "epoch": 10.82, "learning_rate": 0.0023984305581874637, "loss": 1.3097, "step": 53800 }, { "epoch": 10.84, "learning_rate": 0.002397736537221293, "loss": 1.3097, "step": 53900 }, { "epoch": 10.86, "learning_rate": 0.002397040254321088, "loss": 1.3088, "step": 54000 }, { "epoch": 10.88, "learning_rate": 0.0023963417108590675, "loss": 1.3071, "step": 54100 }, { "epoch": 10.9, "learning_rate": 0.0023956409082119055, "loss": 1.3055, "step": 54200 }, { "epoch": 10.92, "learning_rate": 0.002394937847760729, "loss": 1.3059, "step": 54300 }, { "epoch": 10.94, "learning_rate": 0.002394232530891114, "loss": 1.3016, "step": 54400 }, { "epoch": 10.96, "learning_rate": 0.0023935249589930835, "loss": 1.308, "step": 54500 }, { "epoch": 10.98, "learning_rate": 0.0023928151334611045, "loss": 1.3063, "step": 54600 }, { "epoch": 11.0, "learning_rate": 0.002392103055694086, "loss": 1.3058, "step": 54700 }, { "epoch": 11.0, "eval_accuracy": 0.4042048581326505, "eval_loss": 1.2673618793487549, "eval_runtime": 19.7081, "eval_samples_per_second": 4037.327, "eval_steps_per_second": 15.78, "step": 54705 }, { "epoch": 11.02, "learning_rate": 0.0023913887270953743, "loss": 1.2989, "step": 54800 }, { "epoch": 11.04, "learning_rate": 0.002390672149072754, "loss": 1.302, "step": 54900 }, { "epoch": 11.06, "learning_rate": 0.0023899605224217933, "loss": 1.2996, "step": 55000 }, { "epoch": 11.08, "learning_rate": 0.002389239472251357, "loss": 1.2986, "step": 55100 }, { "epoch": 11.1, "learning_rate": 0.0023885161768927156, "loss": 1.3003, "step": 55200 }, { "epoch": 11.12, "learning_rate": 0.002387790637771324, "loss": 1.3006, "step": 55300 }, { "epoch": 11.14, "learning_rate": 0.0023870628563170586, "loss": 1.3063, "step": 55400 }, { "epoch": 11.16, "learning_rate": 0.0023863328339642155, "loss": 1.3082, "step": 55500 }, { "epoch": 11.18, "learning_rate": 0.002385600572151506, "loss": 1.3049, "step": 55600 }, { "epoch": 11.2, "learning_rate": 0.002384866072322057, "loss": 1.3041, "step": 55700 }, { "epoch": 11.22, "learning_rate": 0.0023841293359234033, "loss": 1.305, "step": 55800 }, { "epoch": 11.24, "learning_rate": 0.00238339036440749, "loss": 1.3048, "step": 55900 }, { "epoch": 11.26, "learning_rate": 0.002382649159230665, "loss": 1.3019, "step": 56000 }, { "epoch": 11.28, "learning_rate": 0.00238190572185368, "loss": 1.3049, "step": 56100 }, { "epoch": 11.3, "learning_rate": 0.002381160053741684, "loss": 1.306, "step": 56200 }, { "epoch": 11.32, "learning_rate": 0.0023804121563642247, "loss": 1.3058, "step": 56300 }, { "epoch": 11.34, "learning_rate": 0.0023796620311952415, "loss": 1.305, "step": 56400 }, { "epoch": 11.36, "learning_rate": 0.0023789096797130643, "loss": 1.3037, "step": 56500 }, { "epoch": 11.38, "learning_rate": 0.002378155103400411, "loss": 1.3064, "step": 56600 }, { "epoch": 11.4, "learning_rate": 0.002377398303744384, "loss": 1.3038, "step": 56700 }, { "epoch": 11.42, "learning_rate": 0.002376639282236468, "loss": 1.3057, "step": 56800 }, { "epoch": 11.44, "learning_rate": 0.0023758780403725257, "loss": 1.3044, "step": 56900 }, { "epoch": 11.46, "learning_rate": 0.0023751145796527956, "loss": 1.3057, "step": 57000 }, { "epoch": 11.48, "learning_rate": 0.00237434890158189, "loss": 1.3034, "step": 57100 }, { "epoch": 11.5, "learning_rate": 0.0023735810076687893, "loss": 1.3055, "step": 57200 }, { "epoch": 11.52, "learning_rate": 0.0023728108994268433, "loss": 1.3021, "step": 57300 }, { "epoch": 11.54, "learning_rate": 0.0023720385783737637, "loss": 1.3051, "step": 57400 }, { "epoch": 11.56, "learning_rate": 0.0023712640460316244, "loss": 1.3066, "step": 57500 }, { "epoch": 11.58, "learning_rate": 0.0023704873039268565, "loss": 1.306, "step": 57600 }, { "epoch": 11.6, "learning_rate": 0.0023697083535902464, "loss": 1.2988, "step": 57700 }, { "epoch": 11.62, "learning_rate": 0.0023689271965569325, "loss": 1.3044, "step": 57800 }, { "epoch": 11.64, "learning_rate": 0.0023681438343664016, "loss": 1.3054, "step": 57900 }, { "epoch": 11.66, "learning_rate": 0.002367358268562487, "loss": 1.3035, "step": 58000 }, { "epoch": 11.68, "learning_rate": 0.0023665705006933645, "loss": 1.2999, "step": 58100 }, { "epoch": 11.7, "learning_rate": 0.00236578053231155, "loss": 1.3027, "step": 58200 }, { "epoch": 11.72, "learning_rate": 0.0023649883649738954, "loss": 1.3035, "step": 58300 }, { "epoch": 11.74, "learning_rate": 0.0023641940002415876, "loss": 1.3023, "step": 58400 }, { "epoch": 11.76, "learning_rate": 0.002363397439680142, "loss": 1.2999, "step": 58500 }, { "epoch": 11.78, "learning_rate": 0.002362598684859403, "loss": 1.2994, "step": 58600 }, { "epoch": 11.8, "learning_rate": 0.0023617977373535403, "loss": 1.3024, "step": 58700 }, { "epoch": 11.82, "learning_rate": 0.0023609945987410432, "loss": 1.302, "step": 58800 }, { "epoch": 11.84, "learning_rate": 0.0023601892706047187, "loss": 1.3044, "step": 58900 }, { "epoch": 11.86, "learning_rate": 0.002359381754531691, "loss": 1.3026, "step": 59000 }, { "epoch": 11.88, "learning_rate": 0.0023585801599547503, "loss": 1.3061, "step": 59100 }, { "epoch": 11.9, "learning_rate": 0.002357768294626511, "loss": 1.2997, "step": 59200 }, { "epoch": 11.92, "learning_rate": 0.0023569542461327744, "loss": 1.2999, "step": 59300 }, { "epoch": 11.94, "learning_rate": 0.0023561380160778494, "loss": 1.3, "step": 59400 }, { "epoch": 11.96, "learning_rate": 0.0023553196060703448, "loss": 1.3009, "step": 59500 }, { "epoch": 11.98, "learning_rate": 0.0023544990177231644, "loss": 1.3043, "step": 59600 }, { "epoch": 12.0, "eval_accuracy": 0.40439273978216544, "eval_loss": 1.2642629146575928, "eval_runtime": 19.7374, "eval_samples_per_second": 4031.338, "eval_steps_per_second": 15.757, "step": 59679 }, { "epoch": 12.0, "learning_rate": 0.0023536762526535065, "loss": 1.3005, "step": 59700 }, { "epoch": 12.02, "learning_rate": 0.002352851312482858, "loss": 1.2972, "step": 59800 }, { "epoch": 12.04, "learning_rate": 0.0023520241988369927, "loss": 1.2979, "step": 59900 }, { "epoch": 12.06, "learning_rate": 0.0023511949133459688, "loss": 1.2933, "step": 60000 }, { "epoch": 12.08, "learning_rate": 0.0023503634576441234, "loss": 1.2949, "step": 60100 }, { "epoch": 12.1, "learning_rate": 0.0023495298333700712, "loss": 1.3001, "step": 60200 }, { "epoch": 12.12, "learning_rate": 0.0023486940421667012, "loss": 1.2958, "step": 60300 }, { "epoch": 12.14, "learning_rate": 0.0023478560856811715, "loss": 1.2996, "step": 60400 }, { "epoch": 12.17, "learning_rate": 0.0023470159655649093, "loss": 1.2979, "step": 60500 }, { "epoch": 12.19, "learning_rate": 0.0023461736834736045, "loss": 1.2997, "step": 60600 }, { "epoch": 12.21, "learning_rate": 0.0023453292410672085, "loss": 1.3016, "step": 60700 }, { "epoch": 12.23, "learning_rate": 0.002344482640009929, "loss": 1.299, "step": 60800 }, { "epoch": 12.25, "learning_rate": 0.00234363388197023, "loss": 1.3009, "step": 60900 }, { "epoch": 12.27, "learning_rate": 0.0023427829686208245, "loss": 1.3003, "step": 61000 }, { "epoch": 12.29, "learning_rate": 0.002341938442963461, "loss": 1.303, "step": 61100 }, { "epoch": 12.31, "learning_rate": 0.0023410832455409507, "loss": 1.2995, "step": 61200 }, { "epoch": 12.33, "learning_rate": 0.002340225897835473, "loss": 1.2984, "step": 61300 }, { "epoch": 12.35, "learning_rate": 0.0023393664015366696, "loss": 1.3007, "step": 61400 }, { "epoch": 12.37, "learning_rate": 0.0023385047583384167, "loss": 1.3039, "step": 61500 }, { "epoch": 12.39, "learning_rate": 0.0023376582667187836, "loss": 1.2987, "step": 61600 }, { "epoch": 12.41, "learning_rate": 0.0023367923776734523, "loss": 1.295, "step": 61700 }, { "epoch": 12.43, "learning_rate": 0.0023359243468015017, "loss": 1.3033, "step": 61800 }, { "epoch": 12.45, "learning_rate": 0.002335054175813628, "loss": 1.3007, "step": 61900 }, { "epoch": 12.47, "learning_rate": 0.002334181866424745, "loss": 1.3005, "step": 62000 }, { "epoch": 12.49, "learning_rate": 0.002333307420353981, "loss": 1.3006, "step": 62100 }, { "epoch": 12.51, "learning_rate": 0.0023324308393246752, "loss": 1.2948, "step": 62200 }, { "epoch": 12.53, "learning_rate": 0.002331552125064374, "loss": 1.2972, "step": 62300 }, { "epoch": 12.55, "learning_rate": 0.0023306712793048287, "loss": 1.3009, "step": 62400 }, { "epoch": 12.57, "learning_rate": 0.0023297883037819906, "loss": 1.2992, "step": 62500 }, { "epoch": 12.59, "learning_rate": 0.002328903200236008, "loss": 1.3011, "step": 62600 }, { "epoch": 12.61, "learning_rate": 0.0023280159704112255, "loss": 1.3016, "step": 62700 }, { "epoch": 12.63, "learning_rate": 0.0023271266160561736, "loss": 1.2949, "step": 62800 }, { "epoch": 12.65, "learning_rate": 0.0023262351389235743, "loss": 1.2994, "step": 62900 }, { "epoch": 12.67, "learning_rate": 0.00232534154077033, "loss": 1.3009, "step": 63000 }, { "epoch": 12.69, "learning_rate": 0.002324445823357525, "loss": 1.2988, "step": 63100 }, { "epoch": 12.71, "learning_rate": 0.0023235479884504196, "loss": 1.2966, "step": 63200 }, { "epoch": 12.73, "learning_rate": 0.0023226480378184472, "loss": 1.2976, "step": 63300 }, { "epoch": 12.75, "learning_rate": 0.0023217459732352104, "loss": 1.2981, "step": 63400 }, { "epoch": 12.77, "learning_rate": 0.002320841796478479, "loss": 1.2969, "step": 63500 }, { "epoch": 12.79, "learning_rate": 0.0023199355093301835, "loss": 1.2999, "step": 63600 }, { "epoch": 12.81, "learning_rate": 0.0023190271135764165, "loss": 1.295, "step": 63700 }, { "epoch": 12.83, "learning_rate": 0.0023181166110074224, "loss": 1.2908, "step": 63800 }, { "epoch": 12.85, "learning_rate": 0.002317204003417602, "loss": 1.2966, "step": 63900 }, { "epoch": 12.87, "learning_rate": 0.0023162892926055006, "loss": 1.2965, "step": 64000 }, { "epoch": 12.89, "learning_rate": 0.002315372480373811, "loss": 1.2946, "step": 64100 }, { "epoch": 12.91, "learning_rate": 0.0023144535685293665, "loss": 1.2988, "step": 64200 }, { "epoch": 12.93, "learning_rate": 0.0023135325588831386, "loss": 1.3009, "step": 64300 }, { "epoch": 12.95, "learning_rate": 0.0023126094532502327, "loss": 1.2955, "step": 64400 }, { "epoch": 12.97, "learning_rate": 0.0023116842534498857, "loss": 1.3004, "step": 64500 }, { "epoch": 12.99, "learning_rate": 0.00231075696130546, "loss": 1.2943, "step": 64600 }, { "epoch": 13.0, "eval_accuracy": 0.4050755408318846, "eval_loss": 1.25895094871521, "eval_runtime": 19.7319, "eval_samples_per_second": 4032.464, "eval_steps_per_second": 15.761, "step": 64652 }, { "epoch": 13.01, "learning_rate": 0.0023098275786444435, "loss": 1.2935, "step": 64700 }, { "epoch": 13.03, "learning_rate": 0.002308896107298443, "loss": 1.2893, "step": 64800 }, { "epoch": 13.05, "learning_rate": 0.0023079625491031824, "loss": 1.2912, "step": 64900 }, { "epoch": 13.07, "learning_rate": 0.002307026905898497, "loss": 1.2944, "step": 65000 }, { "epoch": 13.09, "learning_rate": 0.002306089179528332, "loss": 1.2949, "step": 65100 }, { "epoch": 13.11, "learning_rate": 0.002305149371840738, "loss": 1.2898, "step": 65200 }, { "epoch": 13.13, "learning_rate": 0.002304207484687868, "loss": 1.2934, "step": 65300 }, { "epoch": 13.15, "learning_rate": 0.002303263519925972, "loss": 1.2922, "step": 65400 }, { "epoch": 13.17, "learning_rate": 0.002302317479415395, "loss": 1.2913, "step": 65500 }, { "epoch": 13.19, "learning_rate": 0.002301369365020573, "loss": 1.2921, "step": 65600 }, { "epoch": 13.21, "learning_rate": 0.0023004191786100297, "loss": 1.2935, "step": 65700 }, { "epoch": 13.23, "learning_rate": 0.002299466922056371, "loss": 1.298, "step": 65800 }, { "epoch": 13.25, "learning_rate": 0.0022985125972362834, "loss": 1.2949, "step": 65900 }, { "epoch": 13.27, "learning_rate": 0.0022975562060305295, "loss": 1.2926, "step": 66000 }, { "epoch": 13.29, "learning_rate": 0.002296597750323944, "loss": 1.293, "step": 66100 }, { "epoch": 13.31, "learning_rate": 0.0022956372320054306, "loss": 1.2942, "step": 66200 }, { "epoch": 13.33, "learning_rate": 0.0022946746529679575, "loss": 1.2944, "step": 66300 }, { "epoch": 13.35, "learning_rate": 0.002293710015108555, "loss": 1.297, "step": 66400 }, { "epoch": 13.37, "learning_rate": 0.0022927433203283093, "loss": 1.2982, "step": 66500 }, { "epoch": 13.39, "learning_rate": 0.0022917745705323617, "loss": 1.2904, "step": 66600 }, { "epoch": 13.41, "learning_rate": 0.0022908037676299038, "loss": 1.2948, "step": 66700 }, { "epoch": 13.43, "learning_rate": 0.0022898309135341716, "loss": 1.2965, "step": 66800 }, { "epoch": 13.45, "learning_rate": 0.0022888560101624452, "loss": 1.2892, "step": 66900 }, { "epoch": 13.47, "learning_rate": 0.002287888839071394, "loss": 1.2942, "step": 67000 }, { "epoch": 13.49, "learning_rate": 0.0022869098633604185, "loss": 1.2886, "step": 67100 }, { "epoch": 13.51, "learning_rate": 0.0022859288441301902, "loss": 1.2901, "step": 67200 }, { "epoch": 13.53, "learning_rate": 0.0022849457833140803, "loss": 1.2941, "step": 67300 }, { "epoch": 13.55, "learning_rate": 0.0022839606828494842, "loss": 1.2947, "step": 67400 }, { "epoch": 13.57, "learning_rate": 0.002282973544677816, "loss": 1.2952, "step": 67500 }, { "epoch": 13.59, "learning_rate": 0.0022819843707445058, "loss": 1.2945, "step": 67600 }, { "epoch": 13.61, "learning_rate": 0.002280993162998996, "loss": 1.2935, "step": 67700 }, { "epoch": 13.63, "learning_rate": 0.002279999923394737, "loss": 1.2948, "step": 67800 }, { "epoch": 13.65, "learning_rate": 0.0022790046538891844, "loss": 1.2895, "step": 67900 }, { "epoch": 13.67, "learning_rate": 0.0022780073564437927, "loss": 1.2952, "step": 68000 }, { "epoch": 13.69, "learning_rate": 0.002277008033024014, "loss": 1.2946, "step": 68100 }, { "epoch": 13.71, "learning_rate": 0.002276006685599293, "loss": 1.298, "step": 68200 }, { "epoch": 13.73, "learning_rate": 0.002275003316143064, "loss": 1.2942, "step": 68300 }, { "epoch": 13.75, "learning_rate": 0.0022739979266327448, "loss": 1.2936, "step": 68400 }, { "epoch": 13.77, "learning_rate": 0.002272990519049735, "loss": 1.2937, "step": 68500 }, { "epoch": 13.79, "learning_rate": 0.0022719810953794116, "loss": 1.2991, "step": 68600 }, { "epoch": 13.81, "learning_rate": 0.002270969657611125, "loss": 1.2924, "step": 68700 }, { "epoch": 13.83, "learning_rate": 0.0022699562077381943, "loss": 1.2924, "step": 68800 }, { "epoch": 13.85, "learning_rate": 0.002268940747757904, "loss": 1.2936, "step": 68900 }, { "epoch": 13.87, "learning_rate": 0.002267923279671501, "loss": 1.2963, "step": 69000 }, { "epoch": 13.89, "learning_rate": 0.0022669038054841887, "loss": 1.2875, "step": 69100 }, { "epoch": 13.91, "learning_rate": 0.0022658823272051244, "loss": 1.2912, "step": 69200 }, { "epoch": 13.93, "learning_rate": 0.0022648588468474158, "loss": 1.2943, "step": 69300 }, { "epoch": 13.95, "learning_rate": 0.002263833366428115, "loss": 1.2974, "step": 69400 }, { "epoch": 13.97, "learning_rate": 0.002262816172636471, "loss": 1.2929, "step": 69500 }, { "epoch": 13.99, "learning_rate": 0.002261786718111027, "loss": 1.2926, "step": 69600 }, { "epoch": 14.0, "eval_accuracy": 0.40531926198942647, "eval_loss": 1.2532228231430054, "eval_runtime": 19.5692, "eval_samples_per_second": 4065.991, "eval_steps_per_second": 15.892, "step": 69625 }, { "epoch": 14.01, "learning_rate": 0.002260755269578475, "loss": 1.2829, "step": 69700 }, { "epoch": 14.04, "learning_rate": 0.0022597218290715715, "loss": 1.2825, "step": 69800 }, { "epoch": 14.06, "learning_rate": 0.002258686398626998, "loss": 1.2912, "step": 69900 }, { "epoch": 14.08, "learning_rate": 0.0022576489802853578, "loss": 1.2877, "step": 70000 }, { "epoch": 14.1, "learning_rate": 0.002256609576091173, "loss": 1.2876, "step": 70100 }, { "epoch": 14.12, "learning_rate": 0.0022555681880928784, "loss": 1.2921, "step": 70200 }, { "epoch": 14.14, "learning_rate": 0.0022545248183428184, "loss": 1.2898, "step": 70300 }, { "epoch": 14.16, "learning_rate": 0.0022534794688972436, "loss": 1.2881, "step": 70400 }, { "epoch": 14.18, "learning_rate": 0.0022524321418163056, "loss": 1.2887, "step": 70500 }, { "epoch": 14.2, "learning_rate": 0.0022513828391640535, "loss": 1.2835, "step": 70600 }, { "epoch": 14.22, "learning_rate": 0.00225033156300843, "loss": 1.2945, "step": 70700 }, { "epoch": 14.24, "learning_rate": 0.0022492783154212676, "loss": 1.2912, "step": 70800 }, { "epoch": 14.26, "learning_rate": 0.002248223098478284, "loss": 1.2864, "step": 70900 }, { "epoch": 14.28, "learning_rate": 0.0022471659142590758, "loss": 1.2914, "step": 71000 }, { "epoch": 14.3, "learning_rate": 0.00224610676484712, "loss": 1.2877, "step": 71100 }, { "epoch": 14.32, "learning_rate": 0.0022450456523297646, "loss": 1.288, "step": 71200 }, { "epoch": 14.34, "learning_rate": 0.0022439825787982275, "loss": 1.2901, "step": 71300 }, { "epoch": 14.36, "learning_rate": 0.0022429175463475897, "loss": 1.2885, "step": 71400 }, { "epoch": 14.38, "learning_rate": 0.002241861236648861, "loss": 1.2915, "step": 71500 }, { "epoch": 14.4, "learning_rate": 0.00224079231219746, "loss": 1.2862, "step": 71600 }, { "epoch": 14.42, "learning_rate": 0.002239721435114266, "loss": 1.2887, "step": 71700 }, { "epoch": 14.44, "learning_rate": 0.0022386486075097406, "loss": 1.2953, "step": 71800 }, { "epoch": 14.46, "learning_rate": 0.0022375738314981885, "loss": 1.2835, "step": 71900 }, { "epoch": 14.48, "learning_rate": 0.0022364971091977555, "loss": 1.2931, "step": 72000 }, { "epoch": 14.5, "learning_rate": 0.002235418442730422, "loss": 1.2878, "step": 72100 }, { "epoch": 14.52, "learning_rate": 0.0022343378342220004, "loss": 1.2877, "step": 72200 }, { "epoch": 14.54, "learning_rate": 0.0022332552858021298, "loss": 1.2856, "step": 72300 }, { "epoch": 14.56, "learning_rate": 0.002232170799604273, "loss": 1.2855, "step": 72400 }, { "epoch": 14.58, "learning_rate": 0.002231084377765712, "loss": 1.2872, "step": 72500 }, { "epoch": 14.6, "learning_rate": 0.002229996022427543, "loss": 1.2863, "step": 72600 }, { "epoch": 14.62, "learning_rate": 0.002228905735734673, "loss": 1.2894, "step": 72700 }, { "epoch": 14.64, "learning_rate": 0.002227813519835815, "loss": 1.2877, "step": 72800 }, { "epoch": 14.66, "learning_rate": 0.0022267193768834843, "loss": 1.2847, "step": 72900 }, { "epoch": 14.68, "learning_rate": 0.002225623309033993, "loss": 1.2925, "step": 73000 }, { "epoch": 14.7, "learning_rate": 0.0022245253184474496, "loss": 1.2866, "step": 73100 }, { "epoch": 14.72, "learning_rate": 0.002223425407287748, "loss": 1.2889, "step": 73200 }, { "epoch": 14.74, "learning_rate": 0.00222232357772257, "loss": 1.2886, "step": 73300 }, { "epoch": 14.76, "learning_rate": 0.0022212198319233765, "loss": 1.2896, "step": 73400 }, { "epoch": 14.78, "learning_rate": 0.0022201141720654062, "loss": 1.2915, "step": 73500 }, { "epoch": 14.8, "learning_rate": 0.002219006600327669, "loss": 1.287, "step": 73600 }, { "epoch": 14.82, "learning_rate": 0.002217897118892943, "loss": 1.2942, "step": 73700 }, { "epoch": 14.84, "learning_rate": 0.0022167857299477704, "loss": 1.2878, "step": 73800 }, { "epoch": 14.86, "learning_rate": 0.0022156724356824516, "loss": 1.285, "step": 73900 }, { "epoch": 14.88, "learning_rate": 0.0022145683996782177, "loss": 1.2891, "step": 74000 }, { "epoch": 14.9, "learning_rate": 0.0022134513203569167, "loss": 1.29, "step": 74100 }, { "epoch": 14.92, "learning_rate": 0.0022123323422868512, "loss": 1.2884, "step": 74200 }, { "epoch": 14.94, "learning_rate": 0.002211211467673279, "loss": 1.2901, "step": 74300 }, { "epoch": 14.96, "learning_rate": 0.0022100886987251943, "loss": 1.2901, "step": 74400 }, { "epoch": 14.98, "learning_rate": 0.0022089640376553267, "loss": 1.2917, "step": 74500 }, { "epoch": 15.0, "eval_accuracy": 0.40564348112720083, "eval_loss": 1.2492839097976685, "eval_runtime": 19.7618, "eval_samples_per_second": 4026.36, "eval_steps_per_second": 15.737, "step": 74598 }, { "epoch": 15.0, "learning_rate": 0.0022078374866801326, "loss": 1.2901, "step": 74600 }, { "epoch": 15.02, "learning_rate": 0.002206709048019794, "loss": 1.2815, "step": 74700 }, { "epoch": 15.04, "learning_rate": 0.0022055787238982145, "loss": 1.2824, "step": 74800 }, { "epoch": 15.06, "learning_rate": 0.0022044465165430107, "loss": 1.2836, "step": 74900 }, { "epoch": 15.08, "learning_rate": 0.002203312428185512, "loss": 1.2848, "step": 75000 }, { "epoch": 15.1, "learning_rate": 0.002202176461060756, "loss": 1.2856, "step": 75100 }, { "epoch": 15.12, "learning_rate": 0.0022010386174074813, "loss": 1.2858, "step": 75200 }, { "epoch": 15.14, "learning_rate": 0.002199898899468125, "loss": 1.2827, "step": 75300 }, { "epoch": 15.16, "learning_rate": 0.0021987573094888182, "loss": 1.2816, "step": 75400 }, { "epoch": 15.18, "learning_rate": 0.0021976138497193823, "loss": 1.2845, "step": 75500 }, { "epoch": 15.2, "learning_rate": 0.0021964685224133214, "loss": 1.2876, "step": 75600 }, { "epoch": 15.22, "learning_rate": 0.0021953213298278223, "loss": 1.2815, "step": 75700 }, { "epoch": 15.24, "learning_rate": 0.0021941837739942976, "loss": 1.2838, "step": 75800 }, { "epoch": 15.26, "learning_rate": 0.002193032876232499, "loss": 1.2826, "step": 75900 }, { "epoch": 15.28, "learning_rate": 0.002191880119962158, "loss": 1.2794, "step": 76000 }, { "epoch": 15.3, "learning_rate": 0.0021907255074551024, "loss": 1.2845, "step": 76100 }, { "epoch": 15.32, "learning_rate": 0.0021895690409868165, "loss": 1.2861, "step": 76200 }, { "epoch": 15.34, "learning_rate": 0.0021884107228364387, "loss": 1.2845, "step": 76300 }, { "epoch": 15.36, "learning_rate": 0.002187250555286758, "loss": 1.2845, "step": 76400 }, { "epoch": 15.38, "learning_rate": 0.0021860885406242065, "loss": 1.2879, "step": 76500 }, { "epoch": 15.4, "learning_rate": 0.002184924681138858, "loss": 1.2798, "step": 76600 }, { "epoch": 15.42, "learning_rate": 0.0021837589791244205, "loss": 1.2823, "step": 76700 }, { "epoch": 15.44, "learning_rate": 0.0021825914368782343, "loss": 1.282, "step": 76800 }, { "epoch": 15.46, "learning_rate": 0.002181422056701266, "loss": 1.2844, "step": 76900 }, { "epoch": 15.48, "learning_rate": 0.002180250840898105, "loss": 1.2854, "step": 77000 }, { "epoch": 15.5, "learning_rate": 0.002179077791776957, "loss": 1.287, "step": 77100 }, { "epoch": 15.52, "learning_rate": 0.0021779029116496423, "loss": 1.284, "step": 77200 }, { "epoch": 15.54, "learning_rate": 0.002176726202831588, "loss": 1.2866, "step": 77300 }, { "epoch": 15.56, "learning_rate": 0.0021755476676418267, "loss": 1.29, "step": 77400 }, { "epoch": 15.58, "learning_rate": 0.0021743673084029897, "loss": 1.2852, "step": 77500 }, { "epoch": 15.6, "learning_rate": 0.002173185127441303, "loss": 1.28, "step": 77600 }, { "epoch": 15.62, "learning_rate": 0.0021720129760884666, "loss": 1.2861, "step": 77700 }, { "epoch": 15.64, "learning_rate": 0.0021708271768331494, "loss": 1.2843, "step": 77800 }, { "epoch": 15.66, "learning_rate": 0.002169639562831796, "loss": 1.2825, "step": 77900 }, { "epoch": 15.68, "learning_rate": 0.002168450136424931, "loss": 1.2862, "step": 78000 }, { "epoch": 15.7, "learning_rate": 0.0021672588999566487, "loss": 1.2842, "step": 78100 }, { "epoch": 15.72, "learning_rate": 0.0021660658557746126, "loss": 1.2826, "step": 78200 }, { "epoch": 15.74, "learning_rate": 0.0021648710062300482, "loss": 1.2851, "step": 78300 }, { "epoch": 15.76, "learning_rate": 0.002163674353677738, "loss": 1.28, "step": 78400 }, { "epoch": 15.78, "learning_rate": 0.0021624759004760198, "loss": 1.2835, "step": 78500 }, { "epoch": 15.8, "learning_rate": 0.0021612756489867773, "loss": 1.282, "step": 78600 }, { "epoch": 15.82, "learning_rate": 0.002160073601575442, "loss": 1.2815, "step": 78700 }, { "epoch": 15.84, "learning_rate": 0.0021588697606109808, "loss": 1.2854, "step": 78800 }, { "epoch": 15.86, "learning_rate": 0.0021576641284658978, "loss": 1.2859, "step": 78900 }, { "epoch": 15.88, "learning_rate": 0.0021564567075162263, "loss": 1.2845, "step": 79000 }, { "epoch": 15.91, "learning_rate": 0.002155247500141525, "loss": 1.286, "step": 79100 }, { "epoch": 15.93, "learning_rate": 0.0021540365087248737, "loss": 1.2849, "step": 79200 }, { "epoch": 15.95, "learning_rate": 0.0021528237356528675, "loss": 1.2842, "step": 79300 }, { "epoch": 15.97, "learning_rate": 0.002151609183315613, "loss": 1.283, "step": 79400 }, { "epoch": 15.99, "learning_rate": 0.0021503928541067234, "loss": 1.2873, "step": 79500 }, { "epoch": 16.0, "eval_accuracy": 0.40597676821927015, "eval_loss": 1.2457048892974854, "eval_runtime": 19.9932, "eval_samples_per_second": 3979.752, "eval_steps_per_second": 15.555, "step": 79572 }, { "epoch": 16.01, "learning_rate": 0.002149174750423314, "loss": 1.2817, "step": 79600 }, { "epoch": 16.03, "learning_rate": 0.0021479548746659964, "loss": 1.2792, "step": 79700 }, { "epoch": 16.05, "learning_rate": 0.002146733229238875, "loss": 1.2774, "step": 79800 }, { "epoch": 16.07, "learning_rate": 0.002145509816549542, "loss": 1.2808, "step": 79900 }, { "epoch": 16.09, "learning_rate": 0.0021442846390090724, "loss": 1.2771, "step": 80000 }, { "epoch": 16.11, "learning_rate": 0.0021430576990320196, "loss": 1.2769, "step": 80100 }, { "epoch": 16.13, "learning_rate": 0.002141828999036409, "loss": 1.2819, "step": 80200 }, { "epoch": 16.15, "learning_rate": 0.0021405985414437367, "loss": 1.2789, "step": 80300 }, { "epoch": 16.17, "learning_rate": 0.002139366328678961, "loss": 1.2777, "step": 80400 }, { "epoch": 16.19, "learning_rate": 0.0021381323631705002, "loss": 1.2804, "step": 80500 }, { "epoch": 16.21, "learning_rate": 0.002136896647350226, "loss": 1.2802, "step": 80600 }, { "epoch": 16.23, "learning_rate": 0.0021356591836534607, "loss": 1.2759, "step": 80700 }, { "epoch": 16.25, "learning_rate": 0.00213441997451897, "loss": 1.2834, "step": 80800 }, { "epoch": 16.27, "learning_rate": 0.002133179022388961, "loss": 1.2772, "step": 80900 }, { "epoch": 16.29, "learning_rate": 0.0021319363297090744, "loss": 1.2805, "step": 81000 }, { "epoch": 16.31, "learning_rate": 0.0021306918989283815, "loss": 1.2752, "step": 81100 }, { "epoch": 16.33, "learning_rate": 0.0021294457324993795, "loss": 1.279, "step": 81200 }, { "epoch": 16.35, "learning_rate": 0.0021281978328779863, "loss": 1.2825, "step": 81300 }, { "epoch": 16.37, "learning_rate": 0.0021269482025235344, "loss": 1.2824, "step": 81400 }, { "epoch": 16.39, "learning_rate": 0.002125696843898769, "loss": 1.2801, "step": 81500 }, { "epoch": 16.41, "learning_rate": 0.0021244437594698383, "loss": 1.2782, "step": 81600 }, { "epoch": 16.43, "learning_rate": 0.0021231889517062965, "loss": 1.2817, "step": 81700 }, { "epoch": 16.45, "learning_rate": 0.0021219324230810884, "loss": 1.2788, "step": 81800 }, { "epoch": 16.47, "learning_rate": 0.002120674176070555, "loss": 1.2796, "step": 81900 }, { "epoch": 16.49, "learning_rate": 0.0021194142131544212, "loss": 1.2799, "step": 82000 }, { "epoch": 16.51, "learning_rate": 0.002118152536815795, "loss": 1.2786, "step": 82100 }, { "epoch": 16.53, "learning_rate": 0.0021168891495411592, "loss": 1.2794, "step": 82200 }, { "epoch": 16.55, "learning_rate": 0.0021156240538203713, "loss": 1.2814, "step": 82300 }, { "epoch": 16.57, "learning_rate": 0.0021143572521466533, "loss": 1.2811, "step": 82400 }, { "epoch": 16.59, "learning_rate": 0.00211308874701659, "loss": 1.2821, "step": 82500 }, { "epoch": 16.61, "learning_rate": 0.002111818540930124, "loss": 1.2811, "step": 82600 }, { "epoch": 16.63, "learning_rate": 0.00211054663639055, "loss": 1.2759, "step": 82700 }, { "epoch": 16.65, "learning_rate": 0.0021092730359045086, "loss": 1.2797, "step": 82800 }, { "epoch": 16.67, "learning_rate": 0.0021079977419819853, "loss": 1.2784, "step": 82900 }, { "epoch": 16.69, "learning_rate": 0.0021067207571362997, "loss": 1.2816, "step": 83000 }, { "epoch": 16.71, "learning_rate": 0.0021054420838841066, "loss": 1.2801, "step": 83100 }, { "epoch": 16.73, "learning_rate": 0.0021041617247453863, "loss": 1.284, "step": 83200 }, { "epoch": 16.75, "learning_rate": 0.0021028796822434442, "loss": 1.2809, "step": 83300 }, { "epoch": 16.77, "learning_rate": 0.0021015959589049003, "loss": 1.2786, "step": 83400 }, { "epoch": 16.79, "learning_rate": 0.0021003105572596887, "loss": 1.2824, "step": 83500 }, { "epoch": 16.81, "learning_rate": 0.0020990234798410508, "loss": 1.2771, "step": 83600 }, { "epoch": 16.83, "learning_rate": 0.0020977347291855314, "loss": 1.2829, "step": 83700 }, { "epoch": 16.85, "learning_rate": 0.0020964443078329703, "loss": 1.2805, "step": 83800 }, { "epoch": 16.87, "learning_rate": 0.0020951522183265034, "loss": 1.2761, "step": 83900 }, { "epoch": 16.89, "learning_rate": 0.0020938584632125513, "loss": 1.2776, "step": 84000 }, { "epoch": 16.91, "learning_rate": 0.002092563045040819, "loss": 1.2783, "step": 84100 }, { "epoch": 16.93, "learning_rate": 0.0020912659663642878, "loss": 1.2802, "step": 84200 }, { "epoch": 16.95, "learning_rate": 0.002089967229739212, "loss": 1.2749, "step": 84300 }, { "epoch": 16.97, "learning_rate": 0.0020886668377251135, "loss": 1.2798, "step": 84400 }, { "epoch": 16.99, "learning_rate": 0.002087364792884776, "loss": 1.2791, "step": 84500 }, { "epoch": 17.0, "eval_accuracy": 0.40644209745282756, "eval_loss": 1.2400243282318115, "eval_runtime": 19.8275, "eval_samples_per_second": 4013.004, "eval_steps_per_second": 15.685, "step": 84545 }, { "epoch": 17.01, "learning_rate": 0.0020860610977842414, "loss": 1.2757, "step": 84600 }, { "epoch": 17.03, "learning_rate": 0.0020847557549928037, "loss": 1.2693, "step": 84700 }, { "epoch": 17.05, "learning_rate": 0.002083448767083003, "loss": 1.2748, "step": 84800 }, { "epoch": 17.07, "learning_rate": 0.002082140136630623, "loss": 1.2713, "step": 84900 }, { "epoch": 17.09, "learning_rate": 0.002080829866214684, "loss": 1.2743, "step": 85000 }, { "epoch": 17.11, "learning_rate": 0.0020795310855919614, "loss": 1.2762, "step": 85100 }, { "epoch": 17.13, "learning_rate": 0.0020782175593340372, "loss": 1.2779, "step": 85200 }, { "epoch": 17.15, "learning_rate": 0.0020769024008430834, "loss": 1.272, "step": 85300 }, { "epoch": 17.17, "learning_rate": 0.0020755856127109857, "loss": 1.2736, "step": 85400 }, { "epoch": 17.19, "learning_rate": 0.0020742671975328406, "loss": 1.2722, "step": 85500 }, { "epoch": 17.21, "learning_rate": 0.0020729471579069526, "loss": 1.2711, "step": 85600 }, { "epoch": 17.23, "learning_rate": 0.002071625496434827, "loss": 1.2761, "step": 85700 }, { "epoch": 17.25, "learning_rate": 0.0020703022157211644, "loss": 1.2755, "step": 85800 }, { "epoch": 17.27, "learning_rate": 0.002068977318373858, "loss": 1.28, "step": 85900 }, { "epoch": 17.29, "learning_rate": 0.002067650807003987, "loss": 1.2775, "step": 86000 }, { "epoch": 17.31, "learning_rate": 0.00206632268422581, "loss": 1.2762, "step": 86100 }, { "epoch": 17.33, "learning_rate": 0.002064992952656763, "loss": 1.2791, "step": 86200 }, { "epoch": 17.35, "learning_rate": 0.0020636616149174508, "loss": 1.2801, "step": 86300 }, { "epoch": 17.37, "learning_rate": 0.002062328673631646, "loss": 1.2765, "step": 86400 }, { "epoch": 17.39, "learning_rate": 0.002060994131426279, "loss": 1.271, "step": 86500 }, { "epoch": 17.41, "learning_rate": 0.0020596713602392754, "loss": 1.2774, "step": 86600 }, { "epoch": 17.43, "learning_rate": 0.002058333640031713, "loss": 1.2742, "step": 86700 }, { "epoch": 17.45, "learning_rate": 0.002056994326777913, "loss": 1.2761, "step": 86800 }, { "epoch": 17.47, "learning_rate": 0.0020556534231173655, "loss": 1.2749, "step": 86900 }, { "epoch": 17.49, "learning_rate": 0.002054310931692694, "loss": 1.2757, "step": 87000 }, { "epoch": 17.51, "learning_rate": 0.0020529668551496506, "loss": 1.2755, "step": 87100 }, { "epoch": 17.53, "learning_rate": 0.0020516211961371126, "loss": 1.2766, "step": 87200 }, { "epoch": 17.55, "learning_rate": 0.0020502739573070757, "loss": 1.2719, "step": 87300 }, { "epoch": 17.57, "learning_rate": 0.002048925141314647, "loss": 1.2771, "step": 87400 }, { "epoch": 17.59, "learning_rate": 0.0020475747508180457, "loss": 1.2732, "step": 87500 }, { "epoch": 17.61, "learning_rate": 0.00204622278847859, "loss": 1.2741, "step": 87600 }, { "epoch": 17.63, "learning_rate": 0.0020448692569606988, "loss": 1.275, "step": 87700 }, { "epoch": 17.65, "learning_rate": 0.0020435141589318817, "loss": 1.2755, "step": 87800 }, { "epoch": 17.67, "learning_rate": 0.0020421574970627366, "loss": 1.2727, "step": 87900 }, { "epoch": 17.69, "learning_rate": 0.002040799274026943, "loss": 1.2753, "step": 88000 }, { "epoch": 17.71, "learning_rate": 0.0020394394925012565, "loss": 1.2749, "step": 88100 }, { "epoch": 17.73, "learning_rate": 0.002038078155165506, "loss": 1.2776, "step": 88200 }, { "epoch": 17.75, "learning_rate": 0.002036715264702584, "loss": 1.2752, "step": 88300 }, { "epoch": 17.78, "learning_rate": 0.0020353508237984466, "loss": 1.2771, "step": 88400 }, { "epoch": 17.8, "learning_rate": 0.002033984835142102, "loss": 1.2761, "step": 88500 }, { "epoch": 17.82, "learning_rate": 0.002032617301425613, "loss": 1.2707, "step": 88600 }, { "epoch": 17.84, "learning_rate": 0.0020312482253440835, "loss": 1.2749, "step": 88700 }, { "epoch": 17.86, "learning_rate": 0.0020298776095956594, "loss": 1.2735, "step": 88800 }, { "epoch": 17.88, "learning_rate": 0.00202850545688152, "loss": 1.2736, "step": 88900 }, { "epoch": 17.9, "learning_rate": 0.002027131769905874, "loss": 1.2763, "step": 89000 }, { "epoch": 17.92, "learning_rate": 0.002025756551375953, "loss": 1.2728, "step": 89100 }, { "epoch": 17.94, "learning_rate": 0.0020243798040020084, "loss": 1.2773, "step": 89200 }, { "epoch": 17.96, "learning_rate": 0.002023001530497303, "loss": 1.2736, "step": 89300 }, { "epoch": 17.98, "learning_rate": 0.0020216217335781088, "loss": 1.2755, "step": 89400 }, { "epoch": 18.0, "learning_rate": 0.002020240415963699, "loss": 1.2724, "step": 89500 }, { "epoch": 18.0, "eval_accuracy": 0.40673402194635844, "eval_loss": 1.2363520860671997, "eval_runtime": 19.8757, "eval_samples_per_second": 4003.284, "eval_steps_per_second": 15.647, "step": 89518 }, { "epoch": 18.02, "learning_rate": 0.0020188575803763435, "loss": 1.2653, "step": 89600 }, { "epoch": 18.04, "learning_rate": 0.0020174732295413058, "loss": 1.265, "step": 89700 }, { "epoch": 18.06, "learning_rate": 0.0020160873661868328, "loss": 1.2724, "step": 89800 }, { "epoch": 18.08, "learning_rate": 0.0020146999930441547, "loss": 1.2692, "step": 89900 }, { "epoch": 18.1, "learning_rate": 0.002013311112847475, "loss": 1.2678, "step": 90000 }, { "epoch": 18.12, "learning_rate": 0.0020119207283339695, "loss": 1.2709, "step": 90100 }, { "epoch": 18.14, "learning_rate": 0.0020105288422437775, "loss": 1.2674, "step": 90200 }, { "epoch": 18.16, "learning_rate": 0.0020091354573199965, "loss": 1.2694, "step": 90300 }, { "epoch": 18.18, "learning_rate": 0.00200774057630868, "loss": 1.2691, "step": 90400 }, { "epoch": 18.2, "learning_rate": 0.0020063442019588283, "loss": 1.2665, "step": 90500 }, { "epoch": 18.22, "learning_rate": 0.002004946337022386, "loss": 1.2669, "step": 90600 }, { "epoch": 18.24, "learning_rate": 0.0020035469842542347, "loss": 1.272, "step": 90700 }, { "epoch": 18.26, "learning_rate": 0.002002146146412188, "loss": 1.2658, "step": 90800 }, { "epoch": 18.28, "learning_rate": 0.002000743826256986, "loss": 1.2719, "step": 90900 }, { "epoch": 18.3, "learning_rate": 0.0019993400265522917, "loss": 1.2716, "step": 91000 }, { "epoch": 18.32, "learning_rate": 0.001997934750064681, "loss": 1.2676, "step": 91100 }, { "epoch": 18.34, "learning_rate": 0.0019965279995636438, "loss": 1.2705, "step": 91200 }, { "epoch": 18.36, "learning_rate": 0.001995119777821572, "loss": 1.2741, "step": 91300 }, { "epoch": 18.38, "learning_rate": 0.0019937100876137592, "loss": 1.2688, "step": 91400 }, { "epoch": 18.4, "learning_rate": 0.001992298931718391, "loss": 1.2709, "step": 91500 }, { "epoch": 18.42, "learning_rate": 0.0019908863129165432, "loss": 1.2719, "step": 91600 }, { "epoch": 18.44, "learning_rate": 0.0019894722339921737, "loss": 1.2726, "step": 91700 }, { "epoch": 18.46, "learning_rate": 0.0019880566977321184, "loss": 1.2734, "step": 91800 }, { "epoch": 18.48, "learning_rate": 0.001986639706926085, "loss": 1.2703, "step": 91900 }, { "epoch": 18.5, "learning_rate": 0.0019852354559692456, "loss": 1.2692, "step": 92000 }, { "epoch": 18.52, "learning_rate": 0.0019838155789275737, "loss": 1.2722, "step": 92100 }, { "epoch": 18.54, "learning_rate": 0.0019823942556982275, "loss": 1.2754, "step": 92200 }, { "epoch": 18.56, "learning_rate": 0.001980971489082321, "loss": 1.2689, "step": 92300 }, { "epoch": 18.58, "learning_rate": 0.00197954728188381, "loss": 1.2756, "step": 92400 }, { "epoch": 18.6, "learning_rate": 0.0019781216369094915, "loss": 1.2725, "step": 92500 }, { "epoch": 18.62, "learning_rate": 0.001976694556968995, "loss": 1.2689, "step": 92600 }, { "epoch": 18.64, "learning_rate": 0.0019752660448747795, "loss": 1.2701, "step": 92700 }, { "epoch": 18.66, "learning_rate": 0.001973836103442124, "loss": 1.2694, "step": 92800 }, { "epoch": 18.68, "learning_rate": 0.0019724047354891263, "loss": 1.2722, "step": 92900 }, { "epoch": 18.7, "learning_rate": 0.001970971943836695, "loss": 1.2724, "step": 93000 }, { "epoch": 18.72, "learning_rate": 0.0019695377313085453, "loss": 1.2683, "step": 93100 }, { "epoch": 18.74, "learning_rate": 0.0019681021007311905, "loss": 1.2739, "step": 93200 }, { "epoch": 18.76, "learning_rate": 0.001966665054933941, "loss": 1.2722, "step": 93300 }, { "epoch": 18.78, "learning_rate": 0.0019652265967488943, "loss": 1.2705, "step": 93400 }, { "epoch": 18.8, "learning_rate": 0.0019637867290109327, "loss": 1.2687, "step": 93500 }, { "epoch": 18.82, "learning_rate": 0.001962345454557716, "loss": 1.2698, "step": 93600 }, { "epoch": 18.84, "learning_rate": 0.0019609027762296765, "loss": 1.2722, "step": 93700 }, { "epoch": 18.86, "learning_rate": 0.001959458696870013, "loss": 1.274, "step": 93800 }, { "epoch": 18.88, "learning_rate": 0.0019580132193246854, "loss": 1.2683, "step": 93900 }, { "epoch": 18.9, "learning_rate": 0.001956566346442409, "loss": 1.268, "step": 94000 }, { "epoch": 18.92, "learning_rate": 0.001955118081074649, "loss": 1.2674, "step": 94100 }, { "epoch": 18.94, "learning_rate": 0.001953668426075616, "loss": 1.267, "step": 94200 }, { "epoch": 18.96, "learning_rate": 0.0019522173843022578, "loss": 1.2677, "step": 94300 }, { "epoch": 18.98, "learning_rate": 0.0019507649586142553, "loss": 1.2669, "step": 94400 }, { "epoch": 19.0, "eval_accuracy": 0.4069155401191752, "eval_loss": 1.2321081161499023, "eval_runtime": 19.6805, "eval_samples_per_second": 4042.992, "eval_steps_per_second": 15.802, "step": 94491 }, { "epoch": 19.0, "learning_rate": 0.0019493111518740181, "loss": 1.2724, "step": 94500 }, { "epoch": 19.02, "learning_rate": 0.001947855966946676, "loss": 1.2629, "step": 94600 }, { "epoch": 19.04, "learning_rate": 0.0019463994067000763, "loss": 1.2614, "step": 94700 }, { "epoch": 19.06, "learning_rate": 0.001944941474004775, "loss": 1.2603, "step": 94800 }, { "epoch": 19.08, "learning_rate": 0.0019434821717340346, "loss": 1.2643, "step": 94900 }, { "epoch": 19.1, "learning_rate": 0.001942021502763816, "loss": 1.2619, "step": 95000 }, { "epoch": 19.12, "learning_rate": 0.001940559469972774, "loss": 1.2676, "step": 95100 }, { "epoch": 19.14, "learning_rate": 0.0019390960762422499, "loss": 1.2606, "step": 95200 }, { "epoch": 19.16, "learning_rate": 0.0019376313244562687, "loss": 1.2628, "step": 95300 }, { "epoch": 19.18, "learning_rate": 0.0019361798852696786, "loss": 1.2691, "step": 95400 }, { "epoch": 19.2, "learning_rate": 0.0019347124395440384, "loss": 1.2652, "step": 95500 }, { "epoch": 19.22, "learning_rate": 0.0019332436444021162, "loss": 1.2672, "step": 95600 }, { "epoch": 19.24, "learning_rate": 0.0019317735027385814, "loss": 1.2668, "step": 95700 }, { "epoch": 19.26, "learning_rate": 0.0019303020174507568, "loss": 1.2604, "step": 95800 }, { "epoch": 19.28, "learning_rate": 0.001928829191438613, "loss": 1.2699, "step": 95900 }, { "epoch": 19.3, "learning_rate": 0.0019273550276047641, "loss": 1.2617, "step": 96000 }, { "epoch": 19.32, "learning_rate": 0.0019258795288544595, "loss": 1.2651, "step": 96100 }, { "epoch": 19.34, "learning_rate": 0.0019244026980955796, "loss": 1.2651, "step": 96200 }, { "epoch": 19.36, "learning_rate": 0.0019229245382386302, "loss": 1.2654, "step": 96300 }, { "epoch": 19.38, "learning_rate": 0.0019214450521967369, "loss": 1.2645, "step": 96400 }, { "epoch": 19.4, "learning_rate": 0.0019199642428856373, "loss": 1.2674, "step": 96500 }, { "epoch": 19.42, "learning_rate": 0.0019184821132236796, "loss": 1.2707, "step": 96600 }, { "epoch": 19.44, "learning_rate": 0.0019169986661318106, "loss": 1.2653, "step": 96700 }, { "epoch": 19.46, "learning_rate": 0.0019155139045335771, "loss": 1.2664, "step": 96800 }, { "epoch": 19.48, "learning_rate": 0.0019140278313551134, "loss": 1.2657, "step": 96900 }, { "epoch": 19.5, "learning_rate": 0.0019125404495251408, "loss": 1.2665, "step": 97000 }, { "epoch": 19.52, "learning_rate": 0.001911066655304144, "loss": 1.2674, "step": 97100 }, { "epoch": 19.54, "learning_rate": 0.0019095766779809568, "loss": 1.2644, "step": 97200 }, { "epoch": 19.56, "learning_rate": 0.0019080854007784964, "loss": 1.2648, "step": 97300 }, { "epoch": 19.58, "learning_rate": 0.0019065928266357385, "loss": 1.2669, "step": 97400 }, { "epoch": 19.6, "learning_rate": 0.001905098958494216, "loss": 1.2676, "step": 97500 }, { "epoch": 19.62, "learning_rate": 0.001903618757271021, "loss": 1.2662, "step": 97600 }, { "epoch": 19.65, "learning_rate": 0.001902122322833248, "loss": 1.2677, "step": 97700 }, { "epoch": 19.67, "learning_rate": 0.0019006246032070807, "loss": 1.2659, "step": 97800 }, { "epoch": 19.69, "learning_rate": 0.0018991256013441932, "loss": 1.2669, "step": 97900 }, { "epoch": 19.71, "learning_rate": 0.0018976253201987848, "loss": 1.2649, "step": 98000 }, { "epoch": 19.73, "learning_rate": 0.0018961237627275773, "loss": 1.2679, "step": 98100 }, { "epoch": 19.75, "learning_rate": 0.001894620931889807, "loss": 1.2692, "step": 98200 }, { "epoch": 19.77, "learning_rate": 0.0018931168306472199, "loss": 1.2655, "step": 98300 }, { "epoch": 19.79, "learning_rate": 0.0018916114619640656, "loss": 1.2646, "step": 98400 }, { "epoch": 19.81, "learning_rate": 0.0018901048288070927, "loss": 1.2647, "step": 98500 }, { "epoch": 19.83, "learning_rate": 0.0018885969341455395, "loss": 1.2651, "step": 98600 }, { "epoch": 19.85, "learning_rate": 0.0018870877809511327, "loss": 1.2642, "step": 98700 }, { "epoch": 19.87, "learning_rate": 0.001885577372198078, "loss": 1.2656, "step": 98800 }, { "epoch": 19.89, "learning_rate": 0.001884065710863056, "loss": 1.2627, "step": 98900 }, { "epoch": 19.91, "learning_rate": 0.0018825527999252157, "loss": 1.266, "step": 99000 }, { "epoch": 19.93, "learning_rate": 0.0018810386423661694, "loss": 1.2654, "step": 99100 }, { "epoch": 19.95, "learning_rate": 0.0018795232411699847, "loss": 1.264, "step": 99200 }, { "epoch": 19.97, "learning_rate": 0.0018780065993231816, "loss": 1.2633, "step": 99300 }, { "epoch": 19.99, "learning_rate": 0.0018764887198147245, "loss": 1.2683, "step": 99400 }, { "epoch": 20.0, "eval_accuracy": 0.4075025708445861, "eval_loss": 1.2268297672271729, "eval_runtime": 19.801, "eval_samples_per_second": 4018.383, "eval_steps_per_second": 15.706, "step": 99465 }, { "epoch": 20.01, "learning_rate": 0.0018749696056360177, "loss": 1.2603, "step": 99500 }, { "epoch": 20.03, "learning_rate": 0.0018734492597808972, "loss": 1.2609, "step": 99600 }, { "epoch": 20.05, "learning_rate": 0.001871927685245628, "loss": 1.2587, "step": 99700 }, { "epoch": 20.07, "learning_rate": 0.0018704048850288952, "loss": 1.2575, "step": 99800 }, { "epoch": 20.09, "learning_rate": 0.001868880862131801, "loss": 1.2582, "step": 99900 }, { "epoch": 20.11, "learning_rate": 0.0018673556195578558, "loss": 1.2595, "step": 100000 }, { "epoch": 20.13, "learning_rate": 0.0018658291603129745, "loss": 1.2615, "step": 100100 }, { "epoch": 20.15, "learning_rate": 0.0018643014874054691, "loss": 1.2603, "step": 100200 }, { "epoch": 20.17, "learning_rate": 0.0018627726038460447, "loss": 1.2634, "step": 100300 }, { "epoch": 20.19, "learning_rate": 0.001861242512647791, "loss": 1.2634, "step": 100400 }, { "epoch": 20.21, "learning_rate": 0.0018597112168261781, "loss": 1.2646, "step": 100500 }, { "epoch": 20.23, "learning_rate": 0.0018581787193990508, "loss": 1.2591, "step": 100600 }, { "epoch": 20.25, "learning_rate": 0.0018566450233866208, "loss": 1.2609, "step": 100700 }, { "epoch": 20.27, "learning_rate": 0.0018551101318114629, "loss": 1.2647, "step": 100800 }, { "epoch": 20.29, "learning_rate": 0.0018535740476985083, "loss": 1.2634, "step": 100900 }, { "epoch": 20.31, "learning_rate": 0.0018520367740750374, "loss": 1.2589, "step": 101000 }, { "epoch": 20.33, "learning_rate": 0.0018504983139706755, "loss": 1.2615, "step": 101100 }, { "epoch": 20.35, "learning_rate": 0.001848958670417386, "loss": 1.2561, "step": 101200 }, { "epoch": 20.37, "learning_rate": 0.0018474178464494648, "loss": 1.2627, "step": 101300 }, { "epoch": 20.39, "learning_rate": 0.001845875845103534, "loss": 1.2583, "step": 101400 }, { "epoch": 20.41, "learning_rate": 0.0018443326694185364, "loss": 1.2633, "step": 101500 }, { "epoch": 20.43, "learning_rate": 0.0018427883224357284, "loss": 1.2611, "step": 101600 }, { "epoch": 20.45, "learning_rate": 0.0018412428071986754, "loss": 1.2625, "step": 101700 }, { "epoch": 20.47, "learning_rate": 0.001839711599315474, "loss": 1.2607, "step": 101800 }, { "epoch": 20.49, "learning_rate": 0.001838163768316336, "loss": 1.2623, "step": 101900 }, { "epoch": 20.51, "learning_rate": 0.0018366147781769232, "loss": 1.2569, "step": 102000 }, { "epoch": 20.53, "learning_rate": 0.001835064631949951, "loss": 1.2585, "step": 102100 }, { "epoch": 20.55, "learning_rate": 0.0018335133326904144, "loss": 1.2631, "step": 102200 }, { "epoch": 20.57, "learning_rate": 0.001831960883455579, "loss": 1.2592, "step": 102300 }, { "epoch": 20.59, "learning_rate": 0.0018304072873049785, "loss": 1.2613, "step": 102400 }, { "epoch": 20.61, "learning_rate": 0.0018288525473004055, "loss": 1.2601, "step": 102500 }, { "epoch": 20.63, "learning_rate": 0.0018272966665059086, "loss": 1.2608, "step": 102600 }, { "epoch": 20.65, "learning_rate": 0.0018257396479877822, "loss": 1.2608, "step": 102700 }, { "epoch": 20.67, "learning_rate": 0.0018241814948145656, "loss": 1.2619, "step": 102800 }, { "epoch": 20.69, "learning_rate": 0.0018226222100570322, "loss": 1.2599, "step": 102900 }, { "epoch": 20.71, "learning_rate": 0.0018210617967881865, "loss": 1.2589, "step": 103000 }, { "epoch": 20.73, "learning_rate": 0.0018195002580832567, "loss": 1.2573, "step": 103100 }, { "epoch": 20.75, "learning_rate": 0.0018179375970196893, "loss": 1.2582, "step": 103200 }, { "epoch": 20.77, "learning_rate": 0.0018163738166771422, "loss": 1.2581, "step": 103300 }, { "epoch": 20.79, "learning_rate": 0.0018148089201374795, "loss": 1.2628, "step": 103400 }, { "epoch": 20.81, "learning_rate": 0.0018132429104847653, "loss": 1.2598, "step": 103500 }, { "epoch": 20.83, "learning_rate": 0.0018116757908052572, "loss": 1.2618, "step": 103600 }, { "epoch": 20.85, "learning_rate": 0.0018101075641874, "loss": 1.266, "step": 103700 }, { "epoch": 20.87, "learning_rate": 0.0018085382337218203, "loss": 1.2608, "step": 103800 }, { "epoch": 20.89, "learning_rate": 0.0018069678025013202, "loss": 1.2583, "step": 103900 }, { "epoch": 20.91, "learning_rate": 0.0018053962736208717, "loss": 1.2578, "step": 104000 }, { "epoch": 20.93, "learning_rate": 0.0018038236501776092, "loss": 1.2621, "step": 104100 }, { "epoch": 20.95, "learning_rate": 0.0018022499352708247, "loss": 1.2574, "step": 104200 }, { "epoch": 20.97, "learning_rate": 0.0018006751320019604, "loss": 1.2589, "step": 104300 }, { "epoch": 20.99, "learning_rate": 0.001799099243474605, "loss": 1.2632, "step": 104400 }, { "epoch": 21.0, "eval_accuracy": 0.40797378629408937, "eval_loss": 1.2228056192398071, "eval_runtime": 19.5365, "eval_samples_per_second": 4072.794, "eval_steps_per_second": 15.919, "step": 104438 }, { "epoch": 21.01, "learning_rate": 0.0017975222727944844, "loss": 1.2558, "step": 104500 }, { "epoch": 21.03, "learning_rate": 0.0017959442230694584, "loss": 1.2493, "step": 104600 }, { "epoch": 21.05, "learning_rate": 0.0017943650974095123, "loss": 1.2564, "step": 104700 }, { "epoch": 21.07, "learning_rate": 0.001792784898926753, "loss": 1.2552, "step": 104800 }, { "epoch": 21.09, "learning_rate": 0.0017912036307354006, "loss": 1.2495, "step": 104900 }, { "epoch": 21.11, "learning_rate": 0.0017896212959517843, "loss": 1.2554, "step": 105000 }, { "epoch": 21.13, "learning_rate": 0.0017880378976943344, "loss": 1.2587, "step": 105100 }, { "epoch": 21.15, "learning_rate": 0.0017864534390835776, "loss": 1.2574, "step": 105200 }, { "epoch": 21.17, "learning_rate": 0.00178486792324213, "loss": 1.2543, "step": 105300 }, { "epoch": 21.19, "learning_rate": 0.0017832813532946924, "loss": 1.2566, "step": 105400 }, { "epoch": 21.21, "learning_rate": 0.001781693732368041, "loss": 1.2563, "step": 105500 }, { "epoch": 21.23, "learning_rate": 0.001780105063591025, "loss": 1.259, "step": 105600 }, { "epoch": 21.25, "learning_rate": 0.0017785153500945576, "loss": 1.2568, "step": 105700 }, { "epoch": 21.27, "learning_rate": 0.001776924595011612, "loss": 1.2594, "step": 105800 }, { "epoch": 21.29, "learning_rate": 0.0017753328014772126, "loss": 1.2547, "step": 105900 }, { "epoch": 21.31, "learning_rate": 0.0017737399726284325, "loss": 1.2585, "step": 106000 }, { "epoch": 21.33, "learning_rate": 0.0017721461116043825, "loss": 1.2554, "step": 106100 }, { "epoch": 21.35, "learning_rate": 0.00177055122154621, "loss": 1.2586, "step": 106200 }, { "epoch": 21.37, "learning_rate": 0.001768955305597089, "loss": 1.2551, "step": 106300 }, { "epoch": 21.39, "learning_rate": 0.0017673583669022158, "loss": 1.256, "step": 106400 }, { "epoch": 21.41, "learning_rate": 0.0017657604086088023, "loss": 1.2537, "step": 106500 }, { "epoch": 21.43, "learning_rate": 0.0017641614338660694, "loss": 1.2552, "step": 106600 }, { "epoch": 21.45, "learning_rate": 0.0017625614458252417, "loss": 1.2555, "step": 106700 }, { "epoch": 21.47, "learning_rate": 0.0017609604476395407, "loss": 1.2555, "step": 106800 }, { "epoch": 21.49, "learning_rate": 0.0017593584424641785, "loss": 1.253, "step": 106900 }, { "epoch": 21.52, "learning_rate": 0.0017577714685050292, "loss": 1.2582, "step": 107000 }, { "epoch": 21.54, "learning_rate": 0.0017561674688150015, "loss": 1.2545, "step": 107100 }, { "epoch": 21.56, "learning_rate": 0.0017545624715812104, "loss": 1.2584, "step": 107200 }, { "epoch": 21.58, "learning_rate": 0.0017529564799667488, "loss": 1.2594, "step": 107300 }, { "epoch": 21.6, "learning_rate": 0.001751365571861091, "loss": 1.2576, "step": 107400 }, { "epoch": 21.62, "learning_rate": 0.0017497576108472055, "loss": 1.256, "step": 107500 }, { "epoch": 21.64, "learning_rate": 0.0017481486649219638, "loss": 1.2531, "step": 107600 }, { "epoch": 21.66, "learning_rate": 0.001746538737256242, "loss": 1.255, "step": 107700 }, { "epoch": 21.68, "learning_rate": 0.0017449278310228496, "loss": 1.258, "step": 107800 }, { "epoch": 21.7, "learning_rate": 0.0017433159493965259, "loss": 1.2543, "step": 107900 }, { "epoch": 21.72, "learning_rate": 0.0017417030955539316, "loss": 1.2588, "step": 108000 }, { "epoch": 21.74, "learning_rate": 0.0017400892726736443, "loss": 1.2568, "step": 108100 }, { "epoch": 21.76, "learning_rate": 0.0017384744839361499, "loss": 1.254, "step": 108200 }, { "epoch": 21.78, "learning_rate": 0.0017368587325238393, "loss": 1.2562, "step": 108300 }, { "epoch": 21.8, "learning_rate": 0.0017352420216209996, "loss": 1.2515, "step": 108400 }, { "epoch": 21.82, "learning_rate": 0.0017336243544138097, "loss": 1.2532, "step": 108500 }, { "epoch": 21.84, "learning_rate": 0.0017320057340903326, "loss": 1.2579, "step": 108600 }, { "epoch": 21.86, "learning_rate": 0.0017303861638405097, "loss": 1.2541, "step": 108700 }, { "epoch": 21.88, "learning_rate": 0.001728765646856154, "loss": 1.2553, "step": 108800 }, { "epoch": 21.9, "learning_rate": 0.0017271441863309462, "loss": 1.2541, "step": 108900 }, { "epoch": 21.92, "learning_rate": 0.0017255217854604242, "loss": 1.2558, "step": 109000 }, { "epoch": 21.94, "learning_rate": 0.0017238984474419804, "loss": 1.2548, "step": 109100 }, { "epoch": 21.96, "learning_rate": 0.0017222741754748536, "loss": 1.2568, "step": 109200 }, { "epoch": 21.98, "learning_rate": 0.0017206489727601237, "loss": 1.2524, "step": 109300 }, { "epoch": 22.0, "learning_rate": 0.0017190228425007044, "loss": 1.2563, "step": 109400 }, { "epoch": 22.0, "eval_accuracy": 0.40835639033056986, "eval_loss": 1.2162704467773438, "eval_runtime": 19.8119, "eval_samples_per_second": 4016.173, "eval_steps_per_second": 15.698, "step": 109411 }, { "epoch": 22.02, "learning_rate": 0.001717395787901338, "loss": 1.2459, "step": 109500 }, { "epoch": 22.04, "learning_rate": 0.0017157678121685874, "loss": 1.2485, "step": 109600 }, { "epoch": 22.06, "learning_rate": 0.0017141389185108325, "loss": 1.25, "step": 109700 }, { "epoch": 22.08, "learning_rate": 0.0017125091101382601, "loss": 1.2473, "step": 109800 }, { "epoch": 22.1, "learning_rate": 0.0017108783902628616, "loss": 1.2489, "step": 109900 }, { "epoch": 22.12, "learning_rate": 0.0017092467620984239, "loss": 1.2498, "step": 110000 }, { "epoch": 22.14, "learning_rate": 0.0017076142288605242, "loss": 1.2485, "step": 110100 }, { "epoch": 22.16, "learning_rate": 0.0017059807937665227, "loss": 1.2542, "step": 110200 }, { "epoch": 22.18, "learning_rate": 0.0017043464600355585, "loss": 1.2515, "step": 110300 }, { "epoch": 22.2, "learning_rate": 0.0017027112308885396, "loss": 1.2534, "step": 110400 }, { "epoch": 22.22, "learning_rate": 0.0017010751095481403, "loss": 1.2504, "step": 110500 }, { "epoch": 22.24, "learning_rate": 0.001699438099238793, "loss": 1.25, "step": 110600 }, { "epoch": 22.26, "learning_rate": 0.0016978002031866812, "loss": 1.2516, "step": 110700 }, { "epoch": 22.28, "learning_rate": 0.0016961614246197348, "loss": 1.2541, "step": 110800 }, { "epoch": 22.3, "learning_rate": 0.001694521766767623, "loss": 1.2501, "step": 110900 }, { "epoch": 22.32, "learning_rate": 0.001692881232861747, "loss": 1.2477, "step": 111000 }, { "epoch": 22.34, "learning_rate": 0.001691239826135236, "loss": 1.2528, "step": 111100 }, { "epoch": 22.36, "learning_rate": 0.0016895975498229378, "loss": 1.2535, "step": 111200 }, { "epoch": 22.38, "learning_rate": 0.0016879544071614144, "loss": 1.2523, "step": 111300 }, { "epoch": 22.4, "learning_rate": 0.0016863104013889359, "loss": 1.2528, "step": 111400 }, { "epoch": 22.42, "learning_rate": 0.001684681988647626, "loss": 1.248, "step": 111500 }, { "epoch": 22.44, "learning_rate": 0.0016830362749250844, "loss": 1.2526, "step": 111600 }, { "epoch": 22.46, "learning_rate": 0.0016813897077841348, "loss": 1.2497, "step": 111700 }, { "epoch": 22.48, "learning_rate": 0.0016797422904697957, "loss": 1.2518, "step": 111800 }, { "epoch": 22.5, "learning_rate": 0.001678094026228761, "loss": 1.2476, "step": 111900 }, { "epoch": 22.52, "learning_rate": 0.0016764614135541252, "loss": 1.2516, "step": 112000 }, { "epoch": 22.54, "learning_rate": 0.001674811473594641, "loss": 1.2452, "step": 112100 }, { "epoch": 22.56, "learning_rate": 0.0016731606964260073, "loss": 1.2497, "step": 112200 }, { "epoch": 22.58, "learning_rate": 0.0016715090853015398, "loss": 1.2523, "step": 112300 }, { "epoch": 22.6, "learning_rate": 0.0016698566434761963, "loss": 1.2479, "step": 112400 }, { "epoch": 22.62, "learning_rate": 0.0016682033742065746, "loss": 1.2541, "step": 112500 }, { "epoch": 22.64, "learning_rate": 0.0016665492807509006, "loss": 1.2487, "step": 112600 }, { "epoch": 22.66, "learning_rate": 0.0016648943663690257, "loss": 1.2482, "step": 112700 }, { "epoch": 22.68, "learning_rate": 0.0016632386343224186, "loss": 1.2489, "step": 112800 }, { "epoch": 22.7, "learning_rate": 0.0016615820878741606, "loss": 1.2516, "step": 112900 }, { "epoch": 22.72, "learning_rate": 0.0016599247302889358, "loss": 1.2487, "step": 113000 }, { "epoch": 22.74, "learning_rate": 0.0016582665648330298, "loss": 1.2538, "step": 113100 }, { "epoch": 22.76, "learning_rate": 0.0016566075947743175, "loss": 1.2529, "step": 113200 }, { "epoch": 22.78, "learning_rate": 0.0016549478233822618, "loss": 1.2498, "step": 113300 }, { "epoch": 22.8, "learning_rate": 0.0016532872539279028, "loss": 1.2516, "step": 113400 }, { "epoch": 22.82, "learning_rate": 0.0016516258896838553, "loss": 1.2495, "step": 113500 }, { "epoch": 22.84, "learning_rate": 0.0016499637339242989, "loss": 1.2488, "step": 113600 }, { "epoch": 22.86, "learning_rate": 0.0016483007899249745, "loss": 1.2488, "step": 113700 }, { "epoch": 22.88, "learning_rate": 0.0016466370609631749, "loss": 1.2482, "step": 113800 }, { "epoch": 22.9, "learning_rate": 0.0016449725503177412, "loss": 1.2518, "step": 113900 }, { "epoch": 22.92, "learning_rate": 0.0016433072612690542, "loss": 1.2484, "step": 114000 }, { "epoch": 22.94, "learning_rate": 0.0016416411970990297, "loss": 1.2538, "step": 114100 }, { "epoch": 22.96, "learning_rate": 0.0016399743610911097, "loss": 1.2527, "step": 114200 }, { "epoch": 22.98, "learning_rate": 0.0016383067565302588, "loss": 1.2523, "step": 114300 }, { "epoch": 23.0, "eval_accuracy": 0.4088067653938865, "eval_loss": 1.212856650352478, "eval_runtime": 19.7647, "eval_samples_per_second": 4025.761, "eval_steps_per_second": 15.735, "step": 114384 }, { "epoch": 23.0, "learning_rate": 0.001636638386702955, "loss": 1.2473, "step": 114400 }, { "epoch": 23.02, "learning_rate": 0.0016349692548971854, "loss": 1.2428, "step": 114500 }, { "epoch": 23.04, "learning_rate": 0.001633299364402438, "loss": 1.2422, "step": 114600 }, { "epoch": 23.06, "learning_rate": 0.0016316287185096973, "loss": 1.2432, "step": 114700 }, { "epoch": 23.08, "learning_rate": 0.0016299573205114343, "loss": 1.2439, "step": 114800 }, { "epoch": 23.1, "learning_rate": 0.001628301898865501, "loss": 1.2479, "step": 114900 }, { "epoch": 23.12, "learning_rate": 0.0016266290139783787, "loss": 1.2494, "step": 115000 }, { "epoch": 23.14, "learning_rate": 0.0016249553868390434, "loss": 1.2441, "step": 115100 }, { "epoch": 23.16, "learning_rate": 0.0016232810207458424, "loss": 1.245, "step": 115200 }, { "epoch": 23.18, "learning_rate": 0.0016216059189985796, "loss": 1.2475, "step": 115300 }, { "epoch": 23.2, "learning_rate": 0.0016199300848985091, "loss": 1.2451, "step": 115400 }, { "epoch": 23.22, "learning_rate": 0.0016182535217483282, "loss": 1.247, "step": 115500 }, { "epoch": 23.24, "learning_rate": 0.0016165762328521703, "loss": 1.2434, "step": 115600 }, { "epoch": 23.26, "learning_rate": 0.0016148982215156002, "loss": 1.2469, "step": 115700 }, { "epoch": 23.28, "learning_rate": 0.0016132194910456056, "loss": 1.245, "step": 115800 }, { "epoch": 23.3, "learning_rate": 0.0016115400447505918, "loss": 1.2507, "step": 115900 }, { "epoch": 23.32, "learning_rate": 0.0016098598859403746, "loss": 1.2434, "step": 116000 }, { "epoch": 23.34, "learning_rate": 0.0016081790179261746, "loss": 1.2418, "step": 116100 }, { "epoch": 23.37, "learning_rate": 0.0016064974440206093, "loss": 1.2476, "step": 116200 }, { "epoch": 23.39, "learning_rate": 0.0016048151675376878, "loss": 1.2492, "step": 116300 }, { "epoch": 23.41, "learning_rate": 0.001603132191792804, "loss": 1.2383, "step": 116400 }, { "epoch": 23.43, "learning_rate": 0.0016014485201027297, "loss": 1.2473, "step": 116500 }, { "epoch": 23.45, "learning_rate": 0.0015997641557856073, "loss": 1.2489, "step": 116600 }, { "epoch": 23.47, "learning_rate": 0.0015980791021609464, "loss": 1.2463, "step": 116700 }, { "epoch": 23.49, "learning_rate": 0.001596393362549613, "loss": 1.2469, "step": 116800 }, { "epoch": 23.51, "learning_rate": 0.0015947069402738262, "loss": 1.2443, "step": 116900 }, { "epoch": 23.53, "learning_rate": 0.0015930367130251378, "loss": 1.2448, "step": 117000 }, { "epoch": 23.55, "learning_rate": 0.0015913489421361716, "loss": 1.2469, "step": 117100 }, { "epoch": 23.57, "learning_rate": 0.0015896604985241852, "loss": 1.2453, "step": 117200 }, { "epoch": 23.59, "learning_rate": 0.0015879713855167263, "loss": 1.2467, "step": 117300 }, { "epoch": 23.61, "learning_rate": 0.0015862816064426619, "loss": 1.2437, "step": 117400 }, { "epoch": 23.63, "learning_rate": 0.0015845911646321712, "loss": 1.2453, "step": 117500 }, { "epoch": 23.65, "learning_rate": 0.00158290006341674, "loss": 1.2459, "step": 117600 }, { "epoch": 23.67, "learning_rate": 0.0015812083061291539, "loss": 1.2456, "step": 117700 }, { "epoch": 23.69, "learning_rate": 0.0015795158961034905, "loss": 1.2425, "step": 117800 }, { "epoch": 23.71, "learning_rate": 0.0015778228366751152, "loss": 1.2451, "step": 117900 }, { "epoch": 23.73, "learning_rate": 0.001576129131180672, "loss": 1.246, "step": 118000 }, { "epoch": 23.75, "learning_rate": 0.001574434782958078, "loss": 1.2433, "step": 118100 }, { "epoch": 23.77, "learning_rate": 0.001572739795346519, "loss": 1.2463, "step": 118200 }, { "epoch": 23.79, "learning_rate": 0.001571044171686438, "loss": 1.2468, "step": 118300 }, { "epoch": 23.81, "learning_rate": 0.001569347915319534, "loss": 1.2445, "step": 118400 }, { "epoch": 23.83, "learning_rate": 0.0015676510295887514, "loss": 1.246, "step": 118500 }, { "epoch": 23.85, "learning_rate": 0.0015659535178382759, "loss": 1.2459, "step": 118600 }, { "epoch": 23.87, "learning_rate": 0.0015642553834135256, "loss": 1.2422, "step": 118700 }, { "epoch": 23.89, "learning_rate": 0.0015625566296611475, "loss": 1.2426, "step": 118800 }, { "epoch": 23.91, "learning_rate": 0.0015608572599290084, "loss": 1.2443, "step": 118900 }, { "epoch": 23.93, "learning_rate": 0.001559157277566188, "loss": 1.245, "step": 119000 }, { "epoch": 23.95, "learning_rate": 0.001557456685922975, "loss": 1.2447, "step": 119100 }, { "epoch": 23.97, "learning_rate": 0.0015557554883508585, "loss": 1.245, "step": 119200 }, { "epoch": 23.99, "learning_rate": 0.0015540536882025203, "loss": 1.2394, "step": 119300 }, { "epoch": 24.0, "eval_accuracy": 0.408813924305172, "eval_loss": 1.208786129951477, "eval_runtime": 19.9606, "eval_samples_per_second": 3986.246, "eval_steps_per_second": 15.581, "step": 119358 }, { "epoch": 24.01, "learning_rate": 0.001552351288831832, "loss": 1.239, "step": 119400 }, { "epoch": 24.03, "learning_rate": 0.0015506482935938443, "loss": 1.2356, "step": 119500 }, { "epoch": 24.05, "learning_rate": 0.0015489447058447835, "loss": 1.2393, "step": 119600 }, { "epoch": 24.07, "learning_rate": 0.0015472405289420421, "loss": 1.2373, "step": 119700 }, { "epoch": 24.09, "learning_rate": 0.0015455357662441758, "loss": 1.2384, "step": 119800 }, { "epoch": 24.11, "learning_rate": 0.0015438304211108924, "loss": 1.2399, "step": 119900 }, { "epoch": 24.13, "learning_rate": 0.00154212449690305, "loss": 1.238, "step": 120000 }, { "epoch": 24.15, "learning_rate": 0.0015404179969826454, "loss": 1.2407, "step": 120100 }, { "epoch": 24.17, "learning_rate": 0.0015387109247128126, "loss": 1.2427, "step": 120200 }, { "epoch": 24.19, "learning_rate": 0.001537003283457811, "loss": 1.2432, "step": 120300 }, { "epoch": 24.21, "learning_rate": 0.0015352950765830234, "loss": 1.2396, "step": 120400 }, { "epoch": 24.23, "learning_rate": 0.001533586307454946, "loss": 1.2404, "step": 120500 }, { "epoch": 24.25, "learning_rate": 0.0015318769794411841, "loss": 1.2383, "step": 120600 }, { "epoch": 24.27, "learning_rate": 0.0015301670959104435, "loss": 1.2408, "step": 120700 }, { "epoch": 24.29, "learning_rate": 0.0015284566602325254, "loss": 1.2389, "step": 120800 }, { "epoch": 24.31, "learning_rate": 0.0015267456757783189, "loss": 1.2397, "step": 120900 }, { "epoch": 24.33, "learning_rate": 0.0015250341459197947, "loss": 1.2415, "step": 121000 }, { "epoch": 24.35, "learning_rate": 0.0015233563207578788, "loss": 1.2356, "step": 121100 }, { "epoch": 24.37, "learning_rate": 0.0015216437209509943, "loss": 1.2364, "step": 121200 }, { "epoch": 24.39, "learning_rate": 0.001519930585794613, "loss": 1.2443, "step": 121300 }, { "epoch": 24.41, "learning_rate": 0.0015182169186649438, "loss": 1.2386, "step": 121400 }, { "epoch": 24.43, "learning_rate": 0.001516502722939245, "loss": 1.2404, "step": 121500 }, { "epoch": 24.45, "learning_rate": 0.0015147880019958154, "loss": 1.2416, "step": 121600 }, { "epoch": 24.47, "learning_rate": 0.0015130727592139904, "loss": 1.2418, "step": 121700 }, { "epoch": 24.49, "learning_rate": 0.0015113741581417957, "loss": 1.2432, "step": 121800 }, { "epoch": 24.51, "learning_rate": 0.001509657886959314, "loss": 1.2408, "step": 121900 }, { "epoch": 24.53, "learning_rate": 0.0015079411040487545, "loss": 1.242, "step": 122000 }, { "epoch": 24.55, "learning_rate": 0.0015062238127935158, "loss": 1.2398, "step": 122100 }, { "epoch": 24.57, "learning_rate": 0.0015045060165779975, "loss": 1.2379, "step": 122200 }, { "epoch": 24.59, "learning_rate": 0.001502787718787595, "loss": 1.2364, "step": 122300 }, { "epoch": 24.61, "learning_rate": 0.0015010689228086916, "loss": 1.2375, "step": 122400 }, { "epoch": 24.63, "learning_rate": 0.0014993496320286532, "loss": 1.2353, "step": 122500 }, { "epoch": 24.65, "learning_rate": 0.00149762984983582, "loss": 1.2391, "step": 122600 }, { "epoch": 24.67, "learning_rate": 0.0014959095796195018, "loss": 1.2424, "step": 122700 }, { "epoch": 24.69, "learning_rate": 0.0014941888247699687, "loss": 1.2423, "step": 122800 }, { "epoch": 24.71, "learning_rate": 0.0014924675886784473, "loss": 1.2402, "step": 122900 }, { "epoch": 24.73, "learning_rate": 0.001490745874737111, "loss": 1.2396, "step": 123000 }, { "epoch": 24.75, "learning_rate": 0.001489023686339077, "loss": 1.2386, "step": 123100 }, { "epoch": 24.77, "learning_rate": 0.001487301026878396, "loss": 1.2402, "step": 123200 }, { "epoch": 24.79, "learning_rate": 0.001485577899750048, "loss": 1.2379, "step": 123300 }, { "epoch": 24.81, "learning_rate": 0.0014838543083499334, "loss": 1.2432, "step": 123400 }, { "epoch": 24.83, "learning_rate": 0.001482130256074869, "loss": 1.24, "step": 123500 }, { "epoch": 24.85, "learning_rate": 0.001480405746322579, "loss": 1.2389, "step": 123600 }, { "epoch": 24.87, "learning_rate": 0.0014786807824916897, "loss": 1.2393, "step": 123700 }, { "epoch": 24.89, "learning_rate": 0.0014769553679817215, "loss": 1.237, "step": 123800 }, { "epoch": 24.91, "learning_rate": 0.0014752295061930846, "loss": 1.2358, "step": 123900 }, { "epoch": 24.93, "learning_rate": 0.0014735032005270684, "loss": 1.2392, "step": 124000 }, { "epoch": 24.95, "learning_rate": 0.0014717764543858392, "loss": 1.2389, "step": 124100 }, { "epoch": 24.97, "learning_rate": 0.00147004927117243, "loss": 1.2392, "step": 124200 }, { "epoch": 24.99, "learning_rate": 0.001468321654290736, "loss": 1.2387, "step": 124300 }, { "epoch": 25.0, "eval_accuracy": 0.40966806196499067, "eval_loss": 1.2022136449813843, "eval_runtime": 19.7693, "eval_samples_per_second": 4024.827, "eval_steps_per_second": 15.731, "step": 124331 }, { "epoch": 25.01, "learning_rate": 0.0014665936071455062, "loss": 1.2317, "step": 124400 }, { "epoch": 25.03, "learning_rate": 0.0014648651331423384, "loss": 1.23, "step": 124500 }, { "epoch": 25.05, "learning_rate": 0.0014631362356876715, "loss": 1.2321, "step": 124600 }, { "epoch": 25.07, "learning_rate": 0.0014614069181887784, "loss": 1.2331, "step": 124700 }, { "epoch": 25.09, "learning_rate": 0.0014596771840537605, "loss": 1.2315, "step": 124800 }, { "epoch": 25.11, "learning_rate": 0.0014579470366915396, "loss": 1.2357, "step": 124900 }, { "epoch": 25.13, "learning_rate": 0.0014562164795118523, "loss": 1.2365, "step": 125000 }, { "epoch": 25.15, "learning_rate": 0.0014544855159252432, "loss": 1.2316, "step": 125100 }, { "epoch": 25.17, "learning_rate": 0.001452754149343057, "loss": 1.2337, "step": 125200 }, { "epoch": 25.19, "learning_rate": 0.001451022383177433, "loss": 1.235, "step": 125300 }, { "epoch": 25.21, "learning_rate": 0.0014492902208412984, "loss": 1.2352, "step": 125400 }, { "epoch": 25.24, "learning_rate": 0.0014475576657483604, "loss": 1.231, "step": 125500 }, { "epoch": 25.26, "learning_rate": 0.001445824721313101, "loss": 1.2361, "step": 125600 }, { "epoch": 25.28, "learning_rate": 0.0014441087261535155, "loss": 1.2323, "step": 125700 }, { "epoch": 25.3, "learning_rate": 0.0014423750170883193, "loss": 1.2378, "step": 125800 }, { "epoch": 25.32, "learning_rate": 0.001440640928894652, "loss": 1.2323, "step": 125900 }, { "epoch": 25.34, "learning_rate": 0.0014389064649900165, "loss": 1.2387, "step": 126000 }, { "epoch": 25.36, "learning_rate": 0.001437171628792657, "loss": 1.2401, "step": 126100 }, { "epoch": 25.38, "learning_rate": 0.0014354364237215494, "loss": 1.2334, "step": 126200 }, { "epoch": 25.4, "learning_rate": 0.0014337008531963994, "loss": 1.2348, "step": 126300 }, { "epoch": 25.42, "learning_rate": 0.0014319649206376301, "loss": 1.236, "step": 126400 }, { "epoch": 25.44, "learning_rate": 0.00143022862946638, "loss": 1.2343, "step": 126500 }, { "epoch": 25.46, "learning_rate": 0.0014284919831044935, "loss": 1.2348, "step": 126600 }, { "epoch": 25.48, "learning_rate": 0.0014267549849745155, "loss": 1.2337, "step": 126700 }, { "epoch": 25.5, "learning_rate": 0.0014250176384996832, "loss": 1.2328, "step": 126800 }, { "epoch": 25.52, "learning_rate": 0.0014232799471039221, "loss": 1.2345, "step": 126900 }, { "epoch": 25.54, "learning_rate": 0.0014215419142118353, "loss": 1.2391, "step": 127000 }, { "epoch": 25.56, "learning_rate": 0.0014198035432487004, "loss": 1.2323, "step": 127100 }, { "epoch": 25.58, "learning_rate": 0.0014180648376404608, "loss": 1.2352, "step": 127200 }, { "epoch": 25.6, "learning_rate": 0.0014163258008137198, "loss": 1.2319, "step": 127300 }, { "epoch": 25.62, "learning_rate": 0.0014145864361957325, "loss": 1.2329, "step": 127400 }, { "epoch": 25.64, "learning_rate": 0.001412846747214401, "loss": 1.2351, "step": 127500 }, { "epoch": 25.66, "learning_rate": 0.001411106737298266, "loss": 1.2341, "step": 127600 }, { "epoch": 25.68, "learning_rate": 0.001409366409876501, "loss": 1.2328, "step": 127700 }, { "epoch": 25.7, "learning_rate": 0.001407625768378905, "loss": 1.235, "step": 127800 }, { "epoch": 25.72, "learning_rate": 0.0014058848162358966, "loss": 1.2304, "step": 127900 }, { "epoch": 25.74, "learning_rate": 0.001404160970981524, "loss": 1.2324, "step": 128000 }, { "epoch": 25.76, "learning_rate": 0.0014024194108622253, "loss": 1.2356, "step": 128100 }, { "epoch": 25.78, "learning_rate": 0.0014006775503580902, "loss": 1.2296, "step": 128200 }, { "epoch": 25.8, "learning_rate": 0.0013989353929019378, "loss": 1.2354, "step": 128300 }, { "epoch": 25.82, "learning_rate": 0.0013971929419271745, "loss": 1.2314, "step": 128400 }, { "epoch": 25.84, "learning_rate": 0.0013954502008677843, "loss": 1.2336, "step": 128500 }, { "epoch": 25.86, "learning_rate": 0.0013937071731583237, "loss": 1.2356, "step": 128600 }, { "epoch": 25.88, "learning_rate": 0.001391963862233913, "loss": 1.2314, "step": 128700 }, { "epoch": 25.9, "learning_rate": 0.0013902202715302314, "loss": 1.2336, "step": 128800 }, { "epoch": 25.92, "learning_rate": 0.0013884764044835088, "loss": 1.234, "step": 128900 }, { "epoch": 25.94, "learning_rate": 0.001386732264530521, "loss": 1.2355, "step": 129000 }, { "epoch": 25.96, "learning_rate": 0.0013849878551085804, "loss": 1.2334, "step": 129100 }, { "epoch": 25.98, "learning_rate": 0.0013832431796555308, "loss": 1.2339, "step": 129200 }, { "epoch": 26.0, "learning_rate": 0.0013814982416097406, "loss": 1.234, "step": 129300 }, { "epoch": 26.0, "eval_accuracy": 0.4099652363267976, "eval_loss": 1.1980103254318237, "eval_runtime": 19.8186, "eval_samples_per_second": 4014.806, "eval_steps_per_second": 15.692, "step": 129304 }, { "epoch": 26.02, "learning_rate": 0.0013797530444100952, "loss": 1.2258, "step": 129400 }, { "epoch": 26.04, "learning_rate": 0.0013780075914959912, "loss": 1.2242, "step": 129500 }, { "epoch": 26.06, "learning_rate": 0.001376261886307329, "loss": 1.2273, "step": 129600 }, { "epoch": 26.08, "learning_rate": 0.001374515932284506, "loss": 1.2251, "step": 129700 }, { "epoch": 26.1, "learning_rate": 0.001372787196065969, "loss": 1.23, "step": 129800 }, { "epoch": 26.12, "learning_rate": 0.0013710407571004548, "loss": 1.2325, "step": 129900 }, { "epoch": 26.14, "learning_rate": 0.0013692940795904673, "loss": 1.2274, "step": 130000 }, { "epoch": 26.16, "learning_rate": 0.0013675471669783198, "loss": 1.2278, "step": 130100 }, { "epoch": 26.18, "learning_rate": 0.0013658000227067901, "loss": 1.2245, "step": 130200 }, { "epoch": 26.2, "learning_rate": 0.0013640526502191111, "loss": 1.2266, "step": 130300 }, { "epoch": 26.22, "learning_rate": 0.0013623050529589667, "loss": 1.2315, "step": 130400 }, { "epoch": 26.24, "learning_rate": 0.001360557234370483, "loss": 1.2302, "step": 130500 }, { "epoch": 26.26, "learning_rate": 0.001358809197898223, "loss": 1.2324, "step": 130600 }, { "epoch": 26.28, "learning_rate": 0.0013570609469871781, "loss": 1.2305, "step": 130700 }, { "epoch": 26.3, "learning_rate": 0.001355312485082764, "loss": 1.2294, "step": 130800 }, { "epoch": 26.32, "learning_rate": 0.0013535638156308098, "loss": 1.2278, "step": 130900 }, { "epoch": 26.34, "learning_rate": 0.0013518149420775557, "loss": 1.224, "step": 131000 }, { "epoch": 26.36, "learning_rate": 0.0013500658678696432, "loss": 1.2296, "step": 131100 }, { "epoch": 26.38, "learning_rate": 0.00134831659645411, "loss": 1.228, "step": 131200 }, { "epoch": 26.4, "learning_rate": 0.0013465671312783809, "loss": 1.2245, "step": 131300 }, { "epoch": 26.42, "learning_rate": 0.0013448174757902645, "loss": 1.2305, "step": 131400 }, { "epoch": 26.44, "learning_rate": 0.0013430676334379426, "loss": 1.2305, "step": 131500 }, { "epoch": 26.46, "learning_rate": 0.0013413176076699674, "loss": 1.2294, "step": 131600 }, { "epoch": 26.48, "learning_rate": 0.0013395674019352503, "loss": 1.2312, "step": 131700 }, { "epoch": 26.5, "learning_rate": 0.0013378170196830588, "loss": 1.2278, "step": 131800 }, { "epoch": 26.52, "learning_rate": 0.001336066464363008, "loss": 1.2289, "step": 131900 }, { "epoch": 26.54, "learning_rate": 0.0013343157394250537, "loss": 1.2242, "step": 132000 }, { "epoch": 26.56, "learning_rate": 0.0013325648483194865, "loss": 1.2275, "step": 132100 }, { "epoch": 26.58, "learning_rate": 0.0013308137944969248, "loss": 1.2262, "step": 132200 }, { "epoch": 26.6, "learning_rate": 0.001329062581408306, "loss": 1.2289, "step": 132300 }, { "epoch": 26.62, "learning_rate": 0.0013273112125048833, "loss": 1.2341, "step": 132400 }, { "epoch": 26.64, "learning_rate": 0.0013255596912382156, "loss": 1.2288, "step": 132500 }, { "epoch": 26.66, "learning_rate": 0.0013238080210601635, "loss": 1.2267, "step": 132600 }, { "epoch": 26.68, "learning_rate": 0.0013220562054228793, "loss": 1.2267, "step": 132700 }, { "epoch": 26.7, "learning_rate": 0.0013203042477788038, "loss": 1.2287, "step": 132800 }, { "epoch": 26.72, "learning_rate": 0.001318552151580656, "loss": 1.226, "step": 132900 }, { "epoch": 26.74, "learning_rate": 0.001316799920281429, "loss": 1.2294, "step": 133000 }, { "epoch": 26.76, "learning_rate": 0.0013150475573343817, "loss": 1.2272, "step": 133100 }, { "epoch": 26.78, "learning_rate": 0.0013132950661930332, "loss": 1.23, "step": 133200 }, { "epoch": 26.8, "learning_rate": 0.001311542450311154, "loss": 1.2282, "step": 133300 }, { "epoch": 26.82, "learning_rate": 0.0013097897131427616, "loss": 1.2297, "step": 133400 }, { "epoch": 26.84, "learning_rate": 0.0013080368581421117, "loss": 1.2269, "step": 133500 }, { "epoch": 26.86, "learning_rate": 0.0013062838887636927, "loss": 1.2283, "step": 133600 }, { "epoch": 26.88, "learning_rate": 0.001304530808462218, "loss": 1.2292, "step": 133700 }, { "epoch": 26.9, "learning_rate": 0.0013027776206926205, "loss": 1.2306, "step": 133800 }, { "epoch": 26.92, "learning_rate": 0.0013010243289100437, "loss": 1.2264, "step": 133900 }, { "epoch": 26.94, "learning_rate": 0.0012992709365698368, "loss": 1.2249, "step": 134000 }, { "epoch": 26.96, "learning_rate": 0.001297517447127547, "loss": 1.2279, "step": 134100 }, { "epoch": 26.98, "learning_rate": 0.001295763864038913, "loss": 1.2272, "step": 134200 }, { "epoch": 27.0, "eval_accuracy": 0.41072249005388595, "eval_loss": 1.1898874044418335, "eval_runtime": 19.6468, "eval_samples_per_second": 4049.93, "eval_steps_per_second": 15.83, "step": 134277 }, { "epoch": 27.0, "learning_rate": 0.0012940101907598575, "loss": 1.2222, "step": 134300 }, { "epoch": 27.02, "learning_rate": 0.0012922564307464824, "loss": 1.2168, "step": 134400 }, { "epoch": 27.04, "learning_rate": 0.0012905025874550586, "loss": 1.2195, "step": 134500 }, { "epoch": 27.06, "learning_rate": 0.0012887486643420223, "loss": 1.2188, "step": 134600 }, { "epoch": 27.08, "learning_rate": 0.001286994664863967, "loss": 1.2162, "step": 134700 }, { "epoch": 27.11, "learning_rate": 0.0012852405924776362, "loss": 1.2236, "step": 134800 }, { "epoch": 27.13, "learning_rate": 0.0012834864506399174, "loss": 1.2245, "step": 134900 }, { "epoch": 27.15, "learning_rate": 0.001281732242807835, "loss": 1.2193, "step": 135000 }, { "epoch": 27.17, "learning_rate": 0.0012799779724385432, "loss": 1.2235, "step": 135100 }, { "epoch": 27.19, "learning_rate": 0.0012782236429893202, "loss": 1.2252, "step": 135200 }, { "epoch": 27.21, "learning_rate": 0.0012764692579175594, "loss": 1.2253, "step": 135300 }, { "epoch": 27.23, "learning_rate": 0.0012747148206807646, "loss": 1.2226, "step": 135400 }, { "epoch": 27.25, "learning_rate": 0.0012729603347365424, "loss": 1.2217, "step": 135500 }, { "epoch": 27.27, "learning_rate": 0.0012712058035425956, "loss": 1.2222, "step": 135600 }, { "epoch": 27.29, "learning_rate": 0.0012694512305567152, "loss": 1.2209, "step": 135700 }, { "epoch": 27.31, "learning_rate": 0.0012677141655283745, "loss": 1.2256, "step": 135800 }, { "epoch": 27.33, "learning_rate": 0.0012659595196639695, "loss": 1.2215, "step": 135900 }, { "epoch": 27.35, "learning_rate": 0.0012642048423468924, "loss": 1.2238, "step": 136000 }, { "epoch": 27.37, "learning_rate": 0.0012624501370352233, "loss": 1.222, "step": 136100 }, { "epoch": 27.39, "learning_rate": 0.0012606954071870964, "loss": 1.2196, "step": 136200 }, { "epoch": 27.41, "learning_rate": 0.0012589406562606954, "loss": 1.2271, "step": 136300 }, { "epoch": 27.43, "learning_rate": 0.0012571858877142449, "loss": 1.2228, "step": 136400 }, { "epoch": 27.45, "learning_rate": 0.001255431105006004, "loss": 1.2225, "step": 136500 }, { "epoch": 27.47, "learning_rate": 0.0012536763115942604, "loss": 1.2253, "step": 136600 }, { "epoch": 27.49, "learning_rate": 0.0012519215109373229, "loss": 1.222, "step": 136700 }, { "epoch": 27.51, "learning_rate": 0.001250166706493513, "loss": 1.2234, "step": 136800 }, { "epoch": 27.53, "learning_rate": 0.001248411901721162, "loss": 1.2235, "step": 136900 }, { "epoch": 27.55, "learning_rate": 0.0012466571000786, "loss": 1.2218, "step": 137000 }, { "epoch": 27.57, "learning_rate": 0.001244902305024152, "loss": 1.2255, "step": 137100 }, { "epoch": 27.59, "learning_rate": 0.0012431475200161302, "loss": 1.2253, "step": 137200 }, { "epoch": 27.61, "learning_rate": 0.0012413927485128253, "loss": 1.2223, "step": 137300 }, { "epoch": 27.63, "learning_rate": 0.001239637993972503, "loss": 1.2251, "step": 137400 }, { "epoch": 27.65, "learning_rate": 0.0012378832598533957, "loss": 1.2225, "step": 137500 }, { "epoch": 27.67, "learning_rate": 0.0012361285496136948, "loss": 1.2206, "step": 137600 }, { "epoch": 27.69, "learning_rate": 0.001234373866711544, "loss": 1.2238, "step": 137700 }, { "epoch": 27.71, "learning_rate": 0.0012326192146050346, "loss": 1.2259, "step": 137800 }, { "epoch": 27.73, "learning_rate": 0.0012308645967521966, "loss": 1.2215, "step": 137900 }, { "epoch": 27.75, "learning_rate": 0.0012291100166109926, "loss": 1.2229, "step": 138000 }, { "epoch": 27.77, "learning_rate": 0.0012273554776393101, "loss": 1.2194, "step": 138100 }, { "epoch": 27.79, "learning_rate": 0.0012256009832949562, "loss": 1.2188, "step": 138200 }, { "epoch": 27.81, "learning_rate": 0.0012238465370356501, "loss": 1.2199, "step": 138300 }, { "epoch": 27.83, "learning_rate": 0.0012220921423190167, "loss": 1.2211, "step": 138400 }, { "epoch": 27.85, "learning_rate": 0.0012203378026025777, "loss": 1.2184, "step": 138500 }, { "epoch": 27.87, "learning_rate": 0.0012185835213437478, "loss": 1.2187, "step": 138600 }, { "epoch": 27.89, "learning_rate": 0.0012168293019998258, "loss": 1.2232, "step": 138700 }, { "epoch": 27.91, "learning_rate": 0.0012150751480279897, "loss": 1.2241, "step": 138800 }, { "epoch": 27.93, "learning_rate": 0.0012133210628852867, "loss": 1.2215, "step": 138900 }, { "epoch": 27.95, "learning_rate": 0.0012115670500286294, "loss": 1.2231, "step": 139000 }, { "epoch": 27.97, "learning_rate": 0.0012098131129147888, "loss": 1.2214, "step": 139100 }, { "epoch": 27.99, "learning_rate": 0.0012080767931761424, "loss": 1.2187, "step": 139200 }, { "epoch": 28.0, "eval_accuracy": 0.4111801831154055, "eval_loss": 1.1840488910675049, "eval_runtime": 19.7999, "eval_samples_per_second": 4018.599, "eval_steps_per_second": 15.707, "step": 139251 }, { "epoch": 28.01, "learning_rate": 0.0012063230170739731, "loss": 1.2143, "step": 139300 }, { "epoch": 28.03, "learning_rate": 0.0012045693270494448, "loss": 1.211, "step": 139400 }, { "epoch": 28.05, "learning_rate": 0.0012028157265586918, "loss": 1.2165, "step": 139500 }, { "epoch": 28.07, "learning_rate": 0.0012010622190576717, "loss": 1.217, "step": 139600 }, { "epoch": 28.09, "learning_rate": 0.001199308808002159, "loss": 1.2124, "step": 139700 }, { "epoch": 28.11, "learning_rate": 0.001197555496847737, "loss": 1.2182, "step": 139800 }, { "epoch": 28.13, "learning_rate": 0.0011958022890497934, "loss": 1.2156, "step": 139900 }, { "epoch": 28.15, "learning_rate": 0.0011940491880635118, "loss": 1.2185, "step": 140000 }, { "epoch": 28.17, "learning_rate": 0.0011922961973438657, "loss": 1.2143, "step": 140100 }, { "epoch": 28.19, "learning_rate": 0.0011905433203456097, "loss": 1.2148, "step": 140200 }, { "epoch": 28.21, "learning_rate": 0.001188790560523276, "loss": 1.2149, "step": 140300 }, { "epoch": 28.23, "learning_rate": 0.0011870379213311652, "loss": 1.2188, "step": 140400 }, { "epoch": 28.25, "learning_rate": 0.0011852854062233409, "loss": 1.2153, "step": 140500 }, { "epoch": 28.27, "learning_rate": 0.0011835330186536204, "loss": 1.2163, "step": 140600 }, { "epoch": 28.29, "learning_rate": 0.0011817807620755712, "loss": 1.2149, "step": 140700 }, { "epoch": 28.31, "learning_rate": 0.001180028639942502, "loss": 1.2188, "step": 140800 }, { "epoch": 28.33, "learning_rate": 0.0011782766557074578, "loss": 1.2193, "step": 140900 }, { "epoch": 28.35, "learning_rate": 0.0011765248128232095, "loss": 1.2181, "step": 141000 }, { "epoch": 28.37, "learning_rate": 0.001174773114742251, "loss": 1.2133, "step": 141100 }, { "epoch": 28.39, "learning_rate": 0.0011730215649167904, "loss": 1.2198, "step": 141200 }, { "epoch": 28.41, "learning_rate": 0.001171270166798745, "loss": 1.2163, "step": 141300 }, { "epoch": 28.43, "learning_rate": 0.00116951892383973, "loss": 1.219, "step": 141400 }, { "epoch": 28.45, "learning_rate": 0.0011677678394910577, "loss": 1.2176, "step": 141500 }, { "epoch": 28.47, "learning_rate": 0.0011660169172037266, "loss": 1.2186, "step": 141600 }, { "epoch": 28.49, "learning_rate": 0.0011642661604284164, "loss": 1.2152, "step": 141700 }, { "epoch": 28.51, "learning_rate": 0.0011625155726154794, "loss": 1.2113, "step": 141800 }, { "epoch": 28.53, "learning_rate": 0.0011607651572149362, "loss": 1.2138, "step": 141900 }, { "epoch": 28.55, "learning_rate": 0.001159014917676467, "loss": 1.2151, "step": 142000 }, { "epoch": 28.57, "learning_rate": 0.0011572648574494063, "loss": 1.217, "step": 142100 }, { "epoch": 28.59, "learning_rate": 0.001155514979982733, "loss": 1.2148, "step": 142200 }, { "epoch": 28.61, "learning_rate": 0.0011537652887250683, "loss": 1.2149, "step": 142300 }, { "epoch": 28.63, "learning_rate": 0.0011520157871246655, "loss": 1.2161, "step": 142400 }, { "epoch": 28.65, "learning_rate": 0.001150266478629404, "loss": 1.2111, "step": 142500 }, { "epoch": 28.67, "learning_rate": 0.001148517366686782, "loss": 1.2201, "step": 142600 }, { "epoch": 28.69, "learning_rate": 0.0011467684547439116, "loss": 1.2148, "step": 142700 }, { "epoch": 28.71, "learning_rate": 0.00114501974624751, "loss": 1.2185, "step": 142800 }, { "epoch": 28.73, "learning_rate": 0.0011432887286244955, "loss": 1.2136, "step": 142900 }, { "epoch": 28.75, "learning_rate": 0.0011415404352391302, "loss": 1.2153, "step": 143000 }, { "epoch": 28.77, "learning_rate": 0.0011397923556035006, "loss": 1.2159, "step": 143100 }, { "epoch": 28.79, "learning_rate": 0.0011380444931626827, "loss": 1.2155, "step": 143200 }, { "epoch": 28.81, "learning_rate": 0.0011362968513613262, "loss": 1.2182, "step": 143300 }, { "epoch": 28.83, "learning_rate": 0.001134549433643645, "loss": 1.2113, "step": 143400 }, { "epoch": 28.85, "learning_rate": 0.0011328022434534126, "loss": 1.2165, "step": 143500 }, { "epoch": 28.87, "learning_rate": 0.0011310552842339516, "loss": 1.2119, "step": 143600 }, { "epoch": 28.89, "learning_rate": 0.001129308559428132, "loss": 1.2145, "step": 143700 }, { "epoch": 28.91, "learning_rate": 0.0011275620724783605, "loss": 1.2153, "step": 143800 }, { "epoch": 28.93, "learning_rate": 0.001125815826826576, "loss": 1.2151, "step": 143900 }, { "epoch": 28.95, "learning_rate": 0.0011240698259142399, "loss": 1.219, "step": 144000 }, { "epoch": 28.98, "learning_rate": 0.0011223240731823335, "loss": 1.2149, "step": 144100 }, { "epoch": 29.0, "learning_rate": 0.0011205785720713479, "loss": 1.2162, "step": 144200 }, { "epoch": 29.0, "eval_accuracy": 0.41123713623185454, "eval_loss": 1.1828089952468872, "eval_runtime": 19.8923, "eval_samples_per_second": 3999.934, "eval_steps_per_second": 15.634, "step": 144224 }, { "epoch": 29.02, "learning_rate": 0.0011188333260212788, "loss": 1.2079, "step": 144300 }, { "epoch": 29.04, "learning_rate": 0.001117088338471618, "loss": 1.2047, "step": 144400 }, { "epoch": 29.06, "learning_rate": 0.001115343612861349, "loss": 1.2053, "step": 144500 }, { "epoch": 29.08, "learning_rate": 0.0011135991526289393, "loss": 1.2075, "step": 144600 }, { "epoch": 29.1, "learning_rate": 0.0011118549612123333, "loss": 1.2061, "step": 144700 }, { "epoch": 29.12, "learning_rate": 0.0011101110420489442, "loss": 1.2101, "step": 144800 }, { "epoch": 29.14, "learning_rate": 0.0011083673985756498, "loss": 1.2117, "step": 144900 }, { "epoch": 29.16, "learning_rate": 0.001106624034228785, "loss": 1.2104, "step": 145000 }, { "epoch": 29.18, "learning_rate": 0.0011048809524441346, "loss": 1.2105, "step": 145100 }, { "epoch": 29.2, "learning_rate": 0.0011031381566569247, "loss": 1.2052, "step": 145200 }, { "epoch": 29.22, "learning_rate": 0.0011013956503018196, "loss": 1.2104, "step": 145300 }, { "epoch": 29.24, "learning_rate": 0.0010996534368129128, "loss": 1.2103, "step": 145400 }, { "epoch": 29.26, "learning_rate": 0.0010979115196237208, "loss": 1.2142, "step": 145500 }, { "epoch": 29.28, "learning_rate": 0.001096169902167175, "loss": 1.2038, "step": 145600 }, { "epoch": 29.3, "learning_rate": 0.0010944285878756177, "loss": 1.2115, "step": 145700 }, { "epoch": 29.32, "learning_rate": 0.0010926875801807927, "loss": 1.2122, "step": 145800 }, { "epoch": 29.34, "learning_rate": 0.0010909468825138404, "loss": 1.2085, "step": 145900 }, { "epoch": 29.36, "learning_rate": 0.0010892064983052884, "loss": 1.2095, "step": 146000 }, { "epoch": 29.38, "learning_rate": 0.0010874664309850487, "loss": 1.2102, "step": 146100 }, { "epoch": 29.4, "learning_rate": 0.0010857266839824074, "loss": 1.2116, "step": 146200 }, { "epoch": 29.42, "learning_rate": 0.0010839872607260209, "loss": 1.2121, "step": 146300 }, { "epoch": 29.44, "learning_rate": 0.0010822481646439047, "loss": 1.2101, "step": 146400 }, { "epoch": 29.46, "learning_rate": 0.0010805093991634325, "loss": 1.2077, "step": 146500 }, { "epoch": 29.48, "learning_rate": 0.001078770967711325, "loss": 1.2103, "step": 146600 }, { "epoch": 29.5, "learning_rate": 0.001077032873713645, "loss": 1.2079, "step": 146700 }, { "epoch": 29.52, "learning_rate": 0.0010752951205957896, "loss": 1.2105, "step": 146800 }, { "epoch": 29.54, "learning_rate": 0.0010735577117824847, "loss": 1.2118, "step": 146900 }, { "epoch": 29.56, "learning_rate": 0.0010718206506977778, "loss": 1.2105, "step": 147000 }, { "epoch": 29.58, "learning_rate": 0.0010700839407650313, "loss": 1.2045, "step": 147100 }, { "epoch": 29.6, "learning_rate": 0.001068347585406914, "loss": 1.2123, "step": 147200 }, { "epoch": 29.62, "learning_rate": 0.0010666115880453974, "loss": 1.2078, "step": 147300 }, { "epoch": 29.64, "learning_rate": 0.0010648759521017476, "loss": 1.2099, "step": 147400 }, { "epoch": 29.66, "learning_rate": 0.0010631406809965178, "loss": 1.2085, "step": 147500 }, { "epoch": 29.68, "learning_rate": 0.0010614057781495414, "loss": 1.2119, "step": 147600 }, { "epoch": 29.7, "learning_rate": 0.001059671246979928, "loss": 1.2093, "step": 147700 }, { "epoch": 29.72, "learning_rate": 0.001057937090906053, "loss": 1.2063, "step": 147800 }, { "epoch": 29.74, "learning_rate": 0.001056203313345554, "loss": 1.2055, "step": 147900 }, { "epoch": 29.76, "learning_rate": 0.0010544699177153208, "loss": 1.2086, "step": 148000 }, { "epoch": 29.78, "learning_rate": 0.0010527369074314922, "loss": 1.2115, "step": 148100 }, { "epoch": 29.8, "learning_rate": 0.0010510042859094464, "loss": 1.2102, "step": 148200 }, { "epoch": 29.82, "learning_rate": 0.0010492720565637972, "loss": 1.2079, "step": 148300 }, { "epoch": 29.84, "learning_rate": 0.001047540222808383, "loss": 1.2114, "step": 148400 }, { "epoch": 29.86, "learning_rate": 0.001045808788056264, "loss": 1.2092, "step": 148500 }, { "epoch": 29.88, "learning_rate": 0.001044077755719714, "loss": 1.2106, "step": 148600 }, { "epoch": 29.9, "learning_rate": 0.0010423471292102147, "loss": 1.2099, "step": 148700 }, { "epoch": 29.92, "learning_rate": 0.0010406169119384452, "loss": 1.2035, "step": 148800 }, { "epoch": 29.94, "learning_rate": 0.0010388871073142806, "loss": 1.2079, "step": 148900 }, { "epoch": 29.96, "learning_rate": 0.0010371577187467818, "loss": 1.2114, "step": 149000 }, { "epoch": 29.98, "learning_rate": 0.001035428749644191, "loss": 1.2087, "step": 149100 }, { "epoch": 30.0, "eval_accuracy": 0.41176053219028325, "eval_loss": 1.1731864213943481, "eval_runtime": 19.7738, "eval_samples_per_second": 4023.916, "eval_steps_per_second": 15.728, "step": 149197 }, { "epoch": 30.0, "learning_rate": 0.001033700203413921, "loss": 1.2107, "step": 149200 }, { "epoch": 30.02, "learning_rate": 0.0010319893625408032, "loss": 1.204, "step": 149300 }, { "epoch": 30.04, "learning_rate": 0.0010302616679603773, "loss": 1.2, "step": 149400 }, { "epoch": 30.06, "learning_rate": 0.0010285344064354445, "loss": 1.2002, "step": 149500 }, { "epoch": 30.08, "learning_rate": 0.0010268075813700541, "loss": 1.1993, "step": 149600 }, { "epoch": 30.1, "learning_rate": 0.0010250811961673946, "loss": 1.2016, "step": 149700 }, { "epoch": 30.12, "learning_rate": 0.0010233552542297884, "loss": 1.2029, "step": 149800 }, { "epoch": 30.14, "learning_rate": 0.001021629758958684, "loss": 1.2027, "step": 149900 }, { "epoch": 30.16, "learning_rate": 0.0010199047137546503, "loss": 1.2043, "step": 150000 }, { "epoch": 30.18, "learning_rate": 0.0010181801220173676, "loss": 1.2028, "step": 150100 }, { "epoch": 30.2, "learning_rate": 0.0010164559871456242, "loss": 1.2031, "step": 150200 }, { "epoch": 30.22, "learning_rate": 0.0010147323125373072, "loss": 1.2036, "step": 150300 }, { "epoch": 30.24, "learning_rate": 0.0010130263313926052, "loss": 1.2056, "step": 150400 }, { "epoch": 30.26, "learning_rate": 0.001011303582813796, "loss": 1.2015, "step": 150500 }, { "epoch": 30.28, "learning_rate": 0.0010095813046526582, "loss": 1.2047, "step": 150600 }, { "epoch": 30.3, "learning_rate": 0.0010078595003034205, "loss": 1.2056, "step": 150700 }, { "epoch": 30.32, "learning_rate": 0.0010061381731593774, "loss": 1.2017, "step": 150800 }, { "epoch": 30.34, "learning_rate": 0.0010044345326882533, "loss": 1.2007, "step": 150900 }, { "epoch": 30.36, "learning_rate": 0.0010027141652740393, "loss": 1.1998, "step": 151000 }, { "epoch": 30.38, "learning_rate": 0.0010009942852053342, "loss": 1.2035, "step": 151100 }, { "epoch": 30.4, "learning_rate": 0.0009992748958716382, "loss": 1.1996, "step": 151200 }, { "epoch": 30.42, "learning_rate": 0.0009975560006614873, "loss": 1.2052, "step": 151300 }, { "epoch": 30.44, "learning_rate": 0.0009958376029624422, "loss": 1.2017, "step": 151400 }, { "epoch": 30.46, "learning_rate": 0.0009941197061610842, "loss": 1.2044, "step": 151500 }, { "epoch": 30.48, "learning_rate": 0.0009924023136430055, "loss": 1.2078, "step": 151600 }, { "epoch": 30.5, "learning_rate": 0.000990685428792806, "loss": 1.2025, "step": 151700 }, { "epoch": 30.52, "learning_rate": 0.0009889690549940852, "loss": 1.2001, "step": 151800 }, { "epoch": 30.54, "learning_rate": 0.0009872531956294354, "loss": 1.1996, "step": 151900 }, { "epoch": 30.56, "learning_rate": 0.0009855378540804332, "loss": 1.2012, "step": 152000 }, { "epoch": 30.58, "learning_rate": 0.0009838230337276372, "loss": 1.2039, "step": 152100 }, { "epoch": 30.6, "learning_rate": 0.0009821087379505776, "loss": 1.2026, "step": 152200 }, { "epoch": 30.62, "learning_rate": 0.0009803949701277515, "loss": 1.2004, "step": 152300 }, { "epoch": 30.64, "learning_rate": 0.0009786817336366138, "loss": 1.2015, "step": 152400 }, { "epoch": 30.66, "learning_rate": 0.0009769690318535743, "loss": 1.2026, "step": 152500 }, { "epoch": 30.68, "learning_rate": 0.0009752739871163907, "loss": 1.2037, "step": 152600 }, { "epoch": 30.7, "learning_rate": 0.0009735623594432755, "loss": 1.2016, "step": 152700 }, { "epoch": 30.72, "learning_rate": 0.0009718512765674095, "loss": 1.2024, "step": 152800 }, { "epoch": 30.74, "learning_rate": 0.0009701407418609562, "loss": 1.202, "step": 152900 }, { "epoch": 30.76, "learning_rate": 0.0009684307586950005, "loss": 1.206, "step": 153000 }, { "epoch": 30.78, "learning_rate": 0.0009667213304395399, "loss": 1.1989, "step": 153100 }, { "epoch": 30.8, "learning_rate": 0.0009650124604634786, "loss": 1.2081, "step": 153200 }, { "epoch": 30.82, "learning_rate": 0.0009633041521346189, "loss": 1.2015, "step": 153300 }, { "epoch": 30.85, "learning_rate": 0.0009615964088196581, "loss": 1.1989, "step": 153400 }, { "epoch": 30.87, "learning_rate": 0.0009598892338841794, "loss": 1.2006, "step": 153500 }, { "epoch": 30.89, "learning_rate": 0.0009581826306926464, "loss": 1.1991, "step": 153600 }, { "epoch": 30.91, "learning_rate": 0.000956493660031415, "loss": 1.2033, "step": 153700 }, { "epoch": 30.93, "learning_rate": 0.0009547882046153125, "loss": 1.2024, "step": 153800 }, { "epoch": 30.95, "learning_rate": 0.000953083330996152, "loss": 1.2042, "step": 153900 }, { "epoch": 30.97, "learning_rate": 0.0009513790425338609, "loss": 1.2005, "step": 154000 }, { "epoch": 30.99, "learning_rate": 0.000949675342587214, "loss": 1.2005, "step": 154100 }, { "epoch": 31.0, "eval_accuracy": 0.41267003209737646, "eval_loss": 1.1657705307006836, "eval_runtime": 19.6203, "eval_samples_per_second": 4055.399, "eval_steps_per_second": 15.851, "step": 154170 }, { "epoch": 31.01, "learning_rate": 0.0009479722345138251, "loss": 1.1976, "step": 154200 }, { "epoch": 31.03, "learning_rate": 0.0009462697216701424, "loss": 1.1919, "step": 154300 }, { "epoch": 31.05, "learning_rate": 0.0009445678074114414, "loss": 1.1973, "step": 154400 }, { "epoch": 31.07, "learning_rate": 0.0009428664950918177, "loss": 1.1941, "step": 154500 }, { "epoch": 31.09, "learning_rate": 0.0009411657880641792, "loss": 1.1909, "step": 154600 }, { "epoch": 31.11, "learning_rate": 0.0009394656896802428, "loss": 1.1942, "step": 154700 }, { "epoch": 31.13, "learning_rate": 0.0009377662032905253, "loss": 1.1925, "step": 154800 }, { "epoch": 31.15, "learning_rate": 0.0009360673322443375, "loss": 1.1955, "step": 154900 }, { "epoch": 31.17, "learning_rate": 0.0009343690798897762, "loss": 1.1939, "step": 155000 }, { "epoch": 31.19, "learning_rate": 0.0009326714495737206, "loss": 1.1936, "step": 155100 }, { "epoch": 31.21, "learning_rate": 0.0009309744446418236, "loss": 1.195, "step": 155200 }, { "epoch": 31.23, "learning_rate": 0.0009292780684385055, "loss": 1.1938, "step": 155300 }, { "epoch": 31.25, "learning_rate": 0.0009275823243069464, "loss": 1.1969, "step": 155400 }, { "epoch": 31.27, "learning_rate": 0.0009258872155890821, "loss": 1.1971, "step": 155500 }, { "epoch": 31.29, "learning_rate": 0.0009241927456255962, "loss": 1.1924, "step": 155600 }, { "epoch": 31.31, "learning_rate": 0.0009224989177559132, "loss": 1.197, "step": 155700 }, { "epoch": 31.33, "learning_rate": 0.0009208057353181909, "loss": 1.1955, "step": 155800 }, { "epoch": 31.35, "learning_rate": 0.0009191132016493168, "loss": 1.1927, "step": 155900 }, { "epoch": 31.37, "learning_rate": 0.0009174213200848991, "loss": 1.1954, "step": 156000 }, { "epoch": 31.39, "learning_rate": 0.0009157300939592614, "loss": 1.1945, "step": 156100 }, { "epoch": 31.41, "learning_rate": 0.0009140395266054343, "loss": 1.1989, "step": 156200 }, { "epoch": 31.43, "learning_rate": 0.0009123496213551513, "loss": 1.1914, "step": 156300 }, { "epoch": 31.45, "learning_rate": 0.0009106603815388409, "loss": 1.1953, "step": 156400 }, { "epoch": 31.47, "learning_rate": 0.0009089718104856201, "loss": 1.1952, "step": 156500 }, { "epoch": 31.49, "learning_rate": 0.0009072839115232867, "loss": 1.1936, "step": 156600 }, { "epoch": 31.51, "learning_rate": 0.0009055966879783159, "loss": 1.197, "step": 156700 }, { "epoch": 31.53, "learning_rate": 0.0009039101431758506, "loss": 1.1957, "step": 156800 }, { "epoch": 31.55, "learning_rate": 0.0009022242804396972, "loss": 1.1971, "step": 156900 }, { "epoch": 31.57, "learning_rate": 0.0009005391030923156, "loss": 1.1941, "step": 157000 }, { "epoch": 31.59, "learning_rate": 0.0008988546144548173, "loss": 1.1935, "step": 157100 }, { "epoch": 31.61, "learning_rate": 0.0008971708178469554, "loss": 1.1977, "step": 157200 }, { "epoch": 31.63, "learning_rate": 0.00089548771658712, "loss": 1.1978, "step": 157300 }, { "epoch": 31.65, "learning_rate": 0.0008938053139923291, "loss": 1.1932, "step": 157400 }, { "epoch": 31.67, "learning_rate": 0.0008921236133782254, "loss": 1.1928, "step": 157500 }, { "epoch": 31.69, "learning_rate": 0.0008904426180590678, "loss": 1.1976, "step": 157600 }, { "epoch": 31.71, "learning_rate": 0.0008887623313477256, "loss": 1.1946, "step": 157700 }, { "epoch": 31.73, "learning_rate": 0.0008870827565556696, "loss": 1.1975, "step": 157800 }, { "epoch": 31.75, "learning_rate": 0.0008854038969929701, "loss": 1.195, "step": 157900 }, { "epoch": 31.77, "learning_rate": 0.0008837257559682865, "loss": 1.1947, "step": 158000 }, { "epoch": 31.79, "learning_rate": 0.0008820483367888628, "loss": 1.1963, "step": 158100 }, { "epoch": 31.81, "learning_rate": 0.0008803716427605191, "loss": 1.1963, "step": 158200 }, { "epoch": 31.83, "learning_rate": 0.0008786956771876478, "loss": 1.1923, "step": 158300 }, { "epoch": 31.85, "learning_rate": 0.000877020443373205, "loss": 1.1971, "step": 158400 }, { "epoch": 31.87, "learning_rate": 0.0008753459446187053, "loss": 1.1951, "step": 158500 }, { "epoch": 31.89, "learning_rate": 0.0008736721842242136, "loss": 1.1981, "step": 158600 }, { "epoch": 31.91, "learning_rate": 0.0008719991654883402, "loss": 1.1991, "step": 158700 }, { "epoch": 31.93, "learning_rate": 0.0008703268917082342, "loss": 1.1949, "step": 158800 }, { "epoch": 31.95, "learning_rate": 0.0008686553661795765, "loss": 1.1921, "step": 158900 }, { "epoch": 31.97, "learning_rate": 0.0008669845921965718, "loss": 1.1974, "step": 159000 }, { "epoch": 31.99, "learning_rate": 0.0008653145730519456, "loss": 1.1944, "step": 159100 }, { "epoch": 32.0, "eval_accuracy": 0.4131173845092614, "eval_loss": 1.1602274179458618, "eval_runtime": 19.7773, "eval_samples_per_second": 4023.208, "eval_steps_per_second": 15.725, "step": 159144 }, { "epoch": 32.01, "learning_rate": 0.000863645312036935, "loss": 1.1882, "step": 159200 }, { "epoch": 32.03, "learning_rate": 0.0008619768124412836, "loss": 1.1821, "step": 159300 }, { "epoch": 32.05, "learning_rate": 0.000860309077553233, "loss": 1.1848, "step": 159400 }, { "epoch": 32.07, "learning_rate": 0.0008586421106595186, "loss": 1.1857, "step": 159500 }, { "epoch": 32.09, "learning_rate": 0.0008569759150453628, "loss": 1.1859, "step": 159600 }, { "epoch": 32.11, "learning_rate": 0.0008553104939944677, "loss": 1.189, "step": 159700 }, { "epoch": 32.13, "learning_rate": 0.0008536458507890077, "loss": 1.189, "step": 159800 }, { "epoch": 32.15, "learning_rate": 0.0008519819887096256, "loss": 1.1908, "step": 159900 }, { "epoch": 32.17, "learning_rate": 0.0008503189110354243, "loss": 1.1872, "step": 160000 }, { "epoch": 32.19, "learning_rate": 0.0008486566210439614, "loss": 1.1861, "step": 160100 }, { "epoch": 32.21, "learning_rate": 0.0008470117330755695, "loss": 1.1838, "step": 160200 }, { "epoch": 32.23, "learning_rate": 0.0008453510203175021, "loss": 1.1876, "step": 160300 }, { "epoch": 32.25, "learning_rate": 0.0008436911050327827, "loss": 1.1843, "step": 160400 }, { "epoch": 32.27, "learning_rate": 0.0008420319904927365, "loss": 1.1884, "step": 160500 }, { "epoch": 32.29, "learning_rate": 0.0008403736799671097, "loss": 1.191, "step": 160600 }, { "epoch": 32.31, "learning_rate": 0.000838716176724065, "loss": 1.1915, "step": 160700 }, { "epoch": 32.33, "learning_rate": 0.0008370594840301723, "loss": 1.188, "step": 160800 }, { "epoch": 32.35, "learning_rate": 0.000835403605150406, "loss": 1.1913, "step": 160900 }, { "epoch": 32.37, "learning_rate": 0.000833748543348136, "loss": 1.1905, "step": 161000 }, { "epoch": 32.39, "learning_rate": 0.0008320943018851221, "loss": 1.1876, "step": 161100 }, { "epoch": 32.41, "learning_rate": 0.0008304408840215062, "loss": 1.1882, "step": 161200 }, { "epoch": 32.43, "learning_rate": 0.0008287882930158088, "loss": 1.1874, "step": 161300 }, { "epoch": 32.45, "learning_rate": 0.0008271365321249197, "loss": 1.1908, "step": 161400 }, { "epoch": 32.47, "learning_rate": 0.0008254856046040937, "loss": 1.1903, "step": 161500 }, { "epoch": 32.49, "learning_rate": 0.0008238355137069418, "loss": 1.192, "step": 161600 }, { "epoch": 32.51, "learning_rate": 0.0008221862626854274, "loss": 1.1883, "step": 161700 }, { "epoch": 32.53, "learning_rate": 0.0008205378547898581, "loss": 1.1918, "step": 161800 }, { "epoch": 32.55, "learning_rate": 0.0008188902932688807, "loss": 1.1885, "step": 161900 }, { "epoch": 32.57, "learning_rate": 0.0008172435813694726, "loss": 1.1887, "step": 162000 }, { "epoch": 32.59, "learning_rate": 0.0008155977223369379, "loss": 1.1873, "step": 162100 }, { "epoch": 32.61, "learning_rate": 0.0008139527194148993, "loss": 1.1863, "step": 162200 }, { "epoch": 32.63, "learning_rate": 0.0008123085758452935, "loss": 1.1897, "step": 162300 }, { "epoch": 32.65, "learning_rate": 0.0008106652948683613, "loss": 1.1856, "step": 162400 }, { "epoch": 32.67, "learning_rate": 0.0008090392995776086, "loss": 1.1922, "step": 162500 }, { "epoch": 32.69, "learning_rate": 0.0008073977447932476, "loss": 1.1861, "step": 162600 }, { "epoch": 32.72, "learning_rate": 0.0008057570622797192, "loss": 1.1881, "step": 162700 }, { "epoch": 32.74, "learning_rate": 0.0008041336489961784, "loss": 1.1852, "step": 162800 }, { "epoch": 32.76, "learning_rate": 0.0008024947119195025, "loss": 1.1851, "step": 162900 }, { "epoch": 32.78, "learning_rate": 0.000800856656776449, "loss": 1.1819, "step": 163000 }, { "epoch": 32.8, "learning_rate": 0.0007992194867952607, "loss": 1.1882, "step": 163100 }, { "epoch": 32.82, "learning_rate": 0.0007975832052024367, "loss": 1.1864, "step": 163200 }, { "epoch": 32.84, "learning_rate": 0.0007959478152227251, "loss": 1.1914, "step": 163300 }, { "epoch": 32.86, "learning_rate": 0.0007943133200791164, "loss": 1.1888, "step": 163400 }, { "epoch": 32.88, "learning_rate": 0.0007926797229928376, "loss": 1.1831, "step": 163500 }, { "epoch": 32.9, "learning_rate": 0.0007910470271833464, "loss": 1.1878, "step": 163600 }, { "epoch": 32.92, "learning_rate": 0.0007894152358683243, "loss": 1.1889, "step": 163700 }, { "epoch": 32.94, "learning_rate": 0.0007877843522636694, "loss": 1.1872, "step": 163800 }, { "epoch": 32.96, "learning_rate": 0.0007861543795834913, "loss": 1.1851, "step": 163900 }, { "epoch": 32.98, "learning_rate": 0.0007845253210401045, "loss": 1.1905, "step": 164000 }, { "epoch": 33.0, "learning_rate": 0.0007828971798440226, "loss": 1.1887, "step": 164100 }, { "epoch": 33.0, "eval_accuracy": 0.41392475061534817, "eval_loss": 1.1511569023132324, "eval_runtime": 19.7992, "eval_samples_per_second": 4018.746, "eval_steps_per_second": 15.708, "step": 164117 }, { "epoch": 33.02, "learning_rate": 0.0007812699592039499, "loss": 1.1783, "step": 164200 }, { "epoch": 33.04, "learning_rate": 0.0007796436623267771, "loss": 1.1784, "step": 164300 }, { "epoch": 33.06, "learning_rate": 0.0007780182924175748, "loss": 1.178, "step": 164400 }, { "epoch": 33.08, "learning_rate": 0.0007763938526795867, "loss": 1.177, "step": 164500 }, { "epoch": 33.1, "learning_rate": 0.0007747703463142225, "loss": 1.1826, "step": 164600 }, { "epoch": 33.12, "learning_rate": 0.0007731477765210531, "loss": 1.1774, "step": 164700 }, { "epoch": 33.14, "learning_rate": 0.0007715261464978038, "loss": 1.1816, "step": 164800 }, { "epoch": 33.16, "learning_rate": 0.0007699054594403476, "loss": 1.1792, "step": 164900 }, { "epoch": 33.18, "learning_rate": 0.0007682857185426989, "loss": 1.1801, "step": 165000 }, { "epoch": 33.2, "learning_rate": 0.0007666669269970071, "loss": 1.1801, "step": 165100 }, { "epoch": 33.22, "learning_rate": 0.0007650490879935517, "loss": 1.1833, "step": 165200 }, { "epoch": 33.24, "learning_rate": 0.0007634322047207346, "loss": 1.1818, "step": 165300 }, { "epoch": 33.26, "learning_rate": 0.0007618162803650734, "loss": 1.1805, "step": 165400 }, { "epoch": 33.28, "learning_rate": 0.0007602013181111966, "loss": 1.179, "step": 165500 }, { "epoch": 33.3, "learning_rate": 0.0007585873211418363, "loss": 1.1792, "step": 165600 }, { "epoch": 33.32, "learning_rate": 0.000756974292637823, "loss": 1.1775, "step": 165700 }, { "epoch": 33.34, "learning_rate": 0.0007553622357780774, "loss": 1.183, "step": 165800 }, { "epoch": 33.36, "learning_rate": 0.0007537511537396058, "loss": 1.1838, "step": 165900 }, { "epoch": 33.38, "learning_rate": 0.0007521410496974935, "loss": 1.182, "step": 166000 }, { "epoch": 33.4, "learning_rate": 0.0007505319268248988, "loss": 1.1809, "step": 166100 }, { "epoch": 33.42, "learning_rate": 0.0007489237882930453, "loss": 1.1785, "step": 166200 }, { "epoch": 33.44, "learning_rate": 0.0007473166372712171, "loss": 1.184, "step": 166300 }, { "epoch": 33.46, "learning_rate": 0.0007457104769267522, "loss": 1.1809, "step": 166400 }, { "epoch": 33.48, "learning_rate": 0.000744105310425037, "loss": 1.1809, "step": 166500 }, { "epoch": 33.5, "learning_rate": 0.0007425011409294979, "loss": 1.1788, "step": 166600 }, { "epoch": 33.52, "learning_rate": 0.0007408979716015968, "loss": 1.1821, "step": 166700 }, { "epoch": 33.54, "learning_rate": 0.000739295805600825, "loss": 1.1827, "step": 166800 }, { "epoch": 33.56, "learning_rate": 0.0007376946460846965, "loss": 1.1819, "step": 166900 }, { "epoch": 33.58, "learning_rate": 0.0007360944962087409, "loss": 1.1793, "step": 167000 }, { "epoch": 33.6, "learning_rate": 0.0007344953591264986, "loss": 1.1827, "step": 167100 }, { "epoch": 33.62, "learning_rate": 0.000732897237989514, "loss": 1.1813, "step": 167200 }, { "epoch": 33.64, "learning_rate": 0.0007313001359473295, "loss": 1.1835, "step": 167300 }, { "epoch": 33.66, "learning_rate": 0.0007297040561474782, "loss": 1.1772, "step": 167400 }, { "epoch": 33.68, "learning_rate": 0.0007281090017354799, "loss": 1.1828, "step": 167500 }, { "epoch": 33.7, "learning_rate": 0.0007265149758548325, "loss": 1.182, "step": 167600 }, { "epoch": 33.72, "learning_rate": 0.0007249219816470082, "loss": 1.1799, "step": 167700 }, { "epoch": 33.74, "learning_rate": 0.0007233300222514435, "loss": 1.1794, "step": 167800 }, { "epoch": 33.76, "learning_rate": 0.0007217391008055382, "loss": 1.1759, "step": 167900 }, { "epoch": 33.78, "learning_rate": 0.0007201492204446453, "loss": 1.1799, "step": 168000 }, { "epoch": 33.8, "learning_rate": 0.0007185603843020663, "loss": 1.1757, "step": 168100 }, { "epoch": 33.82, "learning_rate": 0.0007169725955090442, "loss": 1.1791, "step": 168200 }, { "epoch": 33.84, "learning_rate": 0.0007153858571947587, "loss": 1.1777, "step": 168300 }, { "epoch": 33.86, "learning_rate": 0.0007138160241077916, "loss": 1.1769, "step": 168400 }, { "epoch": 33.88, "learning_rate": 0.0007122313855474593, "loss": 1.1821, "step": 168500 }, { "epoch": 33.9, "learning_rate": 0.0007106478068097369, "loss": 1.1786, "step": 168600 }, { "epoch": 33.92, "learning_rate": 0.0007090652910155055, "loss": 1.1816, "step": 168700 }, { "epoch": 33.94, "learning_rate": 0.0007074838412835532, "loss": 1.1778, "step": 168800 }, { "epoch": 33.96, "learning_rate": 0.0007059034607305667, "loss": 1.1819, "step": 168900 }, { "epoch": 33.98, "learning_rate": 0.0007043241524711256, "loss": 1.1795, "step": 169000 }, { "epoch": 34.0, "eval_accuracy": 0.4142251067155042, "eval_loss": 1.1453089714050293, "eval_runtime": 19.8806, "eval_samples_per_second": 4002.288, "eval_steps_per_second": 15.643, "step": 169090 }, { "epoch": 34.0, "learning_rate": 0.000702745919617695, "loss": 1.1752, "step": 169100 }, { "epoch": 34.02, "learning_rate": 0.0007011687652806225, "loss": 1.1677, "step": 169200 }, { "epoch": 34.04, "learning_rate": 0.000699592692568129, "loss": 1.17, "step": 169300 }, { "epoch": 34.06, "learning_rate": 0.0006980177045863047, "loss": 1.1725, "step": 169400 }, { "epoch": 34.08, "learning_rate": 0.0006964438044391006, "loss": 1.1729, "step": 169500 }, { "epoch": 34.1, "learning_rate": 0.0006948709952283247, "loss": 1.1716, "step": 169600 }, { "epoch": 34.12, "learning_rate": 0.0006932992800536353, "loss": 1.174, "step": 169700 }, { "epoch": 34.14, "learning_rate": 0.0006917286620125349, "loss": 1.1747, "step": 169800 }, { "epoch": 34.16, "learning_rate": 0.0006901591442003616, "loss": 1.1702, "step": 169900 }, { "epoch": 34.18, "learning_rate": 0.0006885907297102878, "loss": 1.1726, "step": 170000 }, { "epoch": 34.2, "learning_rate": 0.0006870234216333101, "loss": 1.1726, "step": 170100 }, { "epoch": 34.22, "learning_rate": 0.0006854572230582455, "loss": 1.1732, "step": 170200 }, { "epoch": 34.24, "learning_rate": 0.000683892137071723, "loss": 1.1736, "step": 170300 }, { "epoch": 34.26, "learning_rate": 0.00068232816675818, "loss": 1.1748, "step": 170400 }, { "epoch": 34.28, "learning_rate": 0.0006807653151998552, "loss": 1.1725, "step": 170500 }, { "epoch": 34.3, "learning_rate": 0.0006792035854767827, "loss": 1.1689, "step": 170600 }, { "epoch": 34.32, "learning_rate": 0.0006776429806667841, "loss": 1.1719, "step": 170700 }, { "epoch": 34.34, "learning_rate": 0.0006760835038454657, "loss": 1.1711, "step": 170800 }, { "epoch": 34.36, "learning_rate": 0.0006745407359349601, "loss": 1.1753, "step": 170900 }, { "epoch": 34.38, "learning_rate": 0.0006729835129523944, "loss": 1.1734, "step": 171000 }, { "epoch": 34.4, "learning_rate": 0.0006714274271412859, "loss": 1.174, "step": 171100 }, { "epoch": 34.42, "learning_rate": 0.0006698724815683352, "loss": 1.1727, "step": 171200 }, { "epoch": 34.44, "learning_rate": 0.0006683186792979937, "loss": 1.1714, "step": 171300 }, { "epoch": 34.46, "learning_rate": 0.0006667660233924612, "loss": 1.1702, "step": 171400 }, { "epoch": 34.48, "learning_rate": 0.0006652145169116783, "loss": 1.1687, "step": 171500 }, { "epoch": 34.5, "learning_rate": 0.0006636641629133204, "loss": 1.1715, "step": 171600 }, { "epoch": 34.52, "learning_rate": 0.0006621149644527902, "loss": 1.1716, "step": 171700 }, { "epoch": 34.54, "learning_rate": 0.0006605669245832145, "loss": 1.1719, "step": 171800 }, { "epoch": 34.56, "learning_rate": 0.0006590200463554366, "loss": 1.1726, "step": 171900 }, { "epoch": 34.59, "learning_rate": 0.0006574743328180105, "loss": 1.1751, "step": 172000 }, { "epoch": 34.61, "learning_rate": 0.0006559297870171938, "loss": 1.1714, "step": 172100 }, { "epoch": 34.63, "learning_rate": 0.0006543864119969438, "loss": 1.1752, "step": 172200 }, { "epoch": 34.65, "learning_rate": 0.0006528442107989105, "loss": 1.1756, "step": 172300 }, { "epoch": 34.67, "learning_rate": 0.0006513031864624303, "loss": 1.1723, "step": 172400 }, { "epoch": 34.69, "learning_rate": 0.0006497633420245197, "loss": 1.1723, "step": 172500 }, { "epoch": 34.71, "learning_rate": 0.0006482246805198708, "loss": 1.1735, "step": 172600 }, { "epoch": 34.73, "learning_rate": 0.0006466872049808438, "loss": 1.1732, "step": 172700 }, { "epoch": 34.75, "learning_rate": 0.0006451509184374624, "loss": 1.1732, "step": 172800 }, { "epoch": 34.77, "learning_rate": 0.0006436158239174055, "loss": 1.1751, "step": 172900 }, { "epoch": 34.79, "learning_rate": 0.0006420819244460042, "loss": 1.1721, "step": 173000 }, { "epoch": 34.81, "learning_rate": 0.0006405492230462343, "loss": 1.1709, "step": 173100 }, { "epoch": 34.83, "learning_rate": 0.0006390177227387101, "loss": 1.1696, "step": 173200 }, { "epoch": 34.85, "learning_rate": 0.0006374874265416783, "loss": 1.17, "step": 173300 }, { "epoch": 34.87, "learning_rate": 0.0006359583374710134, "loss": 1.1774, "step": 173400 }, { "epoch": 34.89, "learning_rate": 0.0006344304585402111, "loss": 1.1666, "step": 173500 }, { "epoch": 34.91, "learning_rate": 0.0006329037927603816, "loss": 1.1701, "step": 173600 }, { "epoch": 34.93, "learning_rate": 0.0006313783431402438, "loss": 1.1708, "step": 173700 }, { "epoch": 34.95, "learning_rate": 0.0006298541126861209, "loss": 1.1724, "step": 173800 }, { "epoch": 34.97, "learning_rate": 0.0006283311044019327, "loss": 1.1727, "step": 173900 }, { "epoch": 34.99, "learning_rate": 0.0006268093212891912, "loss": 1.1685, "step": 174000 }, { "epoch": 35.0, "eval_accuracy": 0.41499333743989697, "eval_loss": 1.1372462511062622, "eval_runtime": 19.9974, "eval_samples_per_second": 3978.919, "eval_steps_per_second": 15.552, "step": 174063 }, { "epoch": 35.01, "learning_rate": 0.0006253039658071285, "loss": 1.1616, "step": 174100 }, { "epoch": 35.03, "learning_rate": 0.0006237846297056513, "loss": 1.1594, "step": 174200 }, { "epoch": 35.05, "learning_rate": 0.0006222665277357129, "loss": 1.1619, "step": 174300 }, { "epoch": 35.07, "learning_rate": 0.0006207496628891555, "loss": 1.1655, "step": 174400 }, { "epoch": 35.09, "learning_rate": 0.0006192340381553838, "loss": 1.1588, "step": 174500 }, { "epoch": 35.11, "learning_rate": 0.0006177196565213567, "loss": 1.1639, "step": 174600 }, { "epoch": 35.13, "learning_rate": 0.0006162065209715849, "loss": 1.1594, "step": 174700 }, { "epoch": 35.15, "learning_rate": 0.0006146946344881228, "loss": 1.164, "step": 174800 }, { "epoch": 35.17, "learning_rate": 0.0006131840000505637, "loss": 1.1602, "step": 174900 }, { "epoch": 35.19, "learning_rate": 0.0006116746206360317, "loss": 1.1629, "step": 175000 }, { "epoch": 35.21, "learning_rate": 0.0006101664992191795, "loss": 1.1656, "step": 175100 }, { "epoch": 35.23, "learning_rate": 0.0006086596387721796, "loss": 1.166, "step": 175200 }, { "epoch": 35.25, "learning_rate": 0.0006071540422647201, "loss": 1.1632, "step": 175300 }, { "epoch": 35.27, "learning_rate": 0.0006056497126639966, "loss": 1.1649, "step": 175400 }, { "epoch": 35.29, "learning_rate": 0.0006041466529347094, "loss": 1.1621, "step": 175500 }, { "epoch": 35.31, "learning_rate": 0.0006026448660390557, "loss": 1.1657, "step": 175600 }, { "epoch": 35.33, "learning_rate": 0.0006011443549367248, "loss": 1.1663, "step": 175700 }, { "epoch": 35.35, "learning_rate": 0.0005996451225848903, "loss": 1.1682, "step": 175800 }, { "epoch": 35.37, "learning_rate": 0.0005981471719382066, "loss": 1.1618, "step": 175900 }, { "epoch": 35.39, "learning_rate": 0.0005966505059488022, "loss": 1.1687, "step": 176000 }, { "epoch": 35.41, "learning_rate": 0.0005951551275662743, "loss": 1.162, "step": 176100 }, { "epoch": 35.43, "learning_rate": 0.0005936610397376806, "loss": 1.1656, "step": 176200 }, { "epoch": 35.45, "learning_rate": 0.0005921682454075374, "loss": 1.1663, "step": 176300 }, { "epoch": 35.47, "learning_rate": 0.0005906767475178108, "loss": 1.163, "step": 176400 }, { "epoch": 35.49, "learning_rate": 0.0005891865490079131, "loss": 1.1663, "step": 176500 }, { "epoch": 35.51, "learning_rate": 0.0005876976528146937, "loss": 1.1646, "step": 176600 }, { "epoch": 35.53, "learning_rate": 0.000586210061872437, "loss": 1.1609, "step": 176700 }, { "epoch": 35.55, "learning_rate": 0.0005847237791128547, "loss": 1.1629, "step": 176800 }, { "epoch": 35.57, "learning_rate": 0.0005832388074650808, "loss": 1.1615, "step": 176900 }, { "epoch": 35.59, "learning_rate": 0.0005817551498556642, "loss": 1.1688, "step": 177000 }, { "epoch": 35.61, "learning_rate": 0.0005802728092085649, "loss": 1.1621, "step": 177100 }, { "epoch": 35.63, "learning_rate": 0.0005787917884451475, "loss": 1.1638, "step": 177200 }, { "epoch": 35.65, "learning_rate": 0.000577312090484176, "loss": 1.1656, "step": 177300 }, { "epoch": 35.67, "learning_rate": 0.0005758337182418055, "loss": 1.1625, "step": 177400 }, { "epoch": 35.69, "learning_rate": 0.0005743566746315804, "loss": 1.1613, "step": 177500 }, { "epoch": 35.71, "learning_rate": 0.0005728809625644257, "loss": 1.167, "step": 177600 }, { "epoch": 35.73, "learning_rate": 0.0005714065849486429, "loss": 1.1632, "step": 177700 }, { "epoch": 35.75, "learning_rate": 0.0005699335446899022, "loss": 1.1662, "step": 177800 }, { "epoch": 35.77, "learning_rate": 0.0005684618446912396, "loss": 1.1628, "step": 177900 }, { "epoch": 35.79, "learning_rate": 0.0005669914878530493, "loss": 1.1689, "step": 178000 }, { "epoch": 35.81, "learning_rate": 0.0005655224770730786, "loss": 1.1652, "step": 178100 }, { "epoch": 35.83, "learning_rate": 0.0005640694851778694, "loss": 1.1631, "step": 178200 }, { "epoch": 35.85, "learning_rate": 0.0005626031616641948, "loss": 1.1611, "step": 178300 }, { "epoch": 35.87, "learning_rate": 0.0005611381928571567, "loss": 1.1598, "step": 178400 }, { "epoch": 35.89, "learning_rate": 0.0005596745816438834, "loss": 1.1639, "step": 178500 }, { "epoch": 35.91, "learning_rate": 0.0005582123309088284, "loss": 1.1606, "step": 178600 }, { "epoch": 35.93, "learning_rate": 0.0005567514435337624, "loss": 1.1622, "step": 178700 }, { "epoch": 35.95, "learning_rate": 0.000555291922397771, "loss": 1.162, "step": 178800 }, { "epoch": 35.97, "learning_rate": 0.000553833770377246, "loss": 1.164, "step": 178900 }, { "epoch": 35.99, "learning_rate": 0.0005523769903458824, "loss": 1.1658, "step": 179000 }, { "epoch": 36.0, "eval_accuracy": 0.4156597525371182, "eval_loss": 1.1301237344741821, "eval_runtime": 19.5705, "eval_samples_per_second": 4065.719, "eval_steps_per_second": 15.891, "step": 179037 }, { "epoch": 36.01, "learning_rate": 0.0005509215851746693, "loss": 1.1566, "step": 179100 }, { "epoch": 36.03, "learning_rate": 0.0005494675577318875, "loss": 1.1495, "step": 179200 }, { "epoch": 36.05, "learning_rate": 0.0005480149108831029, "loss": 1.1467, "step": 179300 }, { "epoch": 36.07, "learning_rate": 0.0005465636474911603, "loss": 1.1519, "step": 179400 }, { "epoch": 36.09, "learning_rate": 0.000545113770416177, "loss": 1.1524, "step": 179500 }, { "epoch": 36.11, "learning_rate": 0.0005436652825155394, "loss": 1.1548, "step": 179600 }, { "epoch": 36.13, "learning_rate": 0.0005422181866438958, "loss": 1.1576, "step": 179700 }, { "epoch": 36.15, "learning_rate": 0.0005407724856531514, "loss": 1.1558, "step": 179800 }, { "epoch": 36.17, "learning_rate": 0.0005393281823924612, "loss": 1.1539, "step": 179900 }, { "epoch": 36.19, "learning_rate": 0.000537885279708227, "loss": 1.1562, "step": 180000 }, { "epoch": 36.21, "learning_rate": 0.0005364437804440896, "loss": 1.1551, "step": 180100 }, { "epoch": 36.23, "learning_rate": 0.0005350036874409248, "loss": 1.1554, "step": 180200 }, { "epoch": 36.25, "learning_rate": 0.0005335650035368354, "loss": 1.1547, "step": 180300 }, { "epoch": 36.27, "learning_rate": 0.0005321277315671485, "loss": 1.159, "step": 180400 }, { "epoch": 36.29, "learning_rate": 0.0005306918743644085, "loss": 1.1551, "step": 180500 }, { "epoch": 36.31, "learning_rate": 0.0005292574347583714, "loss": 1.1557, "step": 180600 }, { "epoch": 36.33, "learning_rate": 0.0005278244155759988, "loss": 1.1572, "step": 180700 }, { "epoch": 36.35, "learning_rate": 0.0005263928196414538, "loss": 1.1536, "step": 180800 }, { "epoch": 36.37, "learning_rate": 0.0005249626497760943, "loss": 1.1544, "step": 180900 }, { "epoch": 36.39, "learning_rate": 0.0005235339087984682, "loss": 1.1571, "step": 181000 }, { "epoch": 36.41, "learning_rate": 0.0005221065995243063, "loss": 1.1575, "step": 181100 }, { "epoch": 36.43, "learning_rate": 0.0005206807247665185, "loss": 1.1507, "step": 181200 }, { "epoch": 36.46, "learning_rate": 0.0005192562873351877, "loss": 1.1566, "step": 181300 }, { "epoch": 36.48, "learning_rate": 0.0005178332900375647, "loss": 1.1561, "step": 181400 }, { "epoch": 36.5, "learning_rate": 0.0005164117356780603, "loss": 1.1568, "step": 181500 }, { "epoch": 36.52, "learning_rate": 0.000514991627058243, "loss": 1.1599, "step": 181600 }, { "epoch": 36.54, "learning_rate": 0.0005135729669768321, "loss": 1.1601, "step": 181700 }, { "epoch": 36.56, "learning_rate": 0.0005121557582296915, "loss": 1.1561, "step": 181800 }, { "epoch": 36.58, "learning_rate": 0.0005107400036098259, "loss": 1.1579, "step": 181900 }, { "epoch": 36.6, "learning_rate": 0.0005093257059073723, "loss": 1.1532, "step": 182000 }, { "epoch": 36.62, "learning_rate": 0.0005079128679095983, "loss": 1.159, "step": 182100 }, { "epoch": 36.64, "learning_rate": 0.0005065014924008942, "loss": 1.1549, "step": 182200 }, { "epoch": 36.66, "learning_rate": 0.0005051056740029337, "loss": 1.1537, "step": 182300 }, { "epoch": 36.68, "learning_rate": 0.000503697217119769, "loss": 1.1526, "step": 182400 }, { "epoch": 36.7, "learning_rate": 0.0005022902310337858, "loss": 1.1616, "step": 182500 }, { "epoch": 36.72, "learning_rate": 0.000500884718517842, "loss": 1.1551, "step": 182600 }, { "epoch": 36.74, "learning_rate": 0.0004994806823418908, "loss": 1.1537, "step": 182700 }, { "epoch": 36.76, "learning_rate": 0.0004980781252729766, "loss": 1.1562, "step": 182800 }, { "epoch": 36.78, "learning_rate": 0.000496677050075227, "loss": 1.1569, "step": 182900 }, { "epoch": 36.8, "learning_rate": 0.000495277459509851, "loss": 1.1549, "step": 183000 }, { "epoch": 36.82, "learning_rate": 0.0004938793563351308, "loss": 1.1525, "step": 183100 }, { "epoch": 36.84, "learning_rate": 0.0004924827433064183, "loss": 1.1588, "step": 183200 }, { "epoch": 36.86, "learning_rate": 0.0004910876231761266, "loss": 1.1492, "step": 183300 }, { "epoch": 36.88, "learning_rate": 0.0004896939986937287, "loss": 1.1494, "step": 183400 }, { "epoch": 36.9, "learning_rate": 0.000488301872605749, "loss": 1.1526, "step": 183500 }, { "epoch": 36.92, "learning_rate": 0.00048691124765575953, "loss": 1.1514, "step": 183600 }, { "epoch": 36.94, "learning_rate": 0.0004855221265843726, "loss": 1.1562, "step": 183700 }, { "epoch": 36.96, "learning_rate": 0.00048413451212923826, "loss": 1.1559, "step": 183800 }, { "epoch": 36.98, "learning_rate": 0.0004827622605958196, "loss": 1.1564, "step": 183900 }, { "epoch": 37.0, "learning_rate": 0.0004813776524399174, "loss": 1.1529, "step": 184000 }, { "epoch": 37.0, "eval_accuracy": 0.416447232778523, "eval_loss": 1.121274709701538, "eval_runtime": 19.5197, "eval_samples_per_second": 4076.295, "eval_steps_per_second": 15.933, "step": 184010 }, { "epoch": 37.02, "learning_rate": 0.0004799945590681072, "loss": 1.1429, "step": 184100 }, { "epoch": 37.04, "learning_rate": 0.0004786129832061578, "loss": 1.143, "step": 184200 }, { "epoch": 37.06, "learning_rate": 0.00047723292757684944, "loss": 1.1453, "step": 184300 }, { "epoch": 37.08, "learning_rate": 0.00047585439489996554, "loss": 1.1486, "step": 184400 }, { "epoch": 37.1, "learning_rate": 0.0004744773878922883, "loss": 1.1424, "step": 184500 }, { "epoch": 37.12, "learning_rate": 0.0004731019092675921, "loss": 1.1469, "step": 184600 }, { "epoch": 37.14, "learning_rate": 0.00047172796173664076, "loss": 1.1446, "step": 184700 }, { "epoch": 37.16, "learning_rate": 0.0004703555480071799, "loss": 1.1482, "step": 184800 }, { "epoch": 37.18, "learning_rate": 0.00046898467078393294, "loss": 1.1476, "step": 184900 }, { "epoch": 37.2, "learning_rate": 0.00046761533276859366, "loss": 1.1463, "step": 185000 }, { "epoch": 37.22, "learning_rate": 0.0004662475366598239, "loss": 1.1455, "step": 185100 }, { "epoch": 37.24, "learning_rate": 0.00046488128515324634, "loss": 1.1469, "step": 185200 }, { "epoch": 37.26, "learning_rate": 0.00046351658094144005, "loss": 1.1439, "step": 185300 }, { "epoch": 37.28, "learning_rate": 0.0004621534267139332, "loss": 1.1458, "step": 185400 }, { "epoch": 37.3, "learning_rate": 0.00046079182515720076, "loss": 1.1454, "step": 185500 }, { "epoch": 37.32, "learning_rate": 0.00045943177895465734, "loss": 1.1487, "step": 185600 }, { "epoch": 37.34, "learning_rate": 0.0004580732907866525, "loss": 1.1434, "step": 185700 }, { "epoch": 37.36, "learning_rate": 0.00045671636333046426, "loss": 1.152, "step": 185800 }, { "epoch": 37.38, "learning_rate": 0.00045536099926029585, "loss": 1.1452, "step": 185900 }, { "epoch": 37.4, "learning_rate": 0.00045400720124726915, "loss": 1.1455, "step": 186000 }, { "epoch": 37.42, "learning_rate": 0.00045265497195942, "loss": 1.146, "step": 186100 }, { "epoch": 37.44, "learning_rate": 0.00045130431406169156, "loss": 1.1436, "step": 186200 }, { "epoch": 37.46, "learning_rate": 0.00044995523021593126, "loss": 1.1471, "step": 186300 }, { "epoch": 37.48, "learning_rate": 0.0004486077230808838, "loss": 1.1461, "step": 186400 }, { "epoch": 37.5, "learning_rate": 0.00044726179531218727, "loss": 1.1437, "step": 186500 }, { "epoch": 37.52, "learning_rate": 0.00044591744956236595, "loss": 1.1465, "step": 186600 }, { "epoch": 37.54, "learning_rate": 0.00044457468848082713, "loss": 1.1502, "step": 186700 }, { "epoch": 37.56, "learning_rate": 0.00044323351471385527, "loss": 1.1474, "step": 186800 }, { "epoch": 37.58, "learning_rate": 0.0004418939309046065, "loss": 1.1465, "step": 186900 }, { "epoch": 37.6, "learning_rate": 0.00044056931171319787, "loss": 1.1458, "step": 187000 }, { "epoch": 37.62, "learning_rate": 0.0004392328997709341, "loss": 1.1453, "step": 187100 }, { "epoch": 37.64, "learning_rate": 0.00043789808567071725, "loss": 1.1459, "step": 187200 }, { "epoch": 37.66, "learning_rate": 0.0004365648720431699, "loss": 1.144, "step": 187300 }, { "epoch": 37.68, "learning_rate": 0.0004352332615157606, "loss": 1.1478, "step": 187400 }, { "epoch": 37.7, "learning_rate": 0.00043390325671279736, "loss": 1.1437, "step": 187500 }, { "epoch": 37.72, "learning_rate": 0.00043257486025542497, "loss": 1.1467, "step": 187600 }, { "epoch": 37.74, "learning_rate": 0.0004312480747616181, "loss": 1.1486, "step": 187700 }, { "epoch": 37.76, "learning_rate": 0.00042992290284617695, "loss": 1.1459, "step": 187800 }, { "epoch": 37.78, "learning_rate": 0.00042859934712072045, "loss": 1.1466, "step": 187900 }, { "epoch": 37.8, "learning_rate": 0.00042727741019368354, "loss": 1.147, "step": 188000 }, { "epoch": 37.82, "learning_rate": 0.0004259570946703109, "loss": 1.1458, "step": 188100 }, { "epoch": 37.84, "learning_rate": 0.00042463840315265153, "loss": 1.1443, "step": 188200 }, { "epoch": 37.86, "learning_rate": 0.00042332133823955317, "loss": 1.1469, "step": 188300 }, { "epoch": 37.88, "learning_rate": 0.00042200590252665886, "loss": 1.1453, "step": 188400 }, { "epoch": 37.9, "learning_rate": 0.0004206920986064004, "loss": 1.1479, "step": 188500 }, { "epoch": 37.92, "learning_rate": 0.00041937992906799436, "loss": 1.1438, "step": 188600 }, { "epoch": 37.94, "learning_rate": 0.00041806939649743484, "loss": 1.1443, "step": 188700 }, { "epoch": 37.96, "learning_rate": 0.00041676050347749116, "loss": 1.1442, "step": 188800 }, { "epoch": 37.98, "learning_rate": 0.00041545325258770095, "loss": 1.1463, "step": 188900 }, { "epoch": 38.0, "eval_accuracy": 0.41712748843756287, "eval_loss": 1.113813042640686, "eval_runtime": 19.8226, "eval_samples_per_second": 4013.998, "eval_steps_per_second": 15.689, "step": 188983 }, { "epoch": 38.0, "learning_rate": 0.00041414764640436614, "loss": 1.1435, "step": 189000 }, { "epoch": 38.02, "learning_rate": 0.0004128436875005459, "loss": 1.1341, "step": 189100 }, { "epoch": 38.04, "learning_rate": 0.0004115413784460545, "loss": 1.1316, "step": 189200 }, { "epoch": 38.06, "learning_rate": 0.00041024072180745373, "loss": 1.1341, "step": 189300 }, { "epoch": 38.08, "learning_rate": 0.00040894172014805, "loss": 1.1296, "step": 189400 }, { "epoch": 38.1, "learning_rate": 0.00040764437602788627, "loss": 1.1366, "step": 189500 }, { "epoch": 38.12, "learning_rate": 0.00040634869200374056, "loss": 1.1403, "step": 189600 }, { "epoch": 38.14, "learning_rate": 0.00040505467062911833, "loss": 1.1389, "step": 189700 }, { "epoch": 38.16, "learning_rate": 0.00040376231445424904, "loss": 1.1316, "step": 189800 }, { "epoch": 38.18, "learning_rate": 0.00040247162602607923, "loss": 1.1382, "step": 189900 }, { "epoch": 38.2, "learning_rate": 0.00040118260788826965, "loss": 1.1355, "step": 190000 }, { "epoch": 38.22, "learning_rate": 0.00039989526258118915, "loss": 1.1365, "step": 190100 }, { "epoch": 38.24, "learning_rate": 0.00039860959264191007, "loss": 1.136, "step": 190200 }, { "epoch": 38.26, "learning_rate": 0.0003973256006042017, "loss": 1.1405, "step": 190300 }, { "epoch": 38.28, "learning_rate": 0.00039604328899852786, "loss": 1.1402, "step": 190400 }, { "epoch": 38.3, "learning_rate": 0.00039476266035204003, "loss": 1.1375, "step": 190500 }, { "epoch": 38.33, "learning_rate": 0.00039348371718857386, "loss": 1.1352, "step": 190600 }, { "epoch": 38.35, "learning_rate": 0.0003922064620286414, "loss": 1.1363, "step": 190700 }, { "epoch": 38.37, "learning_rate": 0.00039093089738942975, "loss": 1.1377, "step": 190800 }, { "epoch": 38.39, "learning_rate": 0.0003896570257847937, "loss": 1.1385, "step": 190900 }, { "epoch": 38.41, "learning_rate": 0.00038839756308466366, "loss": 1.1418, "step": 191000 }, { "epoch": 38.43, "learning_rate": 0.0003871270680844698, "loss": 1.1369, "step": 191100 }, { "epoch": 38.45, "learning_rate": 0.0003858582736153553, "loss": 1.1408, "step": 191200 }, { "epoch": 38.47, "learning_rate": 0.00038459118217783184, "loss": 1.1376, "step": 191300 }, { "epoch": 38.49, "learning_rate": 0.000383325796269056, "loss": 1.1401, "step": 191400 }, { "epoch": 38.51, "learning_rate": 0.0003820621183828224, "loss": 1.1371, "step": 191500 }, { "epoch": 38.53, "learning_rate": 0.0003808001510095603, "loss": 1.1346, "step": 191600 }, { "epoch": 38.55, "learning_rate": 0.0003795398966363266, "loss": 1.1439, "step": 191700 }, { "epoch": 38.57, "learning_rate": 0.0003782813577468032, "loss": 1.1426, "step": 191800 }, { "epoch": 38.59, "learning_rate": 0.00037702453682129114, "loss": 1.1383, "step": 191900 }, { "epoch": 38.61, "learning_rate": 0.00037576943633670605, "loss": 1.1362, "step": 192000 }, { "epoch": 38.63, "learning_rate": 0.0003745160587665715, "loss": 1.1356, "step": 192100 }, { "epoch": 38.65, "learning_rate": 0.000373264406581017, "loss": 1.1333, "step": 192200 }, { "epoch": 38.67, "learning_rate": 0.00037201448224677153, "loss": 1.14, "step": 192300 }, { "epoch": 38.69, "learning_rate": 0.0003707662882271585, "loss": 1.1349, "step": 192400 }, { "epoch": 38.71, "learning_rate": 0.00036951982698209094, "loss": 1.1371, "step": 192500 }, { "epoch": 38.73, "learning_rate": 0.0003682751009680678, "loss": 1.1391, "step": 192600 }, { "epoch": 38.75, "learning_rate": 0.0003670321126381676, "loss": 1.134, "step": 192700 }, { "epoch": 38.77, "learning_rate": 0.000365790864442045, "loss": 1.1358, "step": 192800 }, { "epoch": 38.79, "learning_rate": 0.0003645513588259242, "loss": 1.1331, "step": 192900 }, { "epoch": 38.81, "learning_rate": 0.0003633135982325958, "loss": 1.1376, "step": 193000 }, { "epoch": 38.83, "learning_rate": 0.0003620775851014114, "loss": 1.1353, "step": 193100 }, { "epoch": 38.85, "learning_rate": 0.00036084332186827895, "loss": 1.1376, "step": 193200 }, { "epoch": 38.87, "learning_rate": 0.0003596231273926687, "loss": 1.1346, "step": 193300 }, { "epoch": 38.89, "learning_rate": 0.0003583923536899551, "loss": 1.1375, "step": 193400 }, { "epoch": 38.91, "learning_rate": 0.0003571633371480666, "loss": 1.1387, "step": 193500 }, { "epoch": 38.93, "learning_rate": 0.0003559360801891219, "loss": 1.1375, "step": 193600 }, { "epoch": 38.95, "learning_rate": 0.00035471058523177265, "loss": 1.1357, "step": 193700 }, { "epoch": 38.97, "learning_rate": 0.00035348685469119664, "loss": 1.136, "step": 193800 }, { "epoch": 38.99, "learning_rate": 0.0003522648909790957, "loss": 1.1352, "step": 193900 }, { "epoch": 39.0, "eval_accuracy": 0.41790908246302183, "eval_loss": 1.1047524213790894, "eval_runtime": 19.8895, "eval_samples_per_second": 4000.506, "eval_steps_per_second": 15.636, "step": 193956 }, { "epoch": 39.01, "learning_rate": 0.000351044696503689, "loss": 1.1315, "step": 194000 }, { "epoch": 39.03, "learning_rate": 0.0003498262736697096, "loss": 1.1213, "step": 194100 }, { "epoch": 39.05, "learning_rate": 0.00034860962487839767, "loss": 1.126, "step": 194200 }, { "epoch": 39.07, "learning_rate": 0.00034739475252749854, "loss": 1.125, "step": 194300 }, { "epoch": 39.09, "learning_rate": 0.0003461816590112558, "loss": 1.1259, "step": 194400 }, { "epoch": 39.11, "learning_rate": 0.00034497034672040824, "loss": 1.1266, "step": 194500 }, { "epoch": 39.13, "learning_rate": 0.00034376081804218263, "loss": 1.1286, "step": 194600 }, { "epoch": 39.15, "learning_rate": 0.00034255307536029206, "loss": 1.1286, "step": 194700 }, { "epoch": 39.17, "learning_rate": 0.00034134712105492946, "loss": 1.1294, "step": 194800 }, { "epoch": 39.19, "learning_rate": 0.00034014295750276326, "loss": 1.1256, "step": 194900 }, { "epoch": 39.21, "learning_rate": 0.0003389405870769323, "loss": 1.1315, "step": 195000 }, { "epoch": 39.23, "learning_rate": 0.00033774001214704184, "loss": 1.1328, "step": 195100 }, { "epoch": 39.25, "learning_rate": 0.00033654123507915887, "loss": 1.1278, "step": 195200 }, { "epoch": 39.27, "learning_rate": 0.0003353562190853805, "loss": 1.1284, "step": 195300 }, { "epoch": 39.29, "learning_rate": 0.00033416102678803547, "loss": 1.1292, "step": 195400 }, { "epoch": 39.31, "learning_rate": 0.00033296763940608335, "loss": 1.1335, "step": 195500 }, { "epoch": 39.33, "learning_rate": 0.0003317760592914265, "loss": 1.1264, "step": 195600 }, { "epoch": 39.35, "learning_rate": 0.00033058628879240477, "loss": 1.1293, "step": 195700 }, { "epoch": 39.37, "learning_rate": 0.0003293983302537923, "loss": 1.1275, "step": 195800 }, { "epoch": 39.39, "learning_rate": 0.00032821218601679066, "loss": 1.127, "step": 195900 }, { "epoch": 39.41, "learning_rate": 0.00032702785841902783, "loss": 1.1287, "step": 196000 }, { "epoch": 39.43, "learning_rate": 0.0003258453497945503, "loss": 1.1278, "step": 196100 }, { "epoch": 39.45, "learning_rate": 0.0003246646624738207, "loss": 1.1296, "step": 196200 }, { "epoch": 39.47, "learning_rate": 0.00032348579878371114, "loss": 1.1309, "step": 196300 }, { "epoch": 39.49, "learning_rate": 0.00032230876104750046, "loss": 1.1274, "step": 196400 }, { "epoch": 39.51, "learning_rate": 0.000321133551584869, "loss": 1.1295, "step": 196500 }, { "epoch": 39.53, "learning_rate": 0.00031996017271189437, "loss": 1.1271, "step": 196600 }, { "epoch": 39.55, "learning_rate": 0.0003187886267410448, "loss": 1.1254, "step": 196700 }, { "epoch": 39.57, "learning_rate": 0.0003176189159811782, "loss": 1.1303, "step": 196800 }, { "epoch": 39.59, "learning_rate": 0.00031645104273753474, "loss": 1.1273, "step": 196900 }, { "epoch": 39.61, "learning_rate": 0.0003152850093117338, "loss": 1.13, "step": 197000 }, { "epoch": 39.63, "learning_rate": 0.0003141208180017678, "loss": 1.1253, "step": 197100 }, { "epoch": 39.65, "learning_rate": 0.00031295847110199976, "loss": 1.1286, "step": 197200 }, { "epoch": 39.67, "learning_rate": 0.0003117979709031578, "loss": 1.1295, "step": 197300 }, { "epoch": 39.69, "learning_rate": 0.0003106393196923304, "loss": 1.1279, "step": 197400 }, { "epoch": 39.71, "learning_rate": 0.00030949407858107237, "loss": 1.1279, "step": 197500 }, { "epoch": 39.73, "learning_rate": 0.00030833911364617274, "loss": 1.1268, "step": 197600 }, { "epoch": 39.75, "learning_rate": 0.0003071860045159272, "loss": 1.1229, "step": 197700 }, { "epoch": 39.77, "learning_rate": 0.00030603475346285824, "loss": 1.1263, "step": 197800 }, { "epoch": 39.79, "learning_rate": 0.0003048853627558264, "loss": 1.1254, "step": 197900 }, { "epoch": 39.81, "learning_rate": 0.0003037378346600259, "loss": 1.1294, "step": 198000 }, { "epoch": 39.83, "learning_rate": 0.0003025921714369792, "loss": 1.1278, "step": 198100 }, { "epoch": 39.85, "learning_rate": 0.0003014483753445349, "loss": 1.1253, "step": 198200 }, { "epoch": 39.87, "learning_rate": 0.000300306448636861, "loss": 1.1272, "step": 198300 }, { "epoch": 39.89, "learning_rate": 0.0002991663935644423, "loss": 1.1225, "step": 198400 }, { "epoch": 39.91, "learning_rate": 0.0002980282123740735, "loss": 1.1233, "step": 198500 }, { "epoch": 39.93, "learning_rate": 0.00029689190730885747, "loss": 1.1229, "step": 198600 }, { "epoch": 39.95, "learning_rate": 0.00029575748060819946, "loss": 1.1273, "step": 198700 }, { "epoch": 39.97, "learning_rate": 0.00029462493450780307, "loss": 1.1271, "step": 198800 }, { "epoch": 39.99, "learning_rate": 0.0002934942712396647, "loss": 1.1259, "step": 198900 }, { "epoch": 40.0, "eval_accuracy": 0.41850995375025135, "eval_loss": 1.096311330795288, "eval_runtime": 19.5694, "eval_samples_per_second": 4065.946, "eval_steps_per_second": 15.892, "step": 198930 }, { "epoch": 40.01, "learning_rate": 0.0002923654930320711, "loss": 1.116, "step": 199000 }, { "epoch": 40.03, "learning_rate": 0.0002912386021095936, "loss": 1.1166, "step": 199100 }, { "epoch": 40.05, "learning_rate": 0.00029011360069308446, "loss": 1.1156, "step": 199200 }, { "epoch": 40.07, "learning_rate": 0.0002889904909996713, "loss": 1.1193, "step": 199300 }, { "epoch": 40.09, "learning_rate": 0.0002878692752427543, "loss": 1.1196, "step": 199400 }, { "epoch": 40.11, "learning_rate": 0.0002867611394349388, "loss": 1.123, "step": 199500 }, { "epoch": 40.13, "learning_rate": 0.0002856436991818517, "loss": 1.117, "step": 199600 }, { "epoch": 40.15, "learning_rate": 0.00028452815946104537, "loss": 1.1189, "step": 199700 }, { "epoch": 40.17, "learning_rate": 0.00028341452247099996, "loss": 1.1162, "step": 199800 }, { "epoch": 40.2, "learning_rate": 0.0002823027904064475, "loss": 1.1149, "step": 199900 }, { "epoch": 40.22, "learning_rate": 0.00028119296545836515, "loss": 1.1151, "step": 200000 }, { "epoch": 40.24, "learning_rate": 0.000280085049813972, "loss": 1.1168, "step": 200100 }, { "epoch": 40.26, "learning_rate": 0.00027897904565672314, "loss": 1.1182, "step": 200200 }, { "epoch": 40.28, "learning_rate": 0.00027787495516630776, "loss": 1.1149, "step": 200300 }, { "epoch": 40.3, "learning_rate": 0.0002767727805186432, "loss": 1.1155, "step": 200400 }, { "epoch": 40.32, "learning_rate": 0.0002756725238858715, "loss": 1.1182, "step": 200500 }, { "epoch": 40.34, "learning_rate": 0.00027457418743635374, "loss": 1.1201, "step": 200600 }, { "epoch": 40.36, "learning_rate": 0.00027347777333466746, "loss": 1.1188, "step": 200700 }, { "epoch": 40.38, "learning_rate": 0.0002723832837416017, "loss": 1.1222, "step": 200800 }, { "epoch": 40.4, "learning_rate": 0.0002712907208141528, "loss": 1.1186, "step": 200900 }, { "epoch": 40.42, "learning_rate": 0.00027020008670551935, "loss": 1.121, "step": 201000 }, { "epoch": 40.44, "learning_rate": 0.0002691113835650995, "loss": 1.1172, "step": 201100 }, { "epoch": 40.46, "learning_rate": 0.0002680246135384853, "loss": 1.1223, "step": 201200 }, { "epoch": 40.48, "learning_rate": 0.00026693977876746, "loss": 1.118, "step": 201300 }, { "epoch": 40.5, "learning_rate": 0.0002658568813899914, "loss": 1.1155, "step": 201400 }, { "epoch": 40.52, "learning_rate": 0.00026478672351106777, "loss": 1.1191, "step": 201500 }, { "epoch": 40.54, "learning_rate": 0.00026370768789222914, "loss": 1.1167, "step": 201600 }, { "epoch": 40.56, "learning_rate": 0.0002626305960366815, "loss": 1.1194, "step": 201700 }, { "epoch": 40.58, "learning_rate": 0.00026155545006713424, "loss": 1.1171, "step": 201800 }, { "epoch": 40.6, "learning_rate": 0.00026048225210246063, "loss": 1.1199, "step": 201900 }, { "epoch": 40.62, "learning_rate": 0.0002594110042576962, "loss": 1.1161, "step": 202000 }, { "epoch": 40.64, "learning_rate": 0.00025834170864403287, "loss": 1.1146, "step": 202100 }, { "epoch": 40.66, "learning_rate": 0.00025727436736881505, "loss": 1.117, "step": 202200 }, { "epoch": 40.68, "learning_rate": 0.00025620898253553515, "loss": 1.1194, "step": 202300 }, { "epoch": 40.7, "learning_rate": 0.00025514555624383053, "loss": 1.1188, "step": 202400 }, { "epoch": 40.72, "learning_rate": 0.0002540840905894784, "loss": 1.1166, "step": 202500 }, { "epoch": 40.74, "learning_rate": 0.0002530245876643923, "loss": 1.1188, "step": 202600 }, { "epoch": 40.76, "learning_rate": 0.0002519670495566169, "loss": 1.1191, "step": 202700 }, { "epoch": 40.78, "learning_rate": 0.00025091147835032526, "loss": 1.1151, "step": 202800 }, { "epoch": 40.8, "learning_rate": 0.00024985787612581423, "loss": 1.1194, "step": 202900 }, { "epoch": 40.82, "learning_rate": 0.00024880624495950024, "loss": 1.1167, "step": 203000 }, { "epoch": 40.84, "learning_rate": 0.00024775658692391416, "loss": 1.1208, "step": 203100 }, { "epoch": 40.86, "learning_rate": 0.0002467089040876995, "loss": 1.1195, "step": 203200 }, { "epoch": 40.88, "learning_rate": 0.00024566319851560675, "loss": 1.1142, "step": 203300 }, { "epoch": 40.9, "learning_rate": 0.00024461947226848984, "loss": 1.1159, "step": 203400 }, { "epoch": 40.92, "learning_rate": 0.00024357772740330076, "loss": 1.1104, "step": 203500 }, { "epoch": 40.94, "learning_rate": 0.00024254835376265563, "loss": 1.1169, "step": 203600 }, { "epoch": 40.96, "learning_rate": 0.00024151055795158666, "loss": 1.1174, "step": 203700 }, { "epoch": 40.98, "learning_rate": 0.00024047474964942626, "loss": 1.1196, "step": 203800 }, { "epoch": 41.0, "learning_rate": 0.00023944093089752302, "loss": 1.1194, "step": 203900 }, { "epoch": 41.0, "eval_accuracy": 0.41941595374516055, "eval_loss": 1.0883480310440063, "eval_runtime": 19.8865, "eval_samples_per_second": 4001.112, "eval_steps_per_second": 15.639, "step": 203903 }, { "epoch": 41.02, "learning_rate": 0.00023840910373330374, "loss": 1.1042, "step": 204000 }, { "epoch": 41.04, "learning_rate": 0.00023737927019027105, "loss": 1.1048, "step": 204100 }, { "epoch": 41.06, "learning_rate": 0.00023635143229799844, "loss": 1.1014, "step": 204200 }, { "epoch": 41.08, "learning_rate": 0.0002353255920821265, "loss": 1.1069, "step": 204300 }, { "epoch": 41.1, "learning_rate": 0.0002343017515643582, "loss": 1.107, "step": 204400 }, { "epoch": 41.12, "learning_rate": 0.00023327991276245617, "loss": 1.1049, "step": 204500 }, { "epoch": 41.14, "learning_rate": 0.00023226007769023795, "loss": 1.106, "step": 204600 }, { "epoch": 41.16, "learning_rate": 0.00023124224835757254, "loss": 1.109, "step": 204700 }, { "epoch": 41.18, "learning_rate": 0.00023022642677037505, "loss": 1.1107, "step": 204800 }, { "epoch": 41.2, "learning_rate": 0.0002292126149306048, "loss": 1.1064, "step": 204900 }, { "epoch": 41.22, "learning_rate": 0.00022820081483625993, "loss": 1.1077, "step": 205000 }, { "epoch": 41.24, "learning_rate": 0.00022719102848137426, "loss": 1.1066, "step": 205100 }, { "epoch": 41.26, "learning_rate": 0.0002261832578560119, "loss": 1.1104, "step": 205200 }, { "epoch": 41.28, "learning_rate": 0.00022517750494626537, "loss": 1.1109, "step": 205300 }, { "epoch": 41.3, "learning_rate": 0.00022417377173425068, "loss": 1.1095, "step": 205400 }, { "epoch": 41.32, "learning_rate": 0.00022317206019810355, "loss": 1.1118, "step": 205500 }, { "epoch": 41.34, "learning_rate": 0.00022218235916729556, "loss": 1.1085, "step": 205600 }, { "epoch": 41.36, "learning_rate": 0.00022118467663540713, "loss": 1.1091, "step": 205700 }, { "epoch": 41.38, "learning_rate": 0.00022018902167022874, "loss": 1.1113, "step": 205800 }, { "epoch": 41.4, "learning_rate": 0.00021919539623397572, "loss": 1.1041, "step": 205900 }, { "epoch": 41.42, "learning_rate": 0.00021820380228486328, "loss": 1.1076, "step": 206000 }, { "epoch": 41.44, "learning_rate": 0.00021721424177710335, "loss": 1.1081, "step": 206100 }, { "epoch": 41.46, "learning_rate": 0.00021622671666089957, "loss": 1.1087, "step": 206200 }, { "epoch": 41.48, "learning_rate": 0.00021524122888244514, "loss": 1.1065, "step": 206300 }, { "epoch": 41.5, "learning_rate": 0.00021425778038391768, "loss": 1.1096, "step": 206400 }, { "epoch": 41.52, "learning_rate": 0.00021327637310347648, "loss": 1.1105, "step": 206500 }, { "epoch": 41.54, "learning_rate": 0.0002122970089752567, "loss": 1.109, "step": 206600 }, { "epoch": 41.56, "learning_rate": 0.00021131968992936804, "loss": 1.1114, "step": 206700 }, { "epoch": 41.58, "learning_rate": 0.00021034441789188953, "loss": 1.1096, "step": 206800 }, { "epoch": 41.6, "learning_rate": 0.00020937119478486616, "loss": 1.1161, "step": 206900 }, { "epoch": 41.62, "learning_rate": 0.00020840002252630428, "loss": 1.1108, "step": 207000 }, { "epoch": 41.64, "learning_rate": 0.00020743090303016923, "loss": 1.1092, "step": 207100 }, { "epoch": 41.66, "learning_rate": 0.00020646383820638022, "loss": 1.1069, "step": 207200 }, { "epoch": 41.68, "learning_rate": 0.00020549882996080793, "loss": 1.108, "step": 207300 }, { "epoch": 41.7, "learning_rate": 0.0002045358801952689, "loss": 1.1072, "step": 207400 }, { "epoch": 41.72, "learning_rate": 0.00020357499080752366, "loss": 1.108, "step": 207500 }, { "epoch": 41.74, "learning_rate": 0.00020261616369127183, "loss": 1.1074, "step": 207600 }, { "epoch": 41.76, "learning_rate": 0.0002016689581419083, "loss": 1.1075, "step": 207700 }, { "epoch": 41.78, "learning_rate": 0.00020071424056369538, "loss": 1.1085, "step": 207800 }, { "epoch": 41.8, "learning_rate": 0.00019976159089487967, "loss": 1.1082, "step": 207900 }, { "epoch": 41.82, "learning_rate": 0.0001988110110129225, "loss": 1.1102, "step": 208000 }, { "epoch": 41.84, "learning_rate": 0.00019786250279120606, "loss": 1.1108, "step": 208100 }, { "epoch": 41.86, "learning_rate": 0.00019691606809902964, "loss": 1.108, "step": 208200 }, { "epoch": 41.88, "learning_rate": 0.00019597170880160568, "loss": 1.1053, "step": 208300 }, { "epoch": 41.9, "learning_rate": 0.00019502942676005705, "loss": 1.1093, "step": 208400 }, { "epoch": 41.92, "learning_rate": 0.00019408922383141235, "loss": 1.109, "step": 208500 }, { "epoch": 41.94, "learning_rate": 0.0001931511018686033, "loss": 1.1064, "step": 208600 }, { "epoch": 41.96, "learning_rate": 0.0001922150627204597, "loss": 1.1058, "step": 208700 }, { "epoch": 41.98, "learning_rate": 0.00019128110823170717, "loss": 1.1035, "step": 208800 }, { "epoch": 42.0, "eval_accuracy": 0.42008968684058473, "eval_loss": 1.080112099647522, "eval_runtime": 19.7792, "eval_samples_per_second": 4022.819, "eval_steps_per_second": 15.724, "step": 208876 }, { "epoch": 42.0, "learning_rate": 0.00019034924024296282, "loss": 1.1021, "step": 208900 }, { "epoch": 42.02, "learning_rate": 0.00018941946059073192, "loss": 1.097, "step": 209000 }, { "epoch": 42.04, "learning_rate": 0.0001884917711074033, "loss": 1.0949, "step": 209100 }, { "epoch": 42.07, "learning_rate": 0.00018756617362124722, "loss": 1.094, "step": 209200 }, { "epoch": 42.09, "learning_rate": 0.00018664266995641125, "loss": 1.0974, "step": 209300 }, { "epoch": 42.11, "learning_rate": 0.00018572126193291613, "loss": 1.1003, "step": 209400 }, { "epoch": 42.13, "learning_rate": 0.00018480195136665227, "loss": 1.0985, "step": 209500 }, { "epoch": 42.15, "learning_rate": 0.00018388474006937703, "loss": 1.0984, "step": 209600 }, { "epoch": 42.17, "learning_rate": 0.0001829787705446627, "loss": 1.0988, "step": 209700 }, { "epoch": 42.19, "learning_rate": 0.0001820657421663692, "loss": 1.0996, "step": 209800 }, { "epoch": 42.21, "learning_rate": 0.00018115481844952584, "loss": 1.099, "step": 209900 }, { "epoch": 42.23, "learning_rate": 0.00018024600118936058, "loss": 1.0987, "step": 210000 }, { "epoch": 42.25, "learning_rate": 0.0001793392921769506, "loss": 1.1014, "step": 210100 }, { "epoch": 42.27, "learning_rate": 0.0001784346931992184, "loss": 1.1018, "step": 210200 }, { "epoch": 42.29, "learning_rate": 0.0001775322060389281, "loss": 1.0979, "step": 210300 }, { "epoch": 42.31, "learning_rate": 0.000176631832474681, "loss": 1.1027, "step": 210400 }, { "epoch": 42.33, "learning_rate": 0.00017573357428091394, "loss": 1.0946, "step": 210500 }, { "epoch": 42.35, "learning_rate": 0.0001748374332278946, "loss": 1.0991, "step": 210600 }, { "epoch": 42.37, "learning_rate": 0.00017394341108171812, "loss": 1.1001, "step": 210700 }, { "epoch": 42.39, "learning_rate": 0.00017305150960430338, "loss": 1.0999, "step": 210800 }, { "epoch": 42.41, "learning_rate": 0.00017216173055339067, "loss": 1.1005, "step": 210900 }, { "epoch": 42.43, "learning_rate": 0.0001712740756825369, "loss": 1.0971, "step": 211000 }, { "epoch": 42.45, "learning_rate": 0.0001703885467411133, "loss": 1.1002, "step": 211100 }, { "epoch": 42.47, "learning_rate": 0.00016950514547430003, "loss": 1.1011, "step": 211200 }, { "epoch": 42.49, "learning_rate": 0.00016862387362308565, "loss": 1.1001, "step": 211300 }, { "epoch": 42.51, "learning_rate": 0.00016774473292426124, "loss": 1.0965, "step": 211400 }, { "epoch": 42.53, "learning_rate": 0.00016686772511041823, "loss": 1.0978, "step": 211500 }, { "epoch": 42.55, "learning_rate": 0.00016599285190994393, "loss": 1.098, "step": 211600 }, { "epoch": 42.57, "learning_rate": 0.00016512011504701954, "loss": 1.0967, "step": 211700 }, { "epoch": 42.59, "learning_rate": 0.00016425821164064812, "loss": 1.0975, "step": 211800 }, { "epoch": 42.61, "learning_rate": 0.00016338973120231013, "loss": 1.1008, "step": 211900 }, { "epoch": 42.63, "learning_rate": 0.00016252339223169542, "loss": 1.1004, "step": 212000 }, { "epoch": 42.65, "learning_rate": 0.0001616591964361662, "loss": 1.0986, "step": 212100 }, { "epoch": 42.67, "learning_rate": 0.00016079714551885991, "loss": 1.0978, "step": 212200 }, { "epoch": 42.69, "learning_rate": 0.00015993724117868786, "loss": 1.1004, "step": 212300 }, { "epoch": 42.71, "learning_rate": 0.00015907948511033082, "loss": 1.0987, "step": 212400 }, { "epoch": 42.73, "learning_rate": 0.00015822387900423591, "loss": 1.1042, "step": 212500 }, { "epoch": 42.75, "learning_rate": 0.00015737042454661232, "loss": 1.0976, "step": 212600 }, { "epoch": 42.77, "learning_rate": 0.00015652762576620364, "loss": 1.1006, "step": 212700 }, { "epoch": 42.79, "learning_rate": 0.00015567845808881514, "loss": 1.1024, "step": 212800 }, { "epoch": 42.81, "learning_rate": 0.00015483144707635803, "loss": 1.0976, "step": 212900 }, { "epoch": 42.83, "learning_rate": 0.0001539865943981035, "loss": 1.0966, "step": 213000 }, { "epoch": 42.85, "learning_rate": 0.0001531439017190679, "loss": 1.1029, "step": 213100 }, { "epoch": 42.87, "learning_rate": 0.0001523033707000121, "loss": 1.0952, "step": 213200 }, { "epoch": 42.89, "learning_rate": 0.00015146500299743584, "loss": 1.0991, "step": 213300 }, { "epoch": 42.91, "learning_rate": 0.0001506288002635764, "loss": 1.0977, "step": 213400 }, { "epoch": 42.93, "learning_rate": 0.00014979476414640313, "loss": 1.095, "step": 213500 }, { "epoch": 42.95, "learning_rate": 0.00014896289628961654, "loss": 1.0932, "step": 213600 }, { "epoch": 42.97, "learning_rate": 0.00014813319833264339, "loss": 1.0971, "step": 213700 }, { "epoch": 42.99, "learning_rate": 0.00014730567191063472, "loss": 1.0962, "step": 213800 }, { "epoch": 43.0, "eval_accuracy": 0.4207685107173674, "eval_loss": 1.071601152420044, "eval_runtime": 19.6492, "eval_samples_per_second": 4049.421, "eval_steps_per_second": 15.828, "step": 213849 }, { "epoch": 43.01, "learning_rate": 0.00014648031865446083, "loss": 1.0912, "step": 213900 }, { "epoch": 43.03, "learning_rate": 0.00014565714019071016, "loss": 1.0872, "step": 214000 }, { "epoch": 43.05, "learning_rate": 0.00014483613814168474, "loss": 1.0874, "step": 214100 }, { "epoch": 43.07, "learning_rate": 0.0001440173141253978, "loss": 1.0936, "step": 214200 }, { "epoch": 43.09, "learning_rate": 0.00014320066975556919, "loss": 1.0912, "step": 214300 }, { "epoch": 43.11, "learning_rate": 0.000142386206641624, "loss": 1.0885, "step": 214400 }, { "epoch": 43.13, "learning_rate": 0.00014157392638868823, "loss": 1.0879, "step": 214500 }, { "epoch": 43.15, "learning_rate": 0.00014076383059758642, "loss": 1.0882, "step": 214600 }, { "epoch": 43.17, "learning_rate": 0.0001399559208648371, "loss": 1.0894, "step": 214700 }, { "epoch": 43.19, "learning_rate": 0.00013915019878265114, "loss": 1.0883, "step": 214800 }, { "epoch": 43.21, "learning_rate": 0.0001383466659389282, "loss": 1.0865, "step": 214900 }, { "epoch": 43.23, "learning_rate": 0.00013754532391725333, "loss": 1.0899, "step": 215000 }, { "epoch": 43.25, "learning_rate": 0.0001367461742968934, "loss": 1.09, "step": 215100 }, { "epoch": 43.27, "learning_rate": 0.0001359492186527951, "loss": 1.0892, "step": 215200 }, { "epoch": 43.29, "learning_rate": 0.0001351544585555814, "loss": 1.0893, "step": 215300 }, { "epoch": 43.31, "learning_rate": 0.00013436189557154823, "loss": 1.0895, "step": 215400 }, { "epoch": 43.33, "learning_rate": 0.0001335715312626608, "loss": 1.0932, "step": 215500 }, { "epoch": 43.35, "learning_rate": 0.00013278336718655206, "loss": 1.0897, "step": 215600 }, { "epoch": 43.37, "learning_rate": 0.00013199740489651862, "loss": 1.0899, "step": 215700 }, { "epoch": 43.39, "learning_rate": 0.0001312136459415178, "loss": 1.0911, "step": 215800 }, { "epoch": 43.41, "learning_rate": 0.00013043209186616432, "loss": 1.0859, "step": 215900 }, { "epoch": 43.43, "learning_rate": 0.000129652744210728, "loss": 1.0889, "step": 216000 }, { "epoch": 43.45, "learning_rate": 0.00012887560451113, "loss": 1.0887, "step": 216100 }, { "epoch": 43.47, "learning_rate": 0.0001281006742989406, "loss": 1.0937, "step": 216200 }, { "epoch": 43.49, "learning_rate": 0.0001273279551013748, "loss": 1.0875, "step": 216300 }, { "epoch": 43.51, "learning_rate": 0.00012655744844129082, "loss": 1.0894, "step": 216400 }, { "epoch": 43.53, "learning_rate": 0.0001257891558371864, "loss": 1.088, "step": 216500 }, { "epoch": 43.55, "learning_rate": 0.00012502307880319613, "loss": 1.0907, "step": 216600 }, { "epoch": 43.57, "learning_rate": 0.00012426684646913629, "loss": 1.0884, "step": 216700 }, { "epoch": 43.59, "learning_rate": 0.0001235051829070172, "loss": 1.0873, "step": 216800 }, { "epoch": 43.61, "learning_rate": 0.00012274573941621584, "loss": 1.0884, "step": 216900 }, { "epoch": 43.63, "learning_rate": 0.00012198851749342654, "loss": 1.0917, "step": 217000 }, { "epoch": 43.65, "learning_rate": 0.00012123351863096627, "loss": 1.0934, "step": 217100 }, { "epoch": 43.67, "learning_rate": 0.0001204807443167695, "loss": 1.0904, "step": 217200 }, { "epoch": 43.69, "learning_rate": 0.00011973019603438773, "loss": 1.0906, "step": 217300 }, { "epoch": 43.71, "learning_rate": 0.00011898187526298495, "loss": 1.0889, "step": 217400 }, { "epoch": 43.73, "learning_rate": 0.00011823578347733579, "loss": 1.09, "step": 217500 }, { "epoch": 43.75, "learning_rate": 0.00011749192214782101, "loss": 1.0912, "step": 217600 }, { "epoch": 43.77, "learning_rate": 0.00011675029274042654, "loss": 1.0905, "step": 217700 }, { "epoch": 43.79, "learning_rate": 0.0001160108967167392, "loss": 1.0874, "step": 217800 }, { "epoch": 43.81, "learning_rate": 0.0001152737355339449, "loss": 1.0902, "step": 217900 }, { "epoch": 43.83, "learning_rate": 0.00011453881064482418, "loss": 1.088, "step": 218000 }, { "epoch": 43.85, "learning_rate": 0.0001138061234977511, "loss": 1.0878, "step": 218100 }, { "epoch": 43.87, "learning_rate": 0.00011307567553668963, "loss": 1.0927, "step": 218200 }, { "epoch": 43.89, "learning_rate": 0.00011234746820119087, "loss": 1.0882, "step": 218300 }, { "epoch": 43.91, "learning_rate": 0.00011162150292638924, "loss": 1.0927, "step": 218400 }, { "epoch": 43.94, "learning_rate": 0.00011089778114300134, "loss": 1.0885, "step": 218500 }, { "epoch": 43.96, "learning_rate": 0.00011017630427732253, "loss": 1.0884, "step": 218600 }, { "epoch": 43.98, "learning_rate": 0.00010945707375122376, "loss": 1.0878, "step": 218700 }, { "epoch": 44.0, "learning_rate": 0.0001087472496787982, "loss": 1.0855, "step": 218800 }, { "epoch": 44.0, "eval_accuracy": 0.4214934698002123, "eval_loss": 1.0645169019699097, "eval_runtime": 19.7931, "eval_samples_per_second": 4019.996, "eval_steps_per_second": 15.713, "step": 218823 }, { "epoch": 44.02, "learning_rate": 0.00010803249358108017, "loss": 1.0767, "step": 218900 }, { "epoch": 44.04, "learning_rate": 0.00010731998804791671, "loss": 1.0805, "step": 219000 }, { "epoch": 44.06, "learning_rate": 0.00010660973448349847, "loss": 1.0767, "step": 219100 }, { "epoch": 44.08, "learning_rate": 0.00010590173428757774, "loss": 1.0796, "step": 219200 }, { "epoch": 44.1, "learning_rate": 0.00010519598885546585, "loss": 1.0782, "step": 219300 }, { "epoch": 44.12, "learning_rate": 0.00010449249957803011, "loss": 1.0765, "step": 219400 }, { "epoch": 44.14, "learning_rate": 0.00010379126784169191, "loss": 1.0828, "step": 219500 }, { "epoch": 44.16, "learning_rate": 0.00010309229502842355, "loss": 1.077, "step": 219600 }, { "epoch": 44.18, "learning_rate": 0.00010239558251574535, "loss": 1.0754, "step": 219700 }, { "epoch": 44.2, "learning_rate": 0.00010170113167672274, "loss": 1.0823, "step": 219800 }, { "epoch": 44.22, "learning_rate": 0.00010100894387996454, "loss": 1.082, "step": 219900 }, { "epoch": 44.24, "learning_rate": 0.00010031902048961913, "loss": 1.0795, "step": 220000 }, { "epoch": 44.26, "learning_rate": 9.963136286537278e-05, "loss": 1.0816, "step": 220100 }, { "epoch": 44.28, "learning_rate": 9.894597236244558e-05, "loss": 1.079, "step": 220200 }, { "epoch": 44.3, "learning_rate": 9.826285033159035e-05, "loss": 1.0816, "step": 220300 }, { "epoch": 44.32, "learning_rate": 9.758199811908924e-05, "loss": 1.0845, "step": 220400 }, { "epoch": 44.34, "learning_rate": 9.690341706675043e-05, "loss": 1.0838, "step": 220500 }, { "epoch": 44.36, "learning_rate": 9.622710851190694e-05, "loss": 1.0781, "step": 220600 }, { "epoch": 44.38, "learning_rate": 9.555307378741259e-05, "loss": 1.0841, "step": 220700 }, { "epoch": 44.4, "learning_rate": 9.488802055091186e-05, "loss": 1.0813, "step": 220800 }, { "epoch": 44.42, "learning_rate": 9.421851469638642e-05, "loss": 1.0834, "step": 220900 }, { "epoch": 44.44, "learning_rate": 9.355128663070311e-05, "loss": 1.0805, "step": 221000 }, { "epoch": 44.46, "learning_rate": 9.288633766882021e-05, "loss": 1.0837, "step": 221100 }, { "epoch": 44.48, "learning_rate": 9.222366912120445e-05, "loss": 1.079, "step": 221200 }, { "epoch": 44.5, "learning_rate": 9.156328229382879e-05, "loss": 1.082, "step": 221300 }, { "epoch": 44.52, "learning_rate": 9.090517848816912e-05, "loss": 1.0806, "step": 221400 }, { "epoch": 44.54, "learning_rate": 9.024935900120185e-05, "loss": 1.082, "step": 221500 }, { "epoch": 44.56, "learning_rate": 8.95958251254017e-05, "loss": 1.0818, "step": 221600 }, { "epoch": 44.58, "learning_rate": 8.894457814873885e-05, "loss": 1.0804, "step": 221700 }, { "epoch": 44.6, "learning_rate": 8.829561935467689e-05, "loss": 1.0799, "step": 221800 }, { "epoch": 44.62, "learning_rate": 8.76489500221693e-05, "loss": 1.0803, "step": 221900 }, { "epoch": 44.64, "learning_rate": 8.700457142565774e-05, "loss": 1.0811, "step": 222000 }, { "epoch": 44.66, "learning_rate": 8.63624848350697e-05, "loss": 1.0835, "step": 222100 }, { "epoch": 44.68, "learning_rate": 8.57226915158156e-05, "loss": 1.0782, "step": 222200 }, { "epoch": 44.7, "learning_rate": 8.508519272878545e-05, "loss": 1.0845, "step": 222300 }, { "epoch": 44.72, "learning_rate": 8.444998973034829e-05, "loss": 1.0814, "step": 222400 }, { "epoch": 44.74, "learning_rate": 8.381708377234828e-05, "loss": 1.0828, "step": 222500 }, { "epoch": 44.76, "learning_rate": 8.318647610210284e-05, "loss": 1.0845, "step": 222600 }, { "epoch": 44.78, "learning_rate": 8.25581679623992e-05, "loss": 1.0805, "step": 222700 }, { "epoch": 44.8, "learning_rate": 8.193216059149355e-05, "loss": 1.0768, "step": 222800 }, { "epoch": 44.82, "learning_rate": 8.131468087783922e-05, "loss": 1.0814, "step": 222900 }, { "epoch": 44.84, "learning_rate": 8.06932557027712e-05, "loss": 1.0778, "step": 223000 }, { "epoch": 44.86, "learning_rate": 8.007413497182939e-05, "loss": 1.083, "step": 223100 }, { "epoch": 44.88, "learning_rate": 7.94573199051636e-05, "loss": 1.075, "step": 223200 }, { "epoch": 44.9, "learning_rate": 7.884281171837912e-05, "loss": 1.0767, "step": 223300 }, { "epoch": 44.92, "learning_rate": 7.82306116225355e-05, "loss": 1.0833, "step": 223400 }, { "epoch": 44.94, "learning_rate": 7.762072082414307e-05, "loss": 1.0793, "step": 223500 }, { "epoch": 44.96, "learning_rate": 7.701314052516146e-05, "loss": 1.0785, "step": 223600 }, { "epoch": 44.98, "learning_rate": 7.640787192299645e-05, "loss": 1.0801, "step": 223700 }, { "epoch": 45.0, "eval_accuracy": 0.4221078634754255, "eval_loss": 1.0577867031097412, "eval_runtime": 19.8376, "eval_samples_per_second": 4010.968, "eval_steps_per_second": 15.677, "step": 223796 }, { "epoch": 45.0, "learning_rate": 7.580491621049817e-05, "loss": 1.0812, "step": 223800 }, { "epoch": 45.02, "learning_rate": 7.52042745759586e-05, "loss": 1.0668, "step": 223900 }, { "epoch": 45.04, "learning_rate": 7.460594820310931e-05, "loss": 1.0678, "step": 224000 }, { "epoch": 45.06, "learning_rate": 7.400993827111833e-05, "loss": 1.0686, "step": 224100 }, { "epoch": 45.08, "learning_rate": 7.341624595458923e-05, "loss": 1.0737, "step": 224200 }, { "epoch": 45.1, "learning_rate": 7.282487242355806e-05, "loss": 1.0732, "step": 224300 }, { "epoch": 45.12, "learning_rate": 7.223581884349059e-05, "loss": 1.0704, "step": 224400 }, { "epoch": 45.14, "learning_rate": 7.16490863752807e-05, "loss": 1.0695, "step": 224500 }, { "epoch": 45.16, "learning_rate": 7.106467617524787e-05, "loss": 1.0738, "step": 224600 }, { "epoch": 45.18, "learning_rate": 7.048258939513497e-05, "loss": 1.07, "step": 224700 }, { "epoch": 45.2, "learning_rate": 6.990282718210616e-05, "loss": 1.0724, "step": 224800 }, { "epoch": 45.22, "learning_rate": 6.93311535277738e-05, "loss": 1.071, "step": 224900 }, { "epoch": 45.24, "learning_rate": 6.875602059798258e-05, "loss": 1.0762, "step": 225000 }, { "epoch": 45.26, "learning_rate": 6.818321563795887e-05, "loss": 1.074, "step": 225100 }, { "epoch": 45.28, "learning_rate": 6.761273977657439e-05, "loss": 1.069, "step": 225200 }, { "epoch": 45.3, "learning_rate": 6.704459413811065e-05, "loss": 1.0751, "step": 225300 }, { "epoch": 45.32, "learning_rate": 6.647877984225656e-05, "loss": 1.0755, "step": 225400 }, { "epoch": 45.34, "learning_rate": 6.59152980041064e-05, "loss": 1.0733, "step": 225500 }, { "epoch": 45.36, "learning_rate": 6.535414973415796e-05, "loss": 1.0741, "step": 225600 }, { "epoch": 45.38, "learning_rate": 6.479533613830982e-05, "loss": 1.0741, "step": 225700 }, { "epoch": 45.4, "learning_rate": 6.423885831785992e-05, "loss": 1.076, "step": 225800 }, { "epoch": 45.42, "learning_rate": 6.368471736950224e-05, "loss": 1.073, "step": 225900 }, { "epoch": 45.44, "learning_rate": 6.313291438532556e-05, "loss": 1.075, "step": 226000 }, { "epoch": 45.46, "learning_rate": 6.258345045281138e-05, "loss": 1.0719, "step": 226100 }, { "epoch": 45.48, "learning_rate": 6.203632665483128e-05, "loss": 1.0774, "step": 226200 }, { "epoch": 45.5, "learning_rate": 6.149154406964438e-05, "loss": 1.0707, "step": 226300 }, { "epoch": 45.52, "learning_rate": 6.09491037708966e-05, "loss": 1.0762, "step": 226400 }, { "epoch": 45.54, "learning_rate": 6.040900682761738e-05, "loss": 1.07, "step": 226500 }, { "epoch": 45.56, "learning_rate": 5.987125430421819e-05, "loss": 1.0754, "step": 226600 }, { "epoch": 45.58, "learning_rate": 5.9335847260489354e-05, "loss": 1.0736, "step": 226700 }, { "epoch": 45.6, "learning_rate": 5.8802786751599724e-05, "loss": 1.0737, "step": 226800 }, { "epoch": 45.62, "learning_rate": 5.827207382809313e-05, "loss": 1.0699, "step": 226900 }, { "epoch": 45.64, "learning_rate": 5.774370953588723e-05, "loss": 1.0726, "step": 227000 }, { "epoch": 45.66, "learning_rate": 5.72176949162706e-05, "loss": 1.0727, "step": 227100 }, { "epoch": 45.68, "learning_rate": 5.669403100590123e-05, "loss": 1.0772, "step": 227200 }, { "epoch": 45.7, "learning_rate": 5.617792031399907e-05, "loss": 1.0748, "step": 227300 }, { "epoch": 45.72, "learning_rate": 5.565893738080891e-05, "loss": 1.0718, "step": 227400 }, { "epoch": 45.74, "learning_rate": 5.5142308228831315e-05, "loss": 1.0739, "step": 227500 }, { "epoch": 45.76, "learning_rate": 5.4628033876227974e-05, "loss": 1.075, "step": 227600 }, { "epoch": 45.78, "learning_rate": 5.411611533651911e-05, "loss": 1.0738, "step": 227700 }, { "epoch": 45.81, "learning_rate": 5.3606553618582714e-05, "loss": 1.0752, "step": 227800 }, { "epoch": 45.83, "learning_rate": 5.309934972665201e-05, "loss": 1.0727, "step": 227900 }, { "epoch": 45.85, "learning_rate": 5.259450466031324e-05, "loss": 1.071, "step": 228000 }, { "epoch": 45.87, "learning_rate": 5.2092019414504e-05, "loss": 1.074, "step": 228100 }, { "epoch": 45.89, "learning_rate": 5.1591894979511055e-05, "loss": 1.0699, "step": 228200 }, { "epoch": 45.91, "learning_rate": 5.109413234096888e-05, "loss": 1.0727, "step": 228300 }, { "epoch": 45.93, "learning_rate": 5.059873247985722e-05, "loss": 1.0721, "step": 228400 }, { "epoch": 45.95, "learning_rate": 5.010569637249912e-05, "loss": 1.0708, "step": 228500 }, { "epoch": 45.97, "learning_rate": 4.961502499055928e-05, "loss": 1.0701, "step": 228600 }, { "epoch": 45.99, "learning_rate": 4.912671930104237e-05, "loss": 1.072, "step": 228700 }, { "epoch": 46.0, "eval_accuracy": 0.42260501009247403, "eval_loss": 1.052234172821045, "eval_runtime": 19.8604, "eval_samples_per_second": 4006.368, "eval_steps_per_second": 15.659, "step": 228769 }, { "epoch": 46.01, "learning_rate": 4.864078026629054e-05, "loss": 1.0673, "step": 228800 }, { "epoch": 46.03, "learning_rate": 4.8157208843981476e-05, "loss": 1.0645, "step": 228900 }, { "epoch": 46.05, "learning_rate": 4.767600598712743e-05, "loss": 1.0635, "step": 229000 }, { "epoch": 46.07, "learning_rate": 4.719717264407245e-05, "loss": 1.0651, "step": 229100 }, { "epoch": 46.09, "learning_rate": 4.672070975849069e-05, "loss": 1.0623, "step": 229200 }, { "epoch": 46.11, "learning_rate": 4.625134744279142e-05, "loss": 1.0643, "step": 229300 }, { "epoch": 46.13, "learning_rate": 4.5779604556573094e-05, "loss": 1.0661, "step": 229400 }, { "epoch": 46.15, "learning_rate": 4.5310234921539935e-05, "loss": 1.0668, "step": 229500 }, { "epoch": 46.17, "learning_rate": 4.4843239462715455e-05, "loss": 1.0682, "step": 229600 }, { "epoch": 46.19, "learning_rate": 4.437861910044444e-05, "loss": 1.0681, "step": 229700 }, { "epoch": 46.21, "learning_rate": 4.3916374750390256e-05, "loss": 1.0666, "step": 229800 }, { "epoch": 46.23, "learning_rate": 4.345650732353393e-05, "loss": 1.0656, "step": 229900 }, { "epoch": 46.25, "learning_rate": 4.299901772617215e-05, "loss": 1.0653, "step": 230000 }, { "epoch": 46.27, "learning_rate": 4.2543906859915384e-05, "loss": 1.0678, "step": 230100 }, { "epoch": 46.29, "learning_rate": 4.209117562168643e-05, "loss": 1.064, "step": 230200 }, { "epoch": 46.31, "learning_rate": 4.1640824903717566e-05, "loss": 1.071, "step": 230300 }, { "epoch": 46.33, "learning_rate": 4.119285559355049e-05, "loss": 1.0661, "step": 230400 }, { "epoch": 46.35, "learning_rate": 4.0747268574033294e-05, "loss": 1.0667, "step": 230500 }, { "epoch": 46.37, "learning_rate": 4.0304064723319104e-05, "loss": 1.0685, "step": 230600 }, { "epoch": 46.39, "learning_rate": 3.986324491486421e-05, "loss": 1.0662, "step": 230700 }, { "epoch": 46.41, "learning_rate": 3.942481001742673e-05, "loss": 1.0661, "step": 230800 }, { "epoch": 46.43, "learning_rate": 3.8988760895064675e-05, "loss": 1.0645, "step": 230900 }, { "epoch": 46.45, "learning_rate": 3.8555098407134085e-05, "loss": 1.0628, "step": 231000 }, { "epoch": 46.47, "learning_rate": 3.8123823408287294e-05, "loss": 1.0712, "step": 231100 }, { "epoch": 46.49, "learning_rate": 3.7694936748471633e-05, "loss": 1.0657, "step": 231200 }, { "epoch": 46.51, "learning_rate": 3.726843927292778e-05, "loss": 1.0666, "step": 231300 }, { "epoch": 46.53, "learning_rate": 3.6848561063323876e-05, "loss": 1.0681, "step": 231400 }, { "epoch": 46.55, "learning_rate": 3.6426820560480634e-05, "loss": 1.0666, "step": 231500 }, { "epoch": 46.57, "learning_rate": 3.600747174108493e-05, "loss": 1.0647, "step": 231600 }, { "epoch": 46.59, "learning_rate": 3.5590515431579846e-05, "loss": 1.0647, "step": 231700 }, { "epoch": 46.61, "learning_rate": 3.51759524536939e-05, "loss": 1.0643, "step": 231800 }, { "epoch": 46.63, "learning_rate": 3.476378362443869e-05, "loss": 1.0623, "step": 231900 }, { "epoch": 46.65, "learning_rate": 3.435400975610778e-05, "loss": 1.0654, "step": 232000 }, { "epoch": 46.67, "learning_rate": 3.394663165627407e-05, "loss": 1.0645, "step": 232100 }, { "epoch": 46.69, "learning_rate": 3.3541650127789646e-05, "loss": 1.0678, "step": 232200 }, { "epoch": 46.71, "learning_rate": 3.31390659687833e-05, "loss": 1.0657, "step": 232300 }, { "epoch": 46.73, "learning_rate": 3.2738879972659116e-05, "loss": 1.068, "step": 232400 }, { "epoch": 46.75, "learning_rate": 3.234109292809456e-05, "loss": 1.0686, "step": 232500 }, { "epoch": 46.77, "learning_rate": 3.194570561904003e-05, "loss": 1.0659, "step": 232600 }, { "epoch": 46.79, "learning_rate": 3.1552718824715834e-05, "loss": 1.0674, "step": 232700 }, { "epoch": 46.81, "learning_rate": 3.116213331961215e-05, "loss": 1.0668, "step": 232800 }, { "epoch": 46.83, "learning_rate": 3.077394987348589e-05, "loss": 1.0674, "step": 232900 }, { "epoch": 46.85, "learning_rate": 3.0388169251360788e-05, "loss": 1.0628, "step": 233000 }, { "epoch": 46.87, "learning_rate": 3.0004792213524645e-05, "loss": 1.0639, "step": 233100 }, { "epoch": 46.89, "learning_rate": 2.9623819515528783e-05, "loss": 1.0649, "step": 233200 }, { "epoch": 46.91, "learning_rate": 2.9245251908185526e-05, "loss": 1.0651, "step": 233300 }, { "epoch": 46.93, "learning_rate": 2.8872839843944315e-05, "loss": 1.067, "step": 233400 }, { "epoch": 46.95, "learning_rate": 2.8499060581948928e-05, "loss": 1.0659, "step": 233500 }, { "epoch": 46.97, "learning_rate": 2.812768862725676e-05, "loss": 1.0669, "step": 233600 }, { "epoch": 46.99, "learning_rate": 2.7758724711759303e-05, "loss": 1.0625, "step": 233700 }, { "epoch": 47.0, "eval_accuracy": 0.42298172791300875, "eval_loss": 1.0480923652648926, "eval_runtime": 19.887, "eval_samples_per_second": 4001.005, "eval_steps_per_second": 15.638, "step": 233742 }, { "epoch": 47.01, "learning_rate": 2.7392169562602682e-05, "loss": 1.0617, "step": 233800 }, { "epoch": 47.03, "learning_rate": 2.7028023902185562e-05, "loss": 1.0595, "step": 233900 }, { "epoch": 47.05, "learning_rate": 2.6666288448158464e-05, "loss": 1.0587, "step": 234000 }, { "epoch": 47.07, "learning_rate": 2.6306963913421827e-05, "loss": 1.0602, "step": 234100 }, { "epoch": 47.09, "learning_rate": 2.5950051006124048e-05, "loss": 1.0601, "step": 234200 }, { "epoch": 47.11, "learning_rate": 2.5595550429661775e-05, "loss": 1.0611, "step": 234300 }, { "epoch": 47.13, "learning_rate": 2.5243462882676703e-05, "loss": 1.0591, "step": 234400 }, { "epoch": 47.15, "learning_rate": 2.4893789059055454e-05, "loss": 1.0635, "step": 234500 }, { "epoch": 47.17, "learning_rate": 2.4546529647927335e-05, "loss": 1.059, "step": 234600 }, { "epoch": 47.19, "learning_rate": 2.4201685333663654e-05, "loss": 1.0587, "step": 234700 }, { "epoch": 47.21, "learning_rate": 2.3859256795876057e-05, "loss": 1.0579, "step": 234800 }, { "epoch": 47.23, "learning_rate": 2.351924470941541e-05, "loss": 1.0601, "step": 234900 }, { "epoch": 47.25, "learning_rate": 2.318164974436987e-05, "loss": 1.06, "step": 235000 }, { "epoch": 47.27, "learning_rate": 2.2846472566064037e-05, "loss": 1.0604, "step": 235100 }, { "epoch": 47.29, "learning_rate": 2.2513713835058124e-05, "loss": 1.058, "step": 235200 }, { "epoch": 47.31, "learning_rate": 2.2183374207145472e-05, "loss": 1.0605, "step": 235300 }, { "epoch": 47.33, "learning_rate": 2.1858721552181353e-05, "loss": 1.0602, "step": 235400 }, { "epoch": 47.35, "learning_rate": 2.153319787157798e-05, "loss": 1.0587, "step": 235500 }, { "epoch": 47.37, "learning_rate": 2.1210095226447403e-05, "loss": 1.0582, "step": 235600 }, { "epoch": 47.39, "learning_rate": 2.0889414253553036e-05, "loss": 1.0639, "step": 235700 }, { "epoch": 47.41, "learning_rate": 2.057115558488601e-05, "loss": 1.0615, "step": 235800 }, { "epoch": 47.43, "learning_rate": 2.0255319847663906e-05, "loss": 1.0598, "step": 235900 }, { "epoch": 47.45, "learning_rate": 1.9941907664328407e-05, "loss": 1.0658, "step": 236000 }, { "epoch": 47.47, "learning_rate": 1.9630919652545432e-05, "loss": 1.0611, "step": 236100 }, { "epoch": 47.49, "learning_rate": 1.9322356425203607e-05, "loss": 1.0624, "step": 236200 }, { "epoch": 47.51, "learning_rate": 1.901621859041247e-05, "loss": 1.0621, "step": 236300 }, { "epoch": 47.53, "learning_rate": 1.8712506751501767e-05, "loss": 1.0599, "step": 236400 }, { "epoch": 47.55, "learning_rate": 1.841122150702007e-05, "loss": 1.0564, "step": 236500 }, { "epoch": 47.57, "learning_rate": 1.8112363450733667e-05, "loss": 1.0621, "step": 236600 }, { "epoch": 47.59, "learning_rate": 1.781593317162572e-05, "loss": 1.0629, "step": 236700 }, { "epoch": 47.61, "learning_rate": 1.7521931253894342e-05, "loss": 1.0607, "step": 236800 }, { "epoch": 47.63, "learning_rate": 1.7230358276952156e-05, "loss": 1.0589, "step": 236900 }, { "epoch": 47.65, "learning_rate": 1.6941214815424793e-05, "loss": 1.0609, "step": 237000 }, { "epoch": 47.68, "learning_rate": 1.66545014391499e-05, "loss": 1.0602, "step": 237100 }, { "epoch": 47.7, "learning_rate": 1.6370218713175762e-05, "loss": 1.066, "step": 237200 }, { "epoch": 47.72, "learning_rate": 1.6088367197760607e-05, "loss": 1.0584, "step": 237300 }, { "epoch": 47.74, "learning_rate": 1.5808947448371218e-05, "loss": 1.0585, "step": 237400 }, { "epoch": 47.76, "learning_rate": 1.553471784824498e-05, "loss": 1.0647, "step": 237500 }, { "epoch": 47.78, "learning_rate": 1.5260138946824596e-05, "loss": 1.0597, "step": 237600 }, { "epoch": 47.8, "learning_rate": 1.4987993443684039e-05, "loss": 1.0618, "step": 237700 }, { "epoch": 47.82, "learning_rate": 1.4718281875161916e-05, "loss": 1.0587, "step": 237800 }, { "epoch": 47.84, "learning_rate": 1.4451004772799565e-05, "loss": 1.0562, "step": 237900 }, { "epoch": 47.86, "learning_rate": 1.418616266334133e-05, "loss": 1.0588, "step": 238000 }, { "epoch": 47.88, "learning_rate": 1.392375606873178e-05, "loss": 1.0633, "step": 238100 }, { "epoch": 47.9, "learning_rate": 1.3663785506116133e-05, "loss": 1.0609, "step": 238200 }, { "epoch": 47.92, "learning_rate": 1.3406251487839e-05, "loss": 1.0614, "step": 238300 }, { "epoch": 47.94, "learning_rate": 1.3151154521442582e-05, "loss": 1.0631, "step": 238400 }, { "epoch": 47.96, "learning_rate": 1.2898495109666397e-05, "loss": 1.06, "step": 238500 }, { "epoch": 47.98, "learning_rate": 1.2648273750446026e-05, "loss": 1.0606, "step": 238600 }, { "epoch": 48.0, "learning_rate": 1.2400490936912284e-05, "loss": 1.0639, "step": 238700 }, { "epoch": 48.0, "eval_accuracy": 0.4231766093868917, "eval_loss": 1.045819640159607, "eval_runtime": 19.861, "eval_samples_per_second": 4006.252, "eval_steps_per_second": 15.659, "step": 238716 }, { "epoch": 48.02, "learning_rate": 1.2155147157390245e-05, "loss": 1.0555, "step": 238800 }, { "epoch": 48.04, "learning_rate": 1.1912242895397857e-05, "loss": 1.057, "step": 238900 }, { "epoch": 48.06, "learning_rate": 1.1671778629645525e-05, "loss": 1.0555, "step": 239000 }, { "epoch": 48.08, "learning_rate": 1.1433754834035137e-05, "loss": 1.0566, "step": 239100 }, { "epoch": 48.1, "learning_rate": 1.1198171977658822e-05, "loss": 1.0548, "step": 239200 }, { "epoch": 48.12, "learning_rate": 1.096503052479783e-05, "loss": 1.0572, "step": 239300 }, { "epoch": 48.14, "learning_rate": 1.0734330934922404e-05, "loss": 1.0535, "step": 239400 }, { "epoch": 48.16, "learning_rate": 1.0508344144459226e-05, "loss": 1.0572, "step": 239500 }, { "epoch": 48.18, "learning_rate": 1.0282505209828901e-05, "loss": 1.0567, "step": 239600 }, { "epoch": 48.2, "learning_rate": 1.0059109483290113e-05, "loss": 1.061, "step": 239700 }, { "epoch": 48.22, "learning_rate": 9.838157405106102e-06, "loss": 1.0535, "step": 239800 }, { "epoch": 48.24, "learning_rate": 9.619649410724658e-06, "loss": 1.0575, "step": 239900 }, { "epoch": 48.26, "learning_rate": 9.40358593077631e-06, "loss": 1.0578, "step": 240000 }, { "epoch": 48.28, "learning_rate": 9.189967391074332e-06, "loss": 1.0559, "step": 240100 }, { "epoch": 48.3, "learning_rate": 8.978794212613355e-06, "loss": 1.0572, "step": 240200 }, { "epoch": 48.32, "learning_rate": 8.770066811569083e-06, "loss": 1.0544, "step": 240300 }, { "epoch": 48.34, "learning_rate": 8.563785599296769e-06, "loss": 1.0586, "step": 240400 }, { "epoch": 48.36, "learning_rate": 8.359950982330805e-06, "loss": 1.0592, "step": 240500 }, { "epoch": 48.38, "learning_rate": 8.158563362384158e-06, "loss": 1.0586, "step": 240600 }, { "epoch": 48.4, "learning_rate": 7.959623136347128e-06, "loss": 1.0547, "step": 240700 }, { "epoch": 48.42, "learning_rate": 7.763130696286647e-06, "loss": 1.0525, "step": 240800 }, { "epoch": 48.44, "learning_rate": 7.56908642944587e-06, "loss": 1.0569, "step": 240900 }, { "epoch": 48.46, "learning_rate": 7.3774907182427856e-06, "loss": 1.055, "step": 241000 }, { "epoch": 48.48, "learning_rate": 7.18834394027007e-06, "loss": 1.0579, "step": 241100 }, { "epoch": 48.5, "learning_rate": 7.001646468294265e-06, "loss": 1.0554, "step": 241200 }, { "epoch": 48.52, "learning_rate": 6.817398670254382e-06, "loss": 1.0551, "step": 241300 }, { "epoch": 48.54, "learning_rate": 6.635600909262185e-06, "loss": 1.0599, "step": 241400 }, { "epoch": 48.56, "learning_rate": 6.456253543600521e-06, "loss": 1.0554, "step": 241500 }, { "epoch": 48.58, "learning_rate": 6.281113760537027e-06, "loss": 1.0548, "step": 241600 }, { "epoch": 48.6, "learning_rate": 6.106643728384375e-06, "loss": 1.0615, "step": 241700 }, { "epoch": 48.62, "learning_rate": 5.934625134019766e-06, "loss": 1.0538, "step": 241800 }, { "epoch": 48.64, "learning_rate": 5.76505831645327e-06, "loss": 1.0599, "step": 241900 }, { "epoch": 48.66, "learning_rate": 5.597943609863821e-06, "loss": 1.0539, "step": 242000 }, { "epoch": 48.68, "learning_rate": 5.433281343597135e-06, "loss": 1.0565, "step": 242100 }, { "epoch": 48.7, "learning_rate": 5.2710718421662696e-06, "loss": 1.0596, "step": 242200 }, { "epoch": 48.72, "learning_rate": 5.111315425249952e-06, "loss": 1.0548, "step": 242300 }, { "epoch": 48.74, "learning_rate": 4.954012407692721e-06, "loss": 1.0547, "step": 242400 }, { "epoch": 48.76, "learning_rate": 4.799163099503956e-06, "loss": 1.0514, "step": 242500 }, { "epoch": 48.78, "learning_rate": 4.646767805857183e-06, "loss": 1.0587, "step": 242600 }, { "epoch": 48.8, "learning_rate": 4.496826827089795e-06, "loss": 1.0602, "step": 242700 }, { "epoch": 48.82, "learning_rate": 4.349340458702084e-06, "loss": 1.0576, "step": 242800 }, { "epoch": 48.84, "learning_rate": 4.204308991357098e-06, "loss": 1.0576, "step": 242900 }, { "epoch": 48.86, "learning_rate": 4.061732710879673e-06, "loss": 1.0592, "step": 243000 }, { "epoch": 48.88, "learning_rate": 3.921611898255878e-06, "loss": 1.0601, "step": 243100 }, { "epoch": 48.9, "learning_rate": 3.783946829632734e-06, "loss": 1.0588, "step": 243200 }, { "epoch": 48.92, "learning_rate": 3.648737776317801e-06, "loss": 1.0558, "step": 243300 }, { "epoch": 48.94, "learning_rate": 3.5159850047777885e-06, "loss": 1.057, "step": 243400 }, { "epoch": 48.96, "learning_rate": 3.3856887766392507e-06, "loss": 1.0593, "step": 243500 }, { "epoch": 48.98, "learning_rate": 3.259115580974137e-06, "loss": 1.0585, "step": 243600 }, { "epoch": 49.0, "eval_accuracy": 0.4232847884907615, "eval_loss": 1.044702172279358, "eval_runtime": 19.8772, "eval_samples_per_second": 4002.975, "eval_steps_per_second": 15.646, "step": 243689 }, { "epoch": 49.0, "learning_rate": 3.1337086333987908e-06, "loss": 1.0534, "step": 243700 }, { "epoch": 49.02, "learning_rate": 3.0107589826065816e-06, "loss": 1.0535, "step": 243800 }, { "epoch": 49.04, "learning_rate": 2.8902668709041013e-06, "loss": 1.0535, "step": 243900 }, { "epoch": 49.06, "learning_rate": 2.772232535754593e-06, "loss": 1.0536, "step": 244000 }, { "epoch": 49.08, "learning_rate": 2.6566562097773973e-06, "loss": 1.0529, "step": 244100 }, { "epoch": 49.1, "learning_rate": 2.5435381207479514e-06, "loss": 1.057, "step": 244200 }, { "epoch": 49.12, "learning_rate": 2.432878491596957e-06, "loss": 1.0544, "step": 244300 }, { "epoch": 49.14, "learning_rate": 2.3246775404098252e-06, "loss": 1.0568, "step": 244400 }, { "epoch": 49.16, "learning_rate": 2.218935480426676e-06, "loss": 1.0521, "step": 244500 }, { "epoch": 49.18, "learning_rate": 2.1156525200416444e-06, "loss": 1.0557, "step": 244600 }, { "epoch": 49.2, "learning_rate": 2.0148288628026036e-06, "loss": 1.0553, "step": 244700 }, { "epoch": 49.22, "learning_rate": 1.9164647074104702e-06, "loss": 1.0539, "step": 244800 }, { "epoch": 49.24, "learning_rate": 1.8205602477193439e-06, "loss": 1.0517, "step": 244900 }, { "epoch": 49.26, "learning_rate": 1.7271156727355353e-06, "loss": 1.0535, "step": 245000 }, { "epoch": 49.28, "learning_rate": 1.6361311666174272e-06, "loss": 1.0565, "step": 245100 }, { "epoch": 49.3, "learning_rate": 1.5476069086756139e-06, "loss": 1.0513, "step": 245200 }, { "epoch": 49.32, "learning_rate": 1.4615430733713742e-06, "loss": 1.0539, "step": 245300 }, { "epoch": 49.34, "learning_rate": 1.3779398303177814e-06, "loss": 1.0565, "step": 245400 }, { "epoch": 49.36, "learning_rate": 1.296797344278039e-06, "loss": 1.0572, "step": 245500 }, { "epoch": 49.38, "learning_rate": 1.218115775166173e-06, "loss": 1.0538, "step": 245600 }, { "epoch": 49.4, "learning_rate": 1.1426453002136294e-06, "loss": 1.0577, "step": 245700 }, { "epoch": 49.42, "learning_rate": 1.0688614123491314e-06, "loss": 1.0547, "step": 245800 }, { "epoch": 49.44, "learning_rate": 9.97538890623456e-07, "loss": 1.0539, "step": 245900 }, { "epoch": 49.46, "learning_rate": 9.286778755976388e-07, "loss": 1.0524, "step": 246000 }, { "epoch": 49.48, "learning_rate": 8.622785029814562e-07, "loss": 1.0569, "step": 246100 }, { "epoch": 49.5, "learning_rate": 7.983409036331491e-07, "loss": 1.0525, "step": 246200 }, { "epoch": 49.52, "learning_rate": 7.368652035597001e-07, "loss": 1.0557, "step": 246300 }, { "epoch": 49.55, "learning_rate": 6.778515239161398e-07, "loss": 1.0568, "step": 246400 }, { "epoch": 49.57, "learning_rate": 6.212999810051301e-07, "loss": 1.058, "step": 246500 }, { "epoch": 49.59, "learning_rate": 5.672106862772419e-07, "loss": 1.0585, "step": 246600 }, { "epoch": 49.61, "learning_rate": 5.155837463306778e-07, "loss": 1.0561, "step": 246700 }, { "epoch": 49.63, "learning_rate": 4.664192629104391e-07, "loss": 1.057, "step": 246800 }, { "epoch": 49.65, "learning_rate": 4.201721622633381e-07, "loss": 1.0523, "step": 246900 }, { "epoch": 49.67, "learning_rate": 3.7590825082547965e-07, "loss": 1.0533, "step": 247000 }, { "epoch": 49.69, "learning_rate": 3.3410707118347595e-07, "loss": 1.0538, "step": 247100 }, { "epoch": 49.71, "learning_rate": 2.951498981840217e-07, "loss": 1.0602, "step": 247200 }, { "epoch": 49.73, "learning_rate": 2.5824979513769386e-07, "loss": 1.0534, "step": 247300 }, { "epoch": 49.75, "learning_rate": 2.238126557659037e-07, "loss": 1.0558, "step": 247400 }, { "epoch": 49.77, "learning_rate": 1.9183854793672352e-07, "loss": 1.0555, "step": 247500 }, { "epoch": 49.79, "learning_rate": 1.6232753466377536e-07, "loss": 1.0578, "step": 247600 }, { "epoch": 49.81, "learning_rate": 1.352796741069251e-07, "loss": 1.0576, "step": 247700 }, { "epoch": 49.83, "learning_rate": 1.1069501957144956e-07, "loss": 1.0551, "step": 247800 }, { "epoch": 49.85, "learning_rate": 8.857361950831422e-08, "loss": 1.0557, "step": 247900 }, { "epoch": 49.87, "learning_rate": 6.89155175137568e-08, "loss": 1.0538, "step": 248000 }, { "epoch": 49.89, "learning_rate": 5.172075232956486e-08, "loss": 1.0543, "step": 248100 }, { "epoch": 49.91, "learning_rate": 3.698935784279822e-08, "loss": 1.056, "step": 248200 }, { "epoch": 49.93, "learning_rate": 2.472136308592776e-08, "loss": 1.055, "step": 248300 }, { "epoch": 49.95, "learning_rate": 1.4916792236141507e-08, "loss": 1.0544, "step": 248400 }, { "epoch": 49.97, "learning_rate": 7.575664616454869e-09, "loss": 1.0545, "step": 248500 }, { "epoch": 49.99, "learning_rate": 2.6979946943228584e-09, "loss": 1.0521, "step": 248600 }, { "epoch": 50.0, "eval_accuracy": 0.42330467435544344, "eval_loss": 1.0445035696029663, "eval_runtime": 19.4961, "eval_samples_per_second": 4081.225, "eval_steps_per_second": 15.952, "step": 248650 }, { "epoch": 50.0, "step": 248650, "total_flos": 6.912086038408397e+16, "train_loss": 1.2392261905236766, "train_runtime": 25350.8633, "train_samples_per_second": 2510.968, "train_steps_per_second": 9.808 } ], "max_steps": 248650, "num_train_epochs": 50, "total_flos": 6.912086038408397e+16, "trial_name": null, "trial_params": null }