{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9993284083277367, "global_step": 744, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 9.865591397849462e-05, "loss": 1.8106, "step": 10 }, { "epoch": 0.01, "eval_accuracy": 0.19744795560836792, "eval_loss": 1.7615934610366821, "eval_runtime": 134.0063, "eval_samples_per_second": 11.111, "eval_steps_per_second": 2.783, "step": 10 }, { "epoch": 0.03, "learning_rate": 9.731182795698925e-05, "loss": 1.7268, "step": 20 }, { "epoch": 0.03, "eval_accuracy": 0.2525184750556946, "eval_loss": 1.7187447547912598, "eval_runtime": 145.4077, "eval_samples_per_second": 10.24, "eval_steps_per_second": 2.565, "step": 20 }, { "epoch": 0.04, "learning_rate": 9.596774193548387e-05, "loss": 1.7269, "step": 30 }, { "epoch": 0.04, "eval_accuracy": 0.309603750705719, "eval_loss": 1.6442092657089233, "eval_runtime": 144.9066, "eval_samples_per_second": 10.276, "eval_steps_per_second": 2.574, "step": 30 }, { "epoch": 0.05, "learning_rate": 9.46236559139785e-05, "loss": 1.7086, "step": 40 }, { "epoch": 0.05, "eval_accuracy": 0.33378106355667114, "eval_loss": 1.583362340927124, "eval_runtime": 135.7155, "eval_samples_per_second": 10.971, "eval_steps_per_second": 2.748, "step": 40 }, { "epoch": 0.07, "learning_rate": 9.327956989247312e-05, "loss": 1.6983, "step": 50 }, { "epoch": 0.07, "eval_accuracy": 0.35997313261032104, "eval_loss": 1.6194798946380615, "eval_runtime": 135.5719, "eval_samples_per_second": 10.983, "eval_steps_per_second": 2.751, "step": 50 }, { "epoch": 0.08, "learning_rate": 9.193548387096774e-05, "loss": 1.5845, "step": 60 }, { "epoch": 0.08, "eval_accuracy": 0.34184014797210693, "eval_loss": 1.575337290763855, "eval_runtime": 135.8178, "eval_samples_per_second": 10.963, "eval_steps_per_second": 2.746, "step": 60 }, { "epoch": 0.09, "learning_rate": 9.059139784946237e-05, "loss": 1.5744, "step": 70 }, { "epoch": 0.09, "eval_accuracy": 0.3707185983657837, "eval_loss": 1.5669182538986206, "eval_runtime": 135.433, "eval_samples_per_second": 10.994, "eval_steps_per_second": 2.754, "step": 70 }, { "epoch": 0.11, "learning_rate": 8.924731182795699e-05, "loss": 1.5915, "step": 80 }, { "epoch": 0.11, "eval_accuracy": 0.3754197359085083, "eval_loss": 1.5411657094955444, "eval_runtime": 135.3362, "eval_samples_per_second": 11.002, "eval_steps_per_second": 2.756, "step": 80 }, { "epoch": 0.12, "learning_rate": 8.790322580645162e-05, "loss": 1.5105, "step": 90 }, { "epoch": 0.12, "eval_accuracy": 0.2612491548061371, "eval_loss": 2.0037343502044678, "eval_runtime": 135.2385, "eval_samples_per_second": 11.01, "eval_steps_per_second": 2.758, "step": 90 }, { "epoch": 0.13, "learning_rate": 8.655913978494624e-05, "loss": 1.4689, "step": 100 }, { "epoch": 0.13, "eval_accuracy": 0.3626595139503479, "eval_loss": 1.5439659357070923, "eval_runtime": 135.2487, "eval_samples_per_second": 11.009, "eval_steps_per_second": 2.758, "step": 100 }, { "epoch": 0.15, "learning_rate": 8.521505376344086e-05, "loss": 1.527, "step": 110 }, { "epoch": 0.15, "eval_accuracy": 0.38616520166397095, "eval_loss": 1.540026068687439, "eval_runtime": 135.6976, "eval_samples_per_second": 10.973, "eval_steps_per_second": 2.749, "step": 110 }, { "epoch": 0.16, "learning_rate": 8.387096774193549e-05, "loss": 1.6481, "step": 120 }, { "epoch": 0.16, "eval_accuracy": 0.32975152134895325, "eval_loss": 1.6678365468978882, "eval_runtime": 135.8659, "eval_samples_per_second": 10.959, "eval_steps_per_second": 2.745, "step": 120 }, { "epoch": 0.17, "learning_rate": 8.252688172043011e-05, "loss": 1.7504, "step": 130 }, { "epoch": 0.17, "eval_accuracy": 0.29952988028526306, "eval_loss": 1.6077724695205688, "eval_runtime": 135.8936, "eval_samples_per_second": 10.957, "eval_steps_per_second": 2.745, "step": 130 }, { "epoch": 0.19, "learning_rate": 8.118279569892473e-05, "loss": 1.3748, "step": 140 }, { "epoch": 0.19, "eval_accuracy": 0.32505038380622864, "eval_loss": 1.5750231742858887, "eval_runtime": 135.7593, "eval_samples_per_second": 10.968, "eval_steps_per_second": 2.748, "step": 140 }, { "epoch": 0.2, "learning_rate": 7.983870967741936e-05, "loss": 1.6417, "step": 150 }, { "epoch": 0.2, "eval_accuracy": 0.25990596413612366, "eval_loss": 1.7033889293670654, "eval_runtime": 140.3888, "eval_samples_per_second": 10.606, "eval_steps_per_second": 2.657, "step": 150 }, { "epoch": 0.21, "learning_rate": 7.849462365591398e-05, "loss": 1.6146, "step": 160 }, { "epoch": 0.21, "eval_accuracy": 0.35191404819488525, "eval_loss": 1.6161645650863647, "eval_runtime": 136.7498, "eval_samples_per_second": 10.889, "eval_steps_per_second": 2.728, "step": 160 }, { "epoch": 0.23, "learning_rate": 7.715053763440861e-05, "loss": 1.4896, "step": 170 }, { "epoch": 0.23, "eval_accuracy": 0.37407657504081726, "eval_loss": 1.5245014429092407, "eval_runtime": 137.1791, "eval_samples_per_second": 10.854, "eval_steps_per_second": 2.719, "step": 170 }, { "epoch": 0.24, "learning_rate": 7.580645161290323e-05, "loss": 1.4278, "step": 180 }, { "epoch": 0.24, "eval_accuracy": 0.24244458973407745, "eval_loss": 1.7536966800689697, "eval_runtime": 137.1089, "eval_samples_per_second": 10.86, "eval_steps_per_second": 2.72, "step": 180 }, { "epoch": 0.26, "learning_rate": 7.446236559139786e-05, "loss": 1.4475, "step": 190 }, { "epoch": 0.26, "eval_accuracy": 0.3881799876689911, "eval_loss": 1.4769032001495361, "eval_runtime": 136.7534, "eval_samples_per_second": 10.888, "eval_steps_per_second": 2.728, "step": 190 }, { "epoch": 0.27, "learning_rate": 7.311827956989248e-05, "loss": 1.5416, "step": 200 }, { "epoch": 0.27, "eval_accuracy": 0.39489591121673584, "eval_loss": 1.4772460460662842, "eval_runtime": 137.0691, "eval_samples_per_second": 10.863, "eval_steps_per_second": 2.721, "step": 200 }, { "epoch": 0.28, "learning_rate": 7.177419354838711e-05, "loss": 1.5997, "step": 210 }, { "epoch": 0.28, "eval_accuracy": 0.4278039038181305, "eval_loss": 1.4428460597991943, "eval_runtime": 137.003, "eval_samples_per_second": 10.868, "eval_steps_per_second": 2.723, "step": 210 }, { "epoch": 0.3, "learning_rate": 7.043010752688173e-05, "loss": 1.4337, "step": 220 }, { "epoch": 0.3, "eval_accuracy": 0.41235730051994324, "eval_loss": 1.435219645500183, "eval_runtime": 137.007, "eval_samples_per_second": 10.868, "eval_steps_per_second": 2.722, "step": 220 }, { "epoch": 0.31, "learning_rate": 6.908602150537635e-05, "loss": 1.415, "step": 230 }, { "epoch": 0.31, "eval_accuracy": 0.4157152473926544, "eval_loss": 1.4404770135879517, "eval_runtime": 137.0627, "eval_samples_per_second": 10.864, "eval_steps_per_second": 2.721, "step": 230 }, { "epoch": 0.32, "learning_rate": 6.774193548387096e-05, "loss": 1.5196, "step": 240 }, { "epoch": 0.32, "eval_accuracy": 0.40429818630218506, "eval_loss": 1.4196510314941406, "eval_runtime": 136.8253, "eval_samples_per_second": 10.882, "eval_steps_per_second": 2.726, "step": 240 }, { "epoch": 0.34, "learning_rate": 6.63978494623656e-05, "loss": 1.3866, "step": 250 }, { "epoch": 0.34, "eval_accuracy": 0.37340497970581055, "eval_loss": 1.524086833000183, "eval_runtime": 137.3465, "eval_samples_per_second": 10.841, "eval_steps_per_second": 2.716, "step": 250 }, { "epoch": 0.35, "learning_rate": 6.505376344086021e-05, "loss": 1.3041, "step": 260 }, { "epoch": 0.35, "eval_accuracy": 0.40429818630218506, "eval_loss": 1.5703184604644775, "eval_runtime": 136.9308, "eval_samples_per_second": 10.874, "eval_steps_per_second": 2.724, "step": 260 }, { "epoch": 0.36, "learning_rate": 6.370967741935485e-05, "loss": 1.3618, "step": 270 }, { "epoch": 0.36, "eval_accuracy": 0.4284754991531372, "eval_loss": 1.3963350057601929, "eval_runtime": 136.0816, "eval_samples_per_second": 10.942, "eval_steps_per_second": 2.741, "step": 270 }, { "epoch": 0.38, "learning_rate": 6.236559139784946e-05, "loss": 1.3293, "step": 280 }, { "epoch": 0.38, "eval_accuracy": 0.4506380259990692, "eval_loss": 1.3478150367736816, "eval_runtime": 135.9673, "eval_samples_per_second": 10.951, "eval_steps_per_second": 2.743, "step": 280 }, { "epoch": 0.39, "learning_rate": 6.102150537634409e-05, "loss": 1.2215, "step": 290 }, { "epoch": 0.39, "eval_accuracy": 0.3841504454612732, "eval_loss": 1.5994166135787964, "eval_runtime": 136.1871, "eval_samples_per_second": 10.933, "eval_steps_per_second": 2.739, "step": 290 }, { "epoch": 0.4, "learning_rate": 5.9677419354838715e-05, "loss": 1.6618, "step": 300 }, { "epoch": 0.4, "eval_accuracy": 0.2276695817708969, "eval_loss": 1.7750705480575562, "eval_runtime": 136.1639, "eval_samples_per_second": 10.935, "eval_steps_per_second": 2.739, "step": 300 }, { "epoch": 0.42, "learning_rate": 5.8467741935483876e-05, "loss": 1.5349, "step": 310 }, { "epoch": 0.42, "eval_accuracy": 0.40362659096717834, "eval_loss": 1.6090513467788696, "eval_runtime": 136.1264, "eval_samples_per_second": 10.938, "eval_steps_per_second": 2.74, "step": 310 }, { "epoch": 0.43, "learning_rate": 5.71236559139785e-05, "loss": 1.4037, "step": 320 }, { "epoch": 0.43, "eval_accuracy": 0.4445936977863312, "eval_loss": 1.4741053581237793, "eval_runtime": 136.0584, "eval_samples_per_second": 10.944, "eval_steps_per_second": 2.741, "step": 320 }, { "epoch": 0.44, "learning_rate": 5.577956989247311e-05, "loss": 1.4844, "step": 330 }, { "epoch": 0.44, "eval_accuracy": 0.4398925304412842, "eval_loss": 1.4170390367507935, "eval_runtime": 136.1183, "eval_samples_per_second": 10.939, "eval_steps_per_second": 2.74, "step": 330 }, { "epoch": 0.46, "learning_rate": 5.443548387096774e-05, "loss": 1.2806, "step": 340 }, { "epoch": 0.46, "eval_accuracy": 0.5050369501113892, "eval_loss": 1.2886841297149658, "eval_runtime": 136.1309, "eval_samples_per_second": 10.938, "eval_steps_per_second": 2.74, "step": 340 }, { "epoch": 0.47, "learning_rate": 5.309139784946236e-05, "loss": 1.3818, "step": 350 }, { "epoch": 0.47, "eval_accuracy": 0.501679003238678, "eval_loss": 1.2668293714523315, "eval_runtime": 135.9509, "eval_samples_per_second": 10.952, "eval_steps_per_second": 2.744, "step": 350 }, { "epoch": 0.48, "learning_rate": 5.174731182795699e-05, "loss": 1.3491, "step": 360 }, { "epoch": 0.48, "eval_accuracy": 0.4593687057495117, "eval_loss": 1.4720617532730103, "eval_runtime": 136.1018, "eval_samples_per_second": 10.94, "eval_steps_per_second": 2.741, "step": 360 }, { "epoch": 0.5, "learning_rate": 5.040322580645161e-05, "loss": 1.2347, "step": 370 }, { "epoch": 0.5, "eval_accuracy": 0.5245131254196167, "eval_loss": 1.2187544107437134, "eval_runtime": 136.1642, "eval_samples_per_second": 10.935, "eval_steps_per_second": 2.739, "step": 370 }, { "epoch": 0.51, "learning_rate": 4.905913978494624e-05, "loss": 1.2182, "step": 380 }, { "epoch": 0.51, "eval_accuracy": 0.45668232440948486, "eval_loss": 1.3813459873199463, "eval_runtime": 136.1044, "eval_samples_per_second": 10.94, "eval_steps_per_second": 2.741, "step": 380 }, { "epoch": 0.52, "learning_rate": 4.771505376344086e-05, "loss": 1.2513, "step": 390 }, { "epoch": 0.52, "eval_accuracy": 0.5204835534095764, "eval_loss": 1.2110750675201416, "eval_runtime": 136.1563, "eval_samples_per_second": 10.936, "eval_steps_per_second": 2.739, "step": 390 }, { "epoch": 0.54, "learning_rate": 4.637096774193548e-05, "loss": 1.2447, "step": 400 }, { "epoch": 0.54, "eval_accuracy": 0.546004056930542, "eval_loss": 1.2230509519577026, "eval_runtime": 136.1489, "eval_samples_per_second": 10.937, "eval_steps_per_second": 2.74, "step": 400 }, { "epoch": 0.55, "learning_rate": 4.516129032258064e-05, "loss": 1.038, "step": 410 }, { "epoch": 0.55, "eval_accuracy": 0.5372733473777771, "eval_loss": 1.2562698125839233, "eval_runtime": 136.2823, "eval_samples_per_second": 10.926, "eval_steps_per_second": 2.737, "step": 410 }, { "epoch": 0.56, "learning_rate": 4.381720430107527e-05, "loss": 1.2409, "step": 420 }, { "epoch": 0.56, "eval_accuracy": 0.4936198890209198, "eval_loss": 1.344766616821289, "eval_runtime": 136.1885, "eval_samples_per_second": 10.933, "eval_steps_per_second": 2.739, "step": 420 }, { "epoch": 0.58, "learning_rate": 4.247311827956989e-05, "loss": 1.2279, "step": 430 }, { "epoch": 0.58, "eval_accuracy": 0.5486903786659241, "eval_loss": 1.1971595287322998, "eval_runtime": 136.416, "eval_samples_per_second": 10.915, "eval_steps_per_second": 2.734, "step": 430 }, { "epoch": 0.59, "learning_rate": 4.112903225806452e-05, "loss": 1.3256, "step": 440 }, { "epoch": 0.59, "eval_accuracy": 0.5742108821868896, "eval_loss": 1.1706324815750122, "eval_runtime": 136.371, "eval_samples_per_second": 10.919, "eval_steps_per_second": 2.735, "step": 440 }, { "epoch": 0.6, "learning_rate": 3.978494623655914e-05, "loss": 1.2866, "step": 450 }, { "epoch": 0.6, "eval_accuracy": 0.5003358125686646, "eval_loss": 1.309117078781128, "eval_runtime": 136.1709, "eval_samples_per_second": 10.935, "eval_steps_per_second": 2.739, "step": 450 }, { "epoch": 0.62, "learning_rate": 3.844086021505376e-05, "loss": 1.0574, "step": 460 }, { "epoch": 0.62, "eval_accuracy": 0.5500335693359375, "eval_loss": 1.2074663639068604, "eval_runtime": 136.5936, "eval_samples_per_second": 10.901, "eval_steps_per_second": 2.731, "step": 460 }, { "epoch": 0.63, "learning_rate": 3.7096774193548386e-05, "loss": 1.2744, "step": 470 }, { "epoch": 0.63, "eval_accuracy": 0.5171256065368652, "eval_loss": 1.2830621004104614, "eval_runtime": 136.8418, "eval_samples_per_second": 10.881, "eval_steps_per_second": 2.726, "step": 470 }, { "epoch": 0.64, "learning_rate": 3.575268817204301e-05, "loss": 1.0836, "step": 480 }, { "epoch": 0.64, "eval_accuracy": 0.5607790350914001, "eval_loss": 1.1768107414245605, "eval_runtime": 136.4906, "eval_samples_per_second": 10.909, "eval_steps_per_second": 2.733, "step": 480 }, { "epoch": 0.66, "learning_rate": 3.4408602150537636e-05, "loss": 1.135, "step": 490 }, { "epoch": 0.66, "eval_accuracy": 0.5775688290596008, "eval_loss": 1.1407707929611206, "eval_runtime": 136.5812, "eval_samples_per_second": 10.902, "eval_steps_per_second": 2.731, "step": 490 }, { "epoch": 0.67, "learning_rate": 3.306451612903226e-05, "loss": 1.1303, "step": 500 }, { "epoch": 0.67, "eval_accuracy": 0.5540631413459778, "eval_loss": 1.2319557666778564, "eval_runtime": 136.447, "eval_samples_per_second": 10.913, "eval_steps_per_second": 2.734, "step": 500 }, { "epoch": 0.69, "learning_rate": 3.172043010752688e-05, "loss": 1.2068, "step": 510 }, { "epoch": 0.69, "eval_accuracy": 0.5795835852622986, "eval_loss": 1.1379237174987793, "eval_runtime": 139.7377, "eval_samples_per_second": 10.656, "eval_steps_per_second": 2.669, "step": 510 }, { "epoch": 0.7, "learning_rate": 3.0376344086021508e-05, "loss": 1.1347, "step": 520 }, { "epoch": 0.7, "eval_accuracy": 0.5896574854850769, "eval_loss": 1.112443447113037, "eval_runtime": 135.8322, "eval_samples_per_second": 10.962, "eval_steps_per_second": 2.746, "step": 520 }, { "epoch": 0.71, "learning_rate": 2.9032258064516133e-05, "loss": 1.1846, "step": 530 }, { "epoch": 0.71, "eval_accuracy": 0.5802552103996277, "eval_loss": 1.1337865591049194, "eval_runtime": 136.0081, "eval_samples_per_second": 10.948, "eval_steps_per_second": 2.742, "step": 530 }, { "epoch": 0.73, "learning_rate": 2.768817204301075e-05, "loss": 1.2409, "step": 540 }, { "epoch": 0.73, "eval_accuracy": 0.5789120197296143, "eval_loss": 1.1259396076202393, "eval_runtime": 136.2241, "eval_samples_per_second": 10.931, "eval_steps_per_second": 2.738, "step": 540 }, { "epoch": 0.74, "learning_rate": 2.6344086021505376e-05, "loss": 1.0664, "step": 550 }, { "epoch": 0.74, "eval_accuracy": 0.6037608981132507, "eval_loss": 1.065294861793518, "eval_runtime": 136.1781, "eval_samples_per_second": 10.934, "eval_steps_per_second": 2.739, "step": 550 }, { "epoch": 0.75, "learning_rate": 2.5e-05, "loss": 1.1637, "step": 560 }, { "epoch": 0.75, "eval_accuracy": 0.5977165699005127, "eval_loss": 1.0549540519714355, "eval_runtime": 135.9124, "eval_samples_per_second": 10.956, "eval_steps_per_second": 2.744, "step": 560 }, { "epoch": 0.77, "learning_rate": 2.3655913978494626e-05, "loss": 1.0707, "step": 570 }, { "epoch": 0.77, "eval_accuracy": 0.5715245008468628, "eval_loss": 1.0996488332748413, "eval_runtime": 136.1326, "eval_samples_per_second": 10.938, "eval_steps_per_second": 2.74, "step": 570 }, { "epoch": 0.78, "learning_rate": 2.2311827956989248e-05, "loss": 1.2258, "step": 580 }, { "epoch": 0.78, "eval_accuracy": 0.5977165699005127, "eval_loss": 1.080415964126587, "eval_runtime": 135.8671, "eval_samples_per_second": 10.959, "eval_steps_per_second": 2.745, "step": 580 }, { "epoch": 0.79, "learning_rate": 2.0967741935483873e-05, "loss": 0.9256, "step": 590 }, { "epoch": 0.79, "eval_accuracy": 0.580926775932312, "eval_loss": 1.1501046419143677, "eval_runtime": 135.8439, "eval_samples_per_second": 10.961, "eval_steps_per_second": 2.746, "step": 590 }, { "epoch": 0.81, "learning_rate": 1.9623655913978494e-05, "loss": 1.1542, "step": 600 }, { "epoch": 0.81, "eval_accuracy": 0.5957018136978149, "eval_loss": 1.1089370250701904, "eval_runtime": 137.3702, "eval_samples_per_second": 10.839, "eval_steps_per_second": 2.715, "step": 600 }, { "epoch": 0.82, "learning_rate": 1.827956989247312e-05, "loss": 1.3931, "step": 610 }, { "epoch": 0.82, "eval_accuracy": 0.5856279134750366, "eval_loss": 1.138110876083374, "eval_runtime": 135.8089, "eval_samples_per_second": 10.964, "eval_steps_per_second": 2.747, "step": 610 }, { "epoch": 0.83, "learning_rate": 1.693548387096774e-05, "loss": 1.1117, "step": 620 }, { "epoch": 0.83, "eval_accuracy": 0.6030893325805664, "eval_loss": 1.0933294296264648, "eval_runtime": 135.7612, "eval_samples_per_second": 10.968, "eval_steps_per_second": 2.747, "step": 620 }, { "epoch": 0.85, "learning_rate": 1.5591397849462366e-05, "loss": 1.1433, "step": 630 }, { "epoch": 0.85, "eval_accuracy": 0.6218938827514648, "eval_loss": 1.0175174474716187, "eval_runtime": 135.6802, "eval_samples_per_second": 10.974, "eval_steps_per_second": 2.749, "step": 630 }, { "epoch": 0.86, "learning_rate": 1.4247311827956991e-05, "loss": 1.0325, "step": 640 }, { "epoch": 0.86, "eval_accuracy": 0.6239086389541626, "eval_loss": 0.9885073304176331, "eval_runtime": 135.5378, "eval_samples_per_second": 10.986, "eval_steps_per_second": 2.752, "step": 640 }, { "epoch": 0.87, "learning_rate": 1.2903225806451613e-05, "loss": 1.111, "step": 650 }, { "epoch": 0.87, "eval_accuracy": 0.6259234547615051, "eval_loss": 1.004755973815918, "eval_runtime": 135.7521, "eval_samples_per_second": 10.969, "eval_steps_per_second": 2.748, "step": 650 }, { "epoch": 0.89, "learning_rate": 1.1559139784946236e-05, "loss": 0.8125, "step": 660 }, { "epoch": 0.89, "eval_accuracy": 0.6165211796760559, "eval_loss": 1.0176496505737305, "eval_runtime": 135.5622, "eval_samples_per_second": 10.984, "eval_steps_per_second": 2.752, "step": 660 }, { "epoch": 0.9, "learning_rate": 1.0215053763440861e-05, "loss": 1.0414, "step": 670 }, { "epoch": 0.9, "eval_accuracy": 0.6185359358787537, "eval_loss": 1.0289984941482544, "eval_runtime": 135.9417, "eval_samples_per_second": 10.953, "eval_steps_per_second": 2.744, "step": 670 }, { "epoch": 0.91, "learning_rate": 8.870967741935484e-06, "loss": 1.0037, "step": 680 }, { "epoch": 0.91, "eval_accuracy": 0.625251829624176, "eval_loss": 1.0268802642822266, "eval_runtime": 135.7529, "eval_samples_per_second": 10.968, "eval_steps_per_second": 2.748, "step": 680 }, { "epoch": 0.93, "learning_rate": 7.526881720430108e-06, "loss": 0.9406, "step": 690 }, { "epoch": 0.93, "eval_accuracy": 0.6272666454315186, "eval_loss": 1.0300624370574951, "eval_runtime": 137.5757, "eval_samples_per_second": 10.823, "eval_steps_per_second": 2.711, "step": 690 }, { "epoch": 0.94, "learning_rate": 6.182795698924732e-06, "loss": 1.0129, "step": 700 }, { "epoch": 0.94, "eval_accuracy": 0.6326393485069275, "eval_loss": 1.0238244533538818, "eval_runtime": 135.84, "eval_samples_per_second": 10.961, "eval_steps_per_second": 2.746, "step": 700 }, { "epoch": 0.95, "learning_rate": 4.838709677419355e-06, "loss": 1.2213, "step": 710 }, { "epoch": 0.95, "eval_accuracy": 0.6272666454315186, "eval_loss": 1.018078088760376, "eval_runtime": 135.9017, "eval_samples_per_second": 10.956, "eval_steps_per_second": 2.745, "step": 710 }, { "epoch": 0.97, "learning_rate": 3.4946236559139785e-06, "loss": 1.2519, "step": 720 }, { "epoch": 0.97, "eval_accuracy": 0.6265950202941895, "eval_loss": 1.0160512924194336, "eval_runtime": 135.809, "eval_samples_per_second": 10.964, "eval_steps_per_second": 2.747, "step": 720 }, { "epoch": 0.98, "learning_rate": 2.1505376344086023e-06, "loss": 0.9932, "step": 730 }, { "epoch": 0.98, "eval_accuracy": 0.6279382109642029, "eval_loss": 1.0112457275390625, "eval_runtime": 136.0311, "eval_samples_per_second": 10.946, "eval_steps_per_second": 2.742, "step": 730 }, { "epoch": 0.99, "learning_rate": 8.064516129032258e-07, "loss": 1.0135, "step": 740 }, { "epoch": 0.99, "eval_accuracy": 0.6312961578369141, "eval_loss": 1.010461449623108, "eval_runtime": 136.2447, "eval_samples_per_second": 10.929, "eval_steps_per_second": 2.738, "step": 740 }, { "epoch": 1.0, "step": 744, "total_flos": 1.673551838186588e+17, "train_loss": 1.320114940725347, "train_runtime": 11616.0305, "train_samples_per_second": 0.512, "train_steps_per_second": 0.064 } ], "max_steps": 744, "num_train_epochs": 1, "total_flos": 1.673551838186588e+17, "trial_name": null, "trial_params": null }